From f7dbb6ef2b332e1df3b96b4251b4d4bc7dc8ee56 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 31 Oct 2025 16:02:18 -0700 Subject: [PATCH 01/66] Workflow completion checker --- .github/workflows/docker-images.yml | 10 +- cmd/cli/run_scenario.go | 47 +++- dockerfiles/cli.Dockerfile | 98 +++++-- go.mod | 1 + go.sum | 2 + loadgen/generic_executor.go | 143 +++++++++- loadgen/generic_executor_test.go | 2 + loadgen/helpers.go | 34 +++ loadgen/kitchen_sink_executor.go | 37 +-- loadgen/kitchen_sink_executor_test.go | 1 + loadgen/scenario.go | 65 ++++- loadgen/scenario_test.go | 2 +- loadgen/workflow_completion_checker.go | 153 +++++++++++ scenarios/ebb_and_flow.go | 91 +++---- scenarios/ebb_and_flow_test.go | 7 +- scenarios/fixed_resource_consumption.go | 2 +- scenarios/state_transitions_steady.go | 19 +- scenarios/throughput_stress.go | 253 ++++++++++-------- scenarios/throughput_stress_test.go | 22 +- scenarios/workflow_completion_checker_test.go | 76 ++++++ scenarios/workflow_on_many_task_queues.go | 2 +- scenarios/workflow_with_many_actions.go | 5 +- .../workflow_with_single_noop_activity.go | 2 +- workers/go/go.mod | 1 + workers/go/go.sum | 2 + workers/test_env.go | 5 + 26 files changed, 824 insertions(+), 258 deletions(-) create mode 100644 loadgen/workflow_completion_checker.go create mode 100644 scenarios/workflow_completion_checker_test.go diff --git a/.github/workflows/docker-images.yml b/.github/workflows/docker-images.yml index 51366e4a..4f1d1798 100644 --- a/.github/workflows/docker-images.yml +++ b/.github/workflows/docker-images.yml @@ -82,11 +82,19 @@ jobs: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PAT }} + - name: Extract branch name + id: extract_branch + run: | + BRANCH_NAME="${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" + SANITIZED_BRANCH=$(echo "$BRANCH_NAME" | sed 's/\//-/g' | sed 's/[^a-zA-Z0-9._-]/-/g') + echo "branch_name=$SANITIZED_BRANCH" >> $GITHUB_OUTPUT + - name: Build and push to Docker Hub env: LANG: ${{ inputs.lang }} SDK_VERSION: ${{ inputs.sdk-version || 'checked-out-sdk/' }} - IMAGE_TAG_ARGS: ${{ inputs.sdk-repo-ref && format('--image-tag {0}-{1}', inputs.lang, inputs.docker-tag-ext) || ''}} + BRANCH_TAG_COMPONENT: ${{ inputs.lang && format('{0}-{1}', inputs.lang, steps.extract_branch.outputs.branch_name) || format('cli-{0}', steps.extract_branch.outputs.branch_name) }} + IMAGE_TAG_ARGS: ${{ inputs.sdk-repo-ref && format('--image-tag {0}-{1} --image-tag {2}', inputs.lang, inputs.docker-tag-ext, inputs.lang && format('{0}-{1}', inputs.lang, steps.extract_branch.outputs.branch_name) || format('cli-{0}', steps.extract_branch.outputs.branch_name)) || format('--image-tag {0}', inputs.lang && format('{0}-{1}', inputs.lang, steps.extract_branch.outputs.branch_name) || format('cli-{0}', steps.extract_branch.outputs.branch_name)) }} TAG_LATEST_ARGS: ${{ inputs.as-latest && '--tag-as-latest' || ''}} LANG_ARGS: ${{ inputs.lang && format('--language {0}', inputs.lang) || '' }} VERSION_ARGS: ${{ inputs.sdk-version && format('--version {0}', inputs.sdk-version) || '' }} diff --git a/cmd/cli/run_scenario.go b/cmd/cli/run_scenario.go index eee8d1b0..37bb8131 100644 --- a/cmd/cli/run_scenario.go +++ b/cmd/cli/run_scenario.go @@ -2,6 +2,9 @@ package cli import ( "context" + "crypto/rand" + "encoding/hex" + "errors" "fmt" "os" "strings" @@ -56,7 +59,6 @@ type scenarioRunConfig struct { scenarioOptions []string timeout time.Duration doNotRegisterSearchAttributes bool - ignoreAlreadyStarted bool } func (r *scenarioRunner) addCLIFlags(fs 
*pflag.FlagSet) { @@ -82,8 +84,6 @@ func (r *scenarioRunConfig) addCLIFlags(fs *pflag.FlagSet) { fs.BoolVar(&r.doNotRegisterSearchAttributes, "do-not-register-search-attributes", false, "Do not register the default search attributes used by scenarios. "+ "If the search attributes are not registed by the scenario they must be registered through some other method") - fs.BoolVar(&r.ignoreAlreadyStarted, "ignore-already-started", false, - "Ignore if a workflow with the same ID already exists. A Scenario may choose to override this behavior.") } func (r *scenarioRunner) preRun() { @@ -145,9 +145,16 @@ func (r *scenarioRunner) run(ctx context.Context) error { return fmt.Errorf("failed to get root directory: %w", err) } + // Generate a random execution ID to ensure no two executions with the same RunID collide + executionID, err := generateExecutionID() + if err != nil { + return fmt.Errorf("failed to generate execution ID: %w", err) + } + scenarioInfo := loadgen.ScenarioInfo{ ScenarioName: r.scenario.Scenario, RunID: r.scenario.RunID, + ExecutionID: executionID, Logger: r.logger, MetricsHandler: metrics.NewHandler(), Client: client, @@ -159,16 +166,40 @@ func (r *scenarioRunner) run(ctx context.Context) error { MaxIterationAttempts: r.maxIterationAttempts, Timeout: r.timeout, DoNotRegisterSearchAttributes: r.doNotRegisterSearchAttributes, - IgnoreAlreadyStarted: r.ignoreAlreadyStarted, }, ScenarioOptions: scenarioOptions, Namespace: r.clientOptions.Namespace, RootPath: repoDir, } executor := scenario.ExecutorFn() - err = executor.Run(ctx, scenarioInfo) - if err != nil { - return fmt.Errorf("failed scenario: %w", err) + + // 1. Run the scenario + scenarioErr := executor.Run(ctx, scenarioInfo) + + // Collect all errors + var allErrors []error + if scenarioErr != nil { + allErrors = append(allErrors, fmt.Errorf("scenario execution: %w", scenarioErr)) + } + + // 2. Run verifications + if verifiable, ok := executor.(loadgen.Verifyable); ok { + verifyErrs := verifiable.VerifyRun(ctx, scenarioInfo) + for _, err := range verifyErrs { + allErrors = append(allErrors, fmt.Errorf("post-scenario verification: %w", err)) + } + } + + // Aggregate all errors + return errors.Join(allErrors...) +} + +// generateExecutionID generates a random execution ID to uniquely identify this particular +// execution of a scenario. This ensures no two executions with the same RunID collide. +func generateExecutionID() (string, error) { + bytes := make([]byte, 8) // 8 bytes = 16 hex characters + if _, err := rand.Read(bytes); err != nil { + return "", err } - return nil + return hex.EncodeToString(bytes), nil } diff --git a/dockerfiles/cli.Dockerfile b/dockerfiles/cli.Dockerfile index f29bf435..07a57bf3 100644 --- a/dockerfiles/cli.Dockerfile +++ b/dockerfiles/cli.Dockerfile @@ -1,37 +1,73 @@ # Build in a full featured container ARG TARGETARCH -FROM --platform=linux/$TARGETARCH golang:1.25 AS build +# Source stage: prepare source code and install Antithesis SDK +FROM --platform=linux/$TARGETARCH golang:1.25 AS source WORKDIR /app # Install protobuf compiler and git RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive \ + && DEBIAN_FRONTEND=noninteractive \ apt-get install --no-install-recommends --assume-yes \ - protobuf-compiler=3.21.12-11 libprotoc-dev=3.21.12-11 \ - && rm -rf /var/lib/apt/lists/* + protobuf-compiler=3.21.12-11 libprotoc-dev=3.21.12-11 \ + && rm -rf /var/lib/apt/lists/* -# Install Rust for kitchen-sink-gen -RUN wget -q -O - https://sh.rustup.rs | sh -s -- -y \ - && . 
$HOME/.cargo/env \ - && echo "TARGETARCH: $TARGETARCH" \ - && ARCH=$(uname -m) \ - && echo "uname -m: $ARCH" \ - && if [ "$TARGETARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then \ - rustup target add aarch64-unknown-linux-musl; \ - else \ - rustup target add x86_64-unknown-linux-musl; \ - fi -ENV PATH="$PATH:/root/.cargo/bin" - -# Copy CLI build dependencies +# Copy all source code COPY cmd ./cmd COPY loadgen ./loadgen COPY scenarios ./scenarios COPY workers ./workers/ COPY go.mod go.sum ./ +# Install Antithesis SDK and instrumentor +RUN go get github.com/antithesishq/antithesis-sdk-go@feature-assertion-wrappers && \ + go install github.com/antithesishq/antithesis-sdk-go/tools/antithesis-go-instrumentor@feature-assertion-wrappers + +# Instrumented stage: instrument the code with Antithesis +FROM --platform=linux/$TARGETARCH golang:1.25 AS instrumented + +# Copy source and instrumentor +COPY --from=source /app /app +COPY --from=source /go/bin/antithesis-go-instrumentor /go/bin/antithesis-go-instrumentor +COPY --from=source /go/pkg/mod /go/pkg/mod + +WORKDIR /app + +RUN mkdir /app_transformed && \ + antithesis-go-instrumentor /app /app_transformed + +# Build stage: compile the instrumented code +FROM --platform=linux/$TARGETARCH golang:1.25 AS build + +ARG TARGETARCH + +# Install protobuf compiler and git +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive \ + apt-get install --no-install-recommends --assume-yes \ + protobuf-compiler=3.21.12-11 libprotoc-dev=3.21.12-11 \ + && rm -rf /var/lib/apt/lists/* + +# Install Rust for kitchen-sink-gen +RUN wget -q -O - https://sh.rustup.rs | sh -s -- -y \ + && . $HOME/.cargo/env \ + && echo "TARGETARCH: $TARGETARCH" \ + && ARCH=$(uname -m) \ + && echo "uname -m: $ARCH" \ + && if [ "$TARGETARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then \ + rustup target add aarch64-unknown-linux-musl; \ + else \ + rustup target add x86_64-unknown-linux-musl; \ + fi +ENV PATH="$PATH:/root/.cargo/bin" + +# Copy entire instrumented structure +COPY --from=instrumented /app_transformed /app_transformed + +# Set working directory to the customer code +WORKDIR /app_transformed/customer + # Build the CLI RUN CGO_ENABLED=0 go build -o temporal-omes ./cmd @@ -40,22 +76,26 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.31.0 # Build kitchen-sink-gen (statically linked) RUN cd loadgen/kitchen-sink-gen && \ - echo "TARGETARCH: $TARGETARCH" && \ - ARCH=$(uname -m) && \ - echo "uname -m: $ARCH" && \ - if [ "$TARGETARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then \ + echo "TARGETARCH: $TARGETARCH" && \ + ARCH=$(uname -m) && \ + echo "uname -m: $ARCH" && \ + if [ "$TARGETARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then \ RUST_TARGET=aarch64-unknown-linux-musl; \ - else \ + else \ RUST_TARGET=x86_64-unknown-linux-musl; \ - fi && \ - echo "Building for rust target: $RUST_TARGET" && \ - RUSTFLAGS='-C target-feature=+crt-static' cargo build --release --target $RUST_TARGET + fi && \ + echo "Building for rust target: $RUST_TARGET" && \ + RUSTFLAGS='-C target-feature=+crt-static' cargo build --release --target $RUST_TARGET # Copy the CLI to a distroless "run" container FROM --platform=linux/$TARGETARCH gcr.io/distroless/static-debian11:nonroot -COPY --from=build /app/temporal-omes /app/temporal-omes -COPY --from=build /app/loadgen/kitchen-sink-gen/target/*/release/kitchen-sink-gen /app/kitchen-sink-gen +COPY --from=build /app_transformed/customer/temporal-omes /app/temporal-omes +COPY --from=build 
/app_transformed/customer/loadgen/kitchen-sink-gen/target/*/release/kitchen-sink-gen /app/kitchen-sink-gen + +# Copy instrumentation metadata +COPY --from=instrumented /app_transformed/notifier /notifier +COPY --from=instrumented /app_transformed/symbols /symbols # Default entrypoint for CLI usage -ENTRYPOINT ["/app/temporal-omes"] \ No newline at end of file +ENTRYPOINT ["/app/temporal-omes"] diff --git a/go.mod b/go.mod index 7ab7d254..5f842e26 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/temporalio/omes go 1.25.0 require ( + github.com/antithesishq/antithesis-sdk-go v0.5.1-0.20250924165633-f60b0222f1b6 github.com/gogo/protobuf v1.3.2 github.com/golang/protobuf v1.5.4 github.com/google/uuid v1.6.0 diff --git a/go.sum b/go.sum index 7fbdd22a..becb2674 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/antithesishq/antithesis-sdk-go v0.5.1-0.20250924165633-f60b0222f1b6 h1:qSD74Vz3scN2SrfML8dy2Whcv0C3pNkfqYZXeL4SIq0= +github.com/antithesishq/antithesis-sdk-go v0.5.1-0.20250924165633-f60b0222f1b6/go.mod h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= diff --git a/loadgen/generic_executor.go b/loadgen/generic_executor.go index df28e3c8..8cfee3b1 100644 --- a/loadgen/generic_executor.go +++ b/loadgen/generic_executor.go @@ -2,16 +2,29 @@ package loadgen import ( "context" + "errors" "fmt" + "sync" "time" + "github.com/antithesishq/antithesis-sdk-go/assert" + "go.temporal.io/api/serviceerror" "go.temporal.io/sdk/client" "go.uber.org/zap" ) +// skipIterationErr is a sentinel error indicating that the iteration +// should be skipped and not recorded as a completion or failure. +var skipIterationErr = errors.New("skip iteration") + type GenericExecutor struct { // Function to execute a single iteration of this scenario Execute func(context.Context, *Run) error + + // State management + mu sync.Mutex + state *ExecutorState + workflowCompletionChecker *WorkflowCompletionChecker } type genericRun struct { @@ -24,6 +37,17 @@ type genericRun struct { } func (g *GenericExecutor) Run(ctx context.Context, info ScenarioInfo) error { + g.mu.Lock() + if g.state == nil { + g.state = &ExecutorState{ + ExecutionID: info.ExecutionID, + } + } + if g.state.StartedAt.IsZero() { + g.state.StartedAt = time.Now() + } + g.mu.Unlock() + r, err := g.newRun(info) if err != nil { return err @@ -31,6 +55,88 @@ func (g *GenericExecutor) Run(ctx context.Context, info ScenarioInfo) error { return r.Run(ctx) } +func (g *GenericExecutor) RecordCompletion() { + g.mu.Lock() + defer g.mu.Unlock() + + g.state.CompletedIterations += 1 + g.state.LastCompletedAt = time.Now() +} + +func (g *GenericExecutor) RecordError(err error) { + g.mu.Lock() + defer g.mu.Unlock() +} + +func (g *GenericExecutor) VerifyRun(ctx context.Context, info ScenarioInfo) []error { + g.mu.Lock() + if g.state == nil { + g.state = &ExecutorState{} + } + state := *g.state + checker := g.workflowCompletionChecker + g.mu.Unlock() + + if checker == nil { + return nil + } + if err := checker.Verify(ctx, state); err != nil { + return []error{err} + } + return nil +} + +// EnableWorkflowCompletionCheck enables workflow completion verification for this executor. +// It initializes a checker with the given timeout and registers the required search attributes. 
+// The timeout specifies how long to wait for workflow completion verification (defaults to 30 seconds if zero). +// The expectedWorkflowCount function, if provided, calculates the expected number of workflows from the ExecutorState. +// If nil, defaults to using state.CompletedIterations. +// Returns an error if search attribute registration fails. +func (g *GenericExecutor) EnableWorkflowCompletionCheck(ctx context.Context, info ScenarioInfo, timeout time.Duration, expectedWorkflowCount func(ExecutorState) int) error { + checker, err := NewWorkflowCompletionChecker(ctx, info, timeout) + if err != nil { + return err + } + + if expectedWorkflowCount != nil { + checker.SetExpectedWorkflowCount(expectedWorkflowCount) + } + + g.mu.Lock() + g.workflowCompletionChecker = checker + g.mu.Unlock() + + return nil +} + +// GetState returns a copy of the current state +func (g *GenericExecutor) GetState() ExecutorState { + g.mu.Lock() + defer g.mu.Unlock() + + if g.state == nil { + return ExecutorState{} + } + return *g.state +} + +func (g *GenericExecutor) Snapshot() any { + return g.GetState() +} + +func (g *GenericExecutor) LoadState(loader func(any) error) error { + var state ExecutorState + if err := loader(&state); err != nil { + return err + } + + g.mu.Lock() + g.state = &state + g.mu.Unlock() + + return nil +} + func (g *GenericExecutor) newRun(info ScenarioInfo) (*genericRun, error) { info.Configuration.ApplyDefaults() if err := info.Configuration.Validate(); err != nil { @@ -130,11 +236,20 @@ func (g *genericRun) Run(ctx context.Context) error { defer func() { g.executeTimer.Record(time.Since(iterStart)) + // Check if this is the special "skip iteration" error + isSkipIteration := errors.Is(err, skipIterationErr) + if isSkipIteration { + err = nil // Don't propagate this as an actual error + } + select { case <-ctx.Done(): case doneCh <- err: - if err == nil && g.config.OnCompletion != nil { - g.config.OnCompletion(ctx, run) + if err == nil && !isSkipIteration { + g.executor.RecordCompletion() + if g.config.OnCompletion != nil { + g.config.OnCompletion(ctx, run) + } } } }() @@ -142,13 +257,27 @@ func (g *genericRun) Run(ctx context.Context) error { retryLoop: for { err = g.executor.Execute(ctx, run) + + // Skip if workflow was already started. + if err != nil { + var alreadyStartedErr *serviceerror.WorkflowExecutionAlreadyStarted + if errors.As(err, &alreadyStartedErr) { + g.logger.Debugf("Workflow already started, skipping iteration %v", run.Iteration) + err = skipIterationErr + break + } + } + + // If defined, invoke user-defined error handler. if err != nil && g.config.HandleExecuteError != nil { err = g.config.HandleExecuteError(ctx, run, err) } + if err == nil { break } + // Attempt to retry. 
backoff, retry := run.ShouldRetry(err) if retry { err = fmt.Errorf("iteration %v encountered error: %w", run.Iteration, err) @@ -156,6 +285,14 @@ func (g *genericRun) Run(ctx context.Context) error { } else { err = fmt.Errorf("iteration %v failed: %w", run.Iteration, err) g.logger.Error(err) + assert.Unreachable( + "Workflow execution should never return an error after retries exhausted", + map[string]any{ + "iteration": run.Iteration, + "error": err.Error(), + "attempt_count": run.attemptCount, + }, + ) break retryLoop } @@ -177,7 +314,7 @@ func (g *genericRun) Run(ctx context.Context) error { for runErr == nil && currentlyRunning > 0 { waitOne(ctx) if ctx.Err() != nil { - return fmt.Errorf("timed out while waiting for runs to complete: %w", ctx.Err()) + return fmt.Errorf("timeout while waiting for runs to complete: %w", ctx.Err()) } } if runErr != nil { diff --git a/loadgen/generic_executor_test.go b/loadgen/generic_executor_test.go index 4eaf8582..9884ac28 100644 --- a/loadgen/generic_executor_test.go +++ b/loadgen/generic_executor_test.go @@ -42,6 +42,7 @@ func execute(executor *GenericExecutor, runConfig RunConfiguration) error { info := ScenarioInfo{ MetricsHandler: client.MetricsNopHandler, Logger: logger.Sugar(), + ExecutionID: "test-exec-id", Configuration: runConfig, } return executor.Run(context.Background(), info) @@ -258,3 +259,4 @@ func TestExecutorRetriesLimit(t *testing.T) { require.Equal(t, []int{1, 1, 1, 1, 1}, totalTracker.seen, "expected 5 attempts") }) } + diff --git a/loadgen/helpers.go b/loadgen/helpers.go index da1d9e64..54273053 100644 --- a/loadgen/helpers.go +++ b/loadgen/helpers.go @@ -103,6 +103,40 @@ func MinVisibilityCountEventually( return nil } +// GetNonCompletedWorkflows queries and returns details about non-completed workflows for debugging purposes. +// Returns a formatted string with up to the specified number of workflow details, or an error if the query fails. +func GetNonCompletedWorkflows(ctx context.Context, info ScenarioInfo, searchAttribute, runID string, limit int32) (string, error) { + nonCompletedQuery := fmt.Sprintf( + "%s='%s' AND ExecutionStatus != 'Completed'", + searchAttribute, + runID, + ) + + resp, err := info.Client.ListWorkflow(ctx, &workflowservice.ListWorkflowExecutionsRequest{ + Namespace: info.Namespace, + Query: nonCompletedQuery, + PageSize: limit, + }) + + if err != nil { + return "", fmt.Errorf("failed to list non-completed workflows: %w", err) + } + + if len(resp.Executions) == 0 { + return "", nil + } + + var workflowDetails string + for i, exec := range resp.Executions { + workflowDetails += fmt.Sprintf("\n %d. WorkflowID: %s, RunID: %s, Status: %s", + i+1, + exec.Execution.WorkflowId, + exec.Execution.RunId, + exec.Status.String()) + } + return workflowDetails, nil +} + // VerifyNoFailedWorkflows verifies that there are no failed or terminated workflows for the given search attribute. 
func VerifyNoFailedWorkflows(ctx context.Context, info ScenarioInfo, searchAttribute, runID string) error { var errors []string diff --git a/loadgen/kitchen_sink_executor.go b/loadgen/kitchen_sink_executor.go index 3bd4ce88..4ec73963 100644 --- a/loadgen/kitchen_sink_executor.go +++ b/loadgen/kitchen_sink_executor.go @@ -8,6 +8,8 @@ import ( ) type KitchenSinkExecutor struct { + GenericExecutor + TestInput *kitchensink.TestInput // Called once on start @@ -18,29 +20,28 @@ type KitchenSinkExecutor struct { UpdateWorkflowOptions func(context.Context, *Run, *KitchenSinkWorkflowOptions) error } -func (k KitchenSinkExecutor) Run(ctx context.Context, info ScenarioInfo) error { +func (k *KitchenSinkExecutor) Run(ctx context.Context, info ScenarioInfo) error { if k.PrepareTestInput != nil { if err := k.PrepareTestInput(ctx, info, k.TestInput); err != nil { return err } } - // Create generic executor and run it - ge := &GenericExecutor{ - Execute: func(ctx context.Context, run *Run) error { - options := run.DefaultKitchenSinkWorkflowOptions() - testInputClone, ok := proto.Clone(k.TestInput).(*kitchensink.TestInput) - if !ok { - panic("failed to clone test input") - } - options.Params = testInputClone - if k.UpdateWorkflowOptions != nil { - err := k.UpdateWorkflowOptions(ctx, run, &options) - if err != nil { - return err - } + + k.GenericExecutor.Execute = func(ctx context.Context, run *Run) error { + options := run.DefaultKitchenSinkWorkflowOptions() + testInputClone, ok := proto.Clone(k.TestInput).(*kitchensink.TestInput) + if !ok { + panic("failed to clone test input") + } + options.Params = testInputClone + if k.UpdateWorkflowOptions != nil { + err := k.UpdateWorkflowOptions(ctx, run, &options) + if err != nil { + return err } - return run.ExecuteKitchenSinkWorkflow(ctx, &options) - }, + } + return run.ExecuteKitchenSinkWorkflow(ctx, &options) } - return ge.Run(ctx, info) + + return k.GenericExecutor.Run(ctx, info) } diff --git a/loadgen/kitchen_sink_executor_test.go b/loadgen/kitchen_sink_executor_test.go index 5865df3a..ef222f14 100644 --- a/loadgen/kitchen_sink_executor_test.go +++ b/loadgen/kitchen_sink_executor_test.go @@ -912,6 +912,7 @@ func testForSDK( scenarioInfo := ScenarioInfo{ ScenarioName: "kitchenSinkTest", RunID: fmt.Sprintf("%s-%d", t.Name(), time.Now().Unix()), + ExecutionID: "test-exec-id", Configuration: RunConfiguration{ Iterations: 1, }, diff --git a/loadgen/scenario.go b/loadgen/scenario.go index 800bf550..8dd036b9 100644 --- a/loadgen/scenario.go +++ b/loadgen/scenario.go @@ -2,6 +2,7 @@ package loadgen import ( "context" + "errors" "fmt" "maps" "path/filepath" @@ -13,8 +14,10 @@ import ( "go.temporal.io/api/enums/v1" "go.temporal.io/api/operatorservice/v1" + "go.temporal.io/api/serviceerror" "go.temporal.io/sdk/client" + "go.temporal.io/sdk/temporal" "go.uber.org/zap" "github.com/temporalio/omes/loadgen/kitchensink" @@ -31,6 +34,17 @@ type Executor interface { Run(context.Context, ScenarioInfo) error } +type ExecutorState struct { + // ExecutionID is the unique identifier for this particular execution of the scenario. + ExecutionID string `json:"executionID"` + // StartedAt is the timestamp when the executor run started. + StartedAt time.Time `json:"startedAt"` + // CompletedIterations tracks the number of successfully completed iterations. + CompletedIterations int `json:"completedIterations"` + // LastCompletedAt is the timestamp of the last completed workflow. 
+ LastCompletedAt time.Time `json:"lastCompletedAt"` +} + // Optional interface that can be implemented by an [Executor] to allow it to be resumable. type Resumable interface { // LoadState loads a snapshot into the executor's internal state. @@ -54,6 +68,12 @@ type Configurable interface { Configure(ScenarioInfo) error } +// Verifyable is an optional interface that executors can implement to perform verifications after Run() completes. +type Verifyable interface { + // VerifyRun performs post-execution verifications and returns a list of errors. + VerifyRun(context.Context, ScenarioInfo) []error +} + // ExecutorFunc is an [Executor] implementation for a function type ExecutorFunc func(context.Context, ScenarioInfo) error @@ -104,6 +124,9 @@ type ScenarioInfo struct { // and workflow ID prefix. This is a single value for the whole scenario, and // not a Workflow RunId. RunID string + // ExecutionID is a randomly generated ID that uniquely identifies this particular + // execution of the scenario. Combined with RunID, it ensures no two executions collide. + ExecutionID string // Metrics component for registering new metrics. MetricsHandler client.MetricsHandler // A zap logger. @@ -120,6 +143,12 @@ type ScenarioInfo struct { RootPath string } +// OmesRunID returns the full OmesRunID value that combines RunID with ExecutionID +// to ensure no two executions with the same RunID collide. +func (s *ScenarioInfo) OmesRunID() string { + return s.RunID + "-" + s.ExecutionID +} + func (s *ScenarioInfo) ScenarioOptionInt(name string, defaultValue int) int { v := s.ScenarioOptions[name] if v == "" { @@ -207,9 +236,6 @@ type RunConfiguration struct { // cannot use the SDK to register SAs, instead the SAs must be registered through the control plane. // Default is false. DoNotRegisterSearchAttributes bool - // IgnoreAlreadyStarted, if set, will not error when a workflow with the same ID already exists. - // Default is false. - IgnoreAlreadyStarted bool // OnCompletion, if set, is invoked after each successful iteration completes. OnCompletion func(context.Context, *Run) // HandleExecuteError, if set, is called when Execute returns an error, allowing transformation of errors. @@ -226,6 +252,18 @@ func (r *RunConfiguration) ApplyDefaults() { if r.MaxIterationAttempts == 0 { r.MaxIterationAttempts = DefaultMaxIterationAttempts } + if r.HandleExecuteError == nil { + r.HandleExecuteError = func(ctx context.Context, run *Run, err error) error { + if err != nil { + var alreadyStartedErr *serviceerror.WorkflowExecutionAlreadyStarted + if errors.As(err, &alreadyStartedErr) { + run.Logger.Debugf("Workflow already started, skipping iteration %v", run.Iteration) + return nil + } + } + return err + } + } } func (r RunConfiguration) Validate() error { @@ -275,8 +313,9 @@ func (s *ScenarioInfo) RegisterDefaultSearchAttributes(ctx context.Context) erro // Ensure custom search attributes are registered that many scenarios rely on _, err := s.Client.OperatorService().AddSearchAttributes(ctx, &operatorservice.AddSearchAttributesRequest{ SearchAttributes: map[string]enums.IndexedValueType{ - "KS_Keyword": enums.INDEXED_VALUE_TYPE_KEYWORD, - "KS_Int": enums.INDEXED_VALUE_TYPE_INT, + "KS_Int": enums.INDEXED_VALUE_TYPE_INT, + "KS_Keyword": enums.INDEXED_VALUE_TYPE_KEYWORD, + OmesExecutionIDSearchAttribute: enums.INDEXED_VALUE_TYPE_KEYWORD, }, Namespace: s.Namespace, }) @@ -312,9 +351,13 @@ func (r *Run) TaskQueue() string { // DefaultStartWorkflowOptions gets default start workflow info. 
func (r *Run) DefaultStartWorkflowOptions() client.StartWorkflowOptions { return client.StartWorkflowOptions{ - TaskQueue: TaskQueueForRun(r.RunID), - ID: fmt.Sprintf("w-%s-%d", r.RunID, r.Iteration), - WorkflowExecutionErrorWhenAlreadyStarted: !r.Configuration.IgnoreAlreadyStarted, + ID: fmt.Sprintf("w-%s-%s-%d", r.RunID, r.ExecutionID, r.Iteration), + TaskQueue: TaskQueueForRun(r.RunID), + // Always return error so that Executor can handle it and record starts accurately. + WorkflowExecutionErrorWhenAlreadyStarted: true, + TypedSearchAttributes: temporal.NewSearchAttributes( + temporal.NewSearchAttributeKeyString(OmesExecutionIDSearchAttribute).ValueSet(r.OmesRunID()), + ), } } @@ -380,10 +423,12 @@ func (r *Run) ExecuteKitchenSinkWorkflow(ctx context.Context, options *KitchenSi executeErr := executor.Handle.Get(cancelCtx, nil) if executeErr != nil { - return fmt.Errorf("failed to execute kitchen sink workflow: %w", executeErr) + return fmt.Errorf("failed to execute kitchen sink workflow (workflowID: %s, runID: %s): %w", + executor.Handle.GetID(), executor.Handle.GetRunID(), executeErr) } if clientActionsErr := clientActionsErrPtr.Load(); clientActionsErr != nil { - return fmt.Errorf("kitchen sink client actions failed: %w", *clientActionsErr) + return fmt.Errorf("kitchen sink client actions failed (workflowID: %s, runID: %s): %w", + executor.Handle.GetID(), executor.Handle.GetRunID(), *clientActionsErr) } return nil } diff --git a/loadgen/scenario_test.go b/loadgen/scenario_test.go index 6e809913..aa387db5 100644 --- a/loadgen/scenario_test.go +++ b/loadgen/scenario_test.go @@ -49,7 +49,7 @@ func TestScenarioConfigValidation(t *testing.T) { expectedErr: "iterations and duration are mutually exclusive", }, { - name: "both duration and start iteration (allowed)", + name: "both duration and start iteration", configuration: RunConfiguration{Duration: 3 * time.Second, StartFromIteration: 3}, expectedErr: "", }, diff --git a/loadgen/workflow_completion_checker.go b/loadgen/workflow_completion_checker.go new file mode 100644 index 00000000..8c6c894a --- /dev/null +++ b/loadgen/workflow_completion_checker.go @@ -0,0 +1,153 @@ +package loadgen + +import ( + "context" + "errors" + "fmt" + "time" + + "go.temporal.io/api/workflowservice/v1" +) + +const OmesExecutionIDSearchAttribute = "OmesExecutionID" + +// WorkflowCompletionChecker allows verifying the workflow completion count after a scenario completed. +type WorkflowCompletionChecker struct { + // expectedWorkflowCount is an optional function to calculate the expected number of workflows + // from the ExecutorState. If nil, defaults to using state.CompletedIterations. + expectedWorkflowCount func(ExecutorState) int + // timeout is the maximum time to wait for workflow completion verification in visibility. + timeout time.Duration + // info is the scenario information stored during initialization. + info ScenarioInfo +} + +// SetExpectedWorkflowCount sets a custom function to calculate the expected number of workflows. +// If not set, defaults to using state.CompletedIterations. +func (wct *WorkflowCompletionChecker) SetExpectedWorkflowCount(fn func(ExecutorState) int) { + wct.expectedWorkflowCount = fn +} + +// GetExpectedWorkflowCount returns the expected workflow count for the given state. +// If a custom function was set via SetExpectedWorkflowCount, it uses that. +// Otherwise, defaults to state.CompletedIterations. 
+func (wct *WorkflowCompletionChecker) GetExpectedWorkflowCount(state ExecutorState) int { + if wct.expectedWorkflowCount != nil { + return wct.expectedWorkflowCount(state) + } + return state.CompletedIterations +} + +// NewWorkflowCompletionChecker creates a new checker with the given timeout. +// If timeout is zero, it uses a default of 30 seconds. +// Call this before the scenario is started to initialize and register search attributes. +func NewWorkflowCompletionChecker(ctx context.Context, info ScenarioInfo, timeout time.Duration) (*WorkflowCompletionChecker, error) { + if timeout == 0 { + timeout = 30 * time.Second + } + + checker := &WorkflowCompletionChecker{ + timeout: timeout, + } + + if err := checker.init(ctx, info); err != nil { + return nil, err + } + + return checker, nil +} + +func (wct *WorkflowCompletionChecker) init(ctx context.Context, info ScenarioInfo) error { + // Store the scenario info for later use + wct.info = info + + if info.Configuration.DoNotRegisterSearchAttributes { + return nil + } + + if err := InitSearchAttribute(ctx, info, OmesExecutionIDSearchAttribute); err != nil { + return fmt.Errorf("failed to register search attribute %s: %w", + OmesExecutionIDSearchAttribute, err) + } + return nil +} + +// Verify checks that the expected number of workflows have completed. +func (wct *WorkflowCompletionChecker) Verify(ctx context.Context, state ExecutorState) error { + var allErrors []error + + // Calculate expected workflow count + expectedCount := state.CompletedIterations + if wct.expectedWorkflowCount != nil { + expectedCount = wct.expectedWorkflowCount(state) + } + + // Check that we have completions to verify + if expectedCount == 0 { + return fmt.Errorf("no workflows completed") + } + + query := fmt.Sprintf( + "%s='%s' AND ExecutionStatus = 'Completed'", + OmesExecutionIDSearchAttribute, + wct.info.OmesRunID(), + ) + + verifyCtx, cancel := context.WithTimeout(ctx, wct.timeout) + defer cancel() + + err := MinVisibilityCountEventually( + verifyCtx, + wct.info, + &workflowservice.CountWorkflowExecutionsRequest{ + Namespace: wct.info.Namespace, + Query: query, + }, + expectedCount, + wct.timeout, + ) + + if err != nil { + allErrors = append(allErrors, err) + } + + // If verification failed, query for non-completed workflows to aid debugging + if err != nil { + workflowDetails, listErr := GetNonCompletedWorkflows( + ctx, + wct.info, + OmesExecutionIDSearchAttribute, + wct.info.OmesRunID(), + 10, + ) + + if listErr != nil { + allErrors = append(allErrors, fmt.Errorf("failed to list non-completed workflows: %w", listErr)) + } else if workflowDetails != "" { + allErrors = append(allErrors, fmt.Errorf("non-completed workflows found:%s", workflowDetails)) + } + } + + return errors.Join(allErrors...) +} + +// VerifyNoRunningWorkflows waits until there are no running workflows on the task queue for the given run ID. +// This is useful for scenarios that want to ensure all started workflows have completed. 
+func (wct *WorkflowCompletionChecker) VerifyNoRunningWorkflows(ctx context.Context) error { + query := fmt.Sprintf("TaskQueue = %q and ExecutionStatus = 'Running'", + TaskQueueForRun(wct.info.RunID)) + + verifyCtx, cancel := context.WithTimeout(ctx, wct.timeout) + defer cancel() + + return MinVisibilityCountEventually( + verifyCtx, + wct.info, + &workflowservice.CountWorkflowExecutionsRequest{ + Namespace: wct.info.Namespace, + Query: query, + }, + 0, + wct.timeout, + ) +} diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index 35067911..73358af9 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -2,7 +2,6 @@ package scenarios import ( "context" - "errors" "fmt" "math" "math/rand" @@ -12,11 +11,6 @@ import ( "github.com/temporalio/omes/loadgen" "github.com/temporalio/omes/loadgen/ebbandflow" - "go.temporal.io/api/workflowservice/v1" -) - -const ( - EbbAndFlowScenarioIdSearchAttribute = "EbbAndFlowScenarioId" ) const ( @@ -56,9 +50,7 @@ type ebbAndFlowConfig struct { } type ebbAndFlowState struct { - // TotalCompletedWorkflows tracks the total number of completed workflows across - // all restarts. It is used to verify workflow counts after the scenario completes. - TotalCompletedWorkflows int64 `json:"totalCompletedWorkflows"` + ExecutorState loadgen.ExecutorState `json:"executorState"` } type ebbAndFlowExecutor struct { @@ -72,6 +64,8 @@ type ebbAndFlowExecutor struct { completedActivities atomic.Int64 stateLock sync.Mutex state *ebbAndFlowState + completionChecker *loadgen.WorkflowCompletionChecker + executorState *loadgen.ExecutorState } var _ loadgen.Configurable = (*ebbAndFlowExecutor)(nil) @@ -150,7 +144,7 @@ func (e *ebbAndFlowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) } e.ScenarioInfo = info - e.id = fmt.Sprintf("ebb_and_flow_%s", e.RunID) + e.id = fmt.Sprintf("ebb_and_flow_%s", e.OmesRunID()) e.rng = rand.New(rand.NewSource(time.Now().UnixNano())) e.startTime = time.Now() @@ -160,15 +154,25 @@ func (e *ebbAndFlowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) return fmt.Errorf("configuration not parsed - Parse must be called before run") } - // Initialize search attribute for visibility tracking - err := loadgen.InitSearchAttribute( - ctx, - e.ScenarioInfo, - EbbAndFlowScenarioIdSearchAttribute, - ) + // Initialize executor state if needed + if e.executorState == nil { + e.executorState = &loadgen.ExecutorState{ + ExecutionID: info.ExecutionID, + } + } + + // Restore state if resuming + if e.isResuming && e.state != nil { + *e.executorState = e.state.ExecutorState + } + + // Initialize workflow completion checker with timeout from scenario options + timeout := info.ScenarioOptionDuration(VisibilityVerificationTimeoutFlag, 30*time.Second) + checker, err := loadgen.NewWorkflowCompletionChecker(ctx, info, timeout) if err != nil { - return fmt.Errorf("failed to initialize search attribute %s: %w", EbbAndFlowScenarioIdSearchAttribute, err) + return fmt.Errorf("failed to initialize completion checker: %w", err) } + e.completionChecker = checker var consecutiveErrCount int errCh := make(chan error, 10000) @@ -226,34 +230,19 @@ func (e *ebbAndFlowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) e.Logger.Info("Scenario complete; waiting for all workflows to finish...") startWG.Wait() - e.Logger.Info("Verifying scenario completion...") + e.Logger.Info("Scenario execution complete") - e.stateLock.Lock() - totalCompletedWorkflows := int(e.state.TotalCompletedWorkflows) - e.stateLock.Unlock() + return nil +} 
- // Post-scenario: verify that at least one workflow was completed. - if totalCompletedWorkflows == 0 { - return errors.New("No iterations completed. Either the scenario never ran, or it failed to resume correctly.") +func (e *ebbAndFlowExecutor) VerifyRun(ctx context.Context, info loadgen.ScenarioInfo) []error { + if e.executorState == nil { + return nil } - - // Post-scenario: verify reported workflow completion count from Visibility. - if err := loadgen.MinVisibilityCountEventually( - ctx, - e.ScenarioInfo, - &workflowservice.CountWorkflowExecutionsRequest{ - Namespace: e.Namespace, - Query: fmt.Sprintf("%s='%s'", - EbbAndFlowScenarioIdSearchAttribute, e.id), - }, - totalCompletedWorkflows, - config.VisibilityVerificationTimeout, - ); err != nil { - return err + if err := e.completionChecker.Verify(ctx, *e.executorState); err != nil { + return []error{err} } - - // Post-scenario: ensure there are no failed or terminated workflows for this run. - return loadgen.VerifyNoFailedWorkflows(ctx, e.ScenarioInfo, EbbAndFlowScenarioIdSearchAttribute, e.ScenarioInfo.RunID) + return nil } // Snapshot returns a snapshot of the current state. @@ -261,7 +250,9 @@ func (e *ebbAndFlowExecutor) Snapshot() any { e.stateLock.Lock() defer e.stateLock.Unlock() - return *e.state + return ebbAndFlowState{ + ExecutorState: *e.executorState, + } } // LoadState loads the state from the provided loader function. @@ -297,9 +288,7 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( options := run.DefaultStartWorkflowOptions() options.ID = fmt.Sprintf("%s-track-%d", e.id, iteration) options.WorkflowExecutionErrorWhenAlreadyStarted = false - options.SearchAttributes = map[string]interface{}{ - EbbAndFlowScenarioIdSearchAttribute: e.id, - } + // TypedSearchAttributes are already set by DefaultStartWorkflowOptions() workflowInput := &ebbandflow.WorkflowParams{ SleepActivities: &config, @@ -321,15 +310,13 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( e.completedActivities.Add(activities) e.incrementTotalCompletedWorkflow() - return nil -} - -func (e *ebbAndFlowExecutor) incrementTotalCompletedWorkflow() { + // Record completion in executor state for verification e.stateLock.Lock() - if e.state != nil { - e.state.TotalCompletedWorkflows++ - } + e.executorState.CompletedIterations++ + e.executorState.LastCompletedAt = time.Now() e.stateLock.Unlock() + + return nil } func calculateBacklogTarget( diff --git a/scenarios/ebb_and_flow_test.go b/scenarios/ebb_and_flow_test.go index 443925af..31b195b2 100644 --- a/scenarios/ebb_and_flow_test.go +++ b/scenarios/ebb_and_flow_test.go @@ -49,7 +49,8 @@ func TestEbbAndFlow(t *testing.T) { }` scenarioInfo := loadgen.ScenarioInfo{ - RunID: fmt.Sprintf("eaf-%d", time.Now().Unix()), + RunID: fmt.Sprintf("eaf-%d", time.Now().Unix()), + ExecutionID: "test-exec-id", Configuration: loadgen.RunConfiguration{ Duration: 10 * time.Second, }, @@ -72,7 +73,7 @@ func TestEbbAndFlow(t *testing.T) { require.NoError(t, err, "Executor should complete successfully") state = executor.Snapshot().(ebbAndFlowState) - require.GreaterOrEqual(t, state.TotalCompletedWorkflows, int64(1)) + require.GreaterOrEqual(t, state.ExecutorState.CompletedIterations, 1) }) t.Run("Run executor again, resuming from previous state", func(t *testing.T) { @@ -93,7 +94,7 @@ func TestEbbAndFlow(t *testing.T) { require.NoError(t, err, "Executor should complete successfully") state = executor.Snapshot().(ebbAndFlowState) - require.Greater(t, state.TotalCompletedWorkflows, previouState.TotalCompletedWorkflows) 
+ require.Greater(t, state.ExecutorState.CompletedIterations, previouState.ExecutorState.CompletedIterations) }) t.Run("Run executor again, resuming from previous state but without any time left", func(t *testing.T) { diff --git a/scenarios/fixed_resource_consumption.go b/scenarios/fixed_resource_consumption.go index 94dbb5a6..d6379ddb 100644 --- a/scenarios/fixed_resource_consumption.go +++ b/scenarios/fixed_resource_consumption.go @@ -63,7 +63,7 @@ func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "Used for testing slot provider performance. Runs activities that consume certain amounts of resources.", ExecutorFn: func() loadgen.Executor { - return loadgen.KitchenSinkExecutor{ + return &loadgen.KitchenSinkExecutor{ TestInput: &kitchensink.TestInput{ WorkflowInput: &kitchensink.WorkflowInput{ InitialActions: []*kitchensink.ActionSet{ diff --git a/scenarios/state_transitions_steady.go b/scenarios/state_transitions_steady.go index 3cc054a0..332f7fb9 100644 --- a/scenarios/state_transitions_steady.go +++ b/scenarios/state_transitions_steady.go @@ -8,7 +8,6 @@ import ( "github.com/temporalio/omes/loadgen" "github.com/temporalio/omes/loadgen/kitchensink" - "go.temporal.io/api/workflowservice/v1" ) func init() { @@ -50,6 +49,11 @@ func (s *stateTransitionsSteady) run(ctx context.Context) error { durationPerStateTransition, ) + completionChecker, err := loadgen.NewWorkflowCompletionChecker(ctx, s.ScenarioInfo, time.Minute) + if err != nil { + return fmt.Errorf("failed to create workflow completion checker: %w", err) + } + // Execute initial workflow and get the transition count workflowParams := &kitchensink.WorkflowInput{ InitialActions: []*kitchensink.ActionSet{ @@ -130,15 +134,6 @@ func (s *stateTransitionsSteady) run(ctx context.Context) error { s.Logger.Infof("Run complete, ran %v iterations, waiting on all workflows to complete", iter) // First, wait for all starts to have started (they are done in goroutine) startWG.Wait() - return loadgen.MinVisibilityCountEventually( - ctx, - s.ScenarioInfo, - &workflowservice.CountWorkflowExecutionsRequest{ - Namespace: s.Namespace, - Query: fmt.Sprintf("TaskQueue = %q and ExecutionStatus = 'Running'", - loadgen.TaskQueueForRun(s.RunID)), - }, - 0, - time.Minute, - ) + + return completionChecker.VerifyNoRunningWorkflows(ctx) } diff --git a/scenarios/throughput_stress.go b/scenarios/throughput_stress.go index f93829f1..5c4d5b86 100644 --- a/scenarios/throughput_stress.go +++ b/scenarios/throughput_stress.go @@ -3,11 +3,9 @@ package scenarios import ( "cmp" "context" - "errors" "fmt" "hash/fnv" "math/rand" - "strings" "sync" "time" @@ -15,7 +13,6 @@ import ( . "github.com/temporalio/omes/loadgen/kitchensink" "go.temporal.io/api/common/v1" "go.temporal.io/api/workflowservice/v1" - "go.temporal.io/sdk/temporal" "google.golang.org/protobuf/types/known/emptypb" ) @@ -39,20 +36,13 @@ const ( // MinThroughputPerHourFlag is the minimum workflow throughput required (workflows/hour). // Default is 0, meaning disabled. The scenario calculates actual throughput and compares. MinThroughputPerHourFlag = "min-throughput-per-hour" -) - -const ( - ThroughputStressScenarioIdSearchAttribute = "ThroughputStressScenarioId" + // DisableLocalActivitiesFlag converts all local activities to remote activities when set to true. + // Default is false, meaning local activities will be used as designed. 
+ DisableLocalActivitiesFlag = "disable-local-activities" ) type tpsState struct { - // CompletedIterations is the number of iteration that have been completed. - CompletedIterations int `json:"completedIterations"` - // LastCompletedIterationAt is the time when the last iteration was completed. Helpful for debugging. - LastCompletedIterationAt time.Time `json:"lastCompletedIterationAt"` - // AccumulatedDuration is the total execution time across all runs (original + resumes). - // This excludes any downtime between runs. Used for accurate throughput calculation. - AccumulatedDuration time.Duration `json:"accumulatedDuration"` + ExecutorState any `json:"executorState"` } type tpsConfig struct { @@ -67,9 +57,11 @@ type tpsConfig struct { MinThroughputPerHour float64 ScenarioRunID string RngSeed int64 + DisableLocalActivities bool } type tpsExecutor struct { + executor *loadgen.KitchenSinkExecutor lock sync.Mutex state *tpsState config *tpsConfig @@ -99,7 +91,13 @@ func (t *tpsExecutor) Snapshot() any { t.lock.Lock() defer t.lock.Unlock() - return *t.state + if t.executor == nil { + return *t.state + } + + return tpsState{ + ExecutorState: t.executor.Snapshot(), + } } // LoadState loads the state from the provided byte slice. @@ -165,8 +163,11 @@ func (t *tpsExecutor) Configure(info loadgen.ScenarioInfo) error { return fmt.Errorf("%s must be positive, got %v", VisibilityVerificationTimeoutFlag, config.VisibilityVerificationTimeout) } + config.DisableLocalActivities = info.ScenarioOptionBool(DisableLocalActivitiesFlag, false) + t.config = config t.rng = rand.New(rand.NewSource(config.RngSeed)) + return nil } @@ -183,15 +184,6 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error } t.runID = info.RunID - // Track start time of current run - currentRunStartTime := time.Now() - - // Add search attribute, if it doesn't exist yet, to query for workflows by run ID. - // Running this on resume, too, in case a previous Omes run crashed before it could add the search attribute. - if err := loadgen.InitSearchAttribute(ctx, info, ThroughputStressScenarioIdSearchAttribute); err != nil { - return err - } - t.lock.Lock() isResuming := t.isResuming currentState := *t.state @@ -199,19 +191,15 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error if isResuming { info.Logger.Info(fmt.Sprintf("Resuming scenario from state: %#v", currentState)) - info.Configuration.StartFromIteration = int(currentState.CompletedIterations) + 1 + if execState, ok := currentState.ExecutorState.(loadgen.ExecutorState); ok { + info.Configuration.StartFromIteration = execState.CompletedIterations + } } else { if err := t.verifyFirstRun(ctx, info, t.config.SkipCleanNamespaceCheck); err != nil { return err } } - // Listen to iteration completion events to update the state. - info.Configuration.OnCompletion = func(ctx context.Context, run *loadgen.Run) { - t.updateStateOnIterationCompletion() - info.Logger.Debugf("Completed iteration %d", run.Iteration) - } - // Start the scenario run. // // NOTE: When resuming, it can happen that there are no more iterations/time left to run more iterations. 
@@ -219,24 +207,13 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error if isResuming && info.Configuration.Duration <= 0 && info.Configuration.Iterations == 0 { info.Logger.Info("Skipping executor run: out of time") } else { - ksExec := &loadgen.KitchenSinkExecutor{ + t.executor = &loadgen.KitchenSinkExecutor{ TestInput: &TestInput{ WorkflowInput: &WorkflowInput{ InitialActions: []*ActionSet{}, }, }, UpdateWorkflowOptions: func(ctx context.Context, run *loadgen.Run, options *loadgen.KitchenSinkWorkflowOptions) error { - options.StartOptions = run.DefaultStartWorkflowOptions() - if isResuming { - // Enforce to never fail on "workflow already started" when resuming. - options.StartOptions.WorkflowExecutionErrorWhenAlreadyStarted = false - } - - // Add search attribute to the workflow options so that it can be used in visibility queries. - options.StartOptions.TypedSearchAttributes = temporal.NewSearchAttributes( - temporal.NewSearchAttributeKeyString(ThroughputStressScenarioIdSearchAttribute).ValueSet(info.RunID), - ) - // Start some workflows via Update-with-Start. if t.maybeWithStart(0.5) { options.Params.WithStartAction = &WithStartClientAction{ @@ -262,19 +239,61 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error return nil }, } - if err := ksExec.Run(ctx, info); err != nil { + + // Restore state if resuming + if isResuming { + if execState, ok := t.state.ExecutorState.(loadgen.ExecutorState); ok { + t.executor.LoadState(func(v any) error { + s := v.(*loadgen.ExecutorState) + *s = execState + return nil + }) + } + } + + timeout := info.ScenarioOptionDuration(VisibilityVerificationTimeoutFlag, 30*time.Second) + + // Configure expected workflow count function based on scenario config + expectedWorkflowCount := func(state loadgen.ExecutorState) int { + completedIterations := state.CompletedIterations + + // Calculate continue-as-new workflows + var continueAsNewWorkflows int + if t.config.ContinueAsNewAfterIter > 0 { + // Subtract 1 because the last iteration doesn't trigger a continue-as-new. + continueAsNewPerIter := (t.config.InternalIterations - 1) / t.config.ContinueAsNewAfterIter + continueAsNewWorkflows = continueAsNewPerIter * completedIterations + } + + // Calculate child workflows + completedChildWorkflows := completedIterations * t.config.InternalIterations + + // Total: parent + children + continue-as-new + return completedIterations + completedChildWorkflows + continueAsNewWorkflows + } + + if err := t.executor.EnableWorkflowCompletionCheck(ctx, info, timeout, expectedWorkflowCount); err != nil { + return fmt.Errorf("failed to initialize workflow completion checker: %w", err) + } + + if err := t.executor.Run(ctx, info); err != nil { return err } } t.lock.Lock() - completedIterations := t.state.CompletedIterations - t.state.AccumulatedDuration += time.Since(currentRunStartTime) - totalDuration := t.state.AccumulatedDuration + var completedIterations int + if t.executor != nil { + completedIterations = t.executor.GetState().CompletedIterations + } else { + // Executor was skipped, use state from previous run + if execState, ok := t.state.ExecutorState.(loadgen.ExecutorState); ok { + completedIterations = execState.CompletedIterations + } + } t.lock.Unlock() - completedChildWorkflows := completedIterations * t.config.InternalIterations - + // Calculate completion metrics for logging. 
var continueAsNewPerIter int var continueAsNewWorkflows int if t.config.ContinueAsNewAfterIter > 0 { @@ -282,58 +301,66 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error continueAsNewPerIter = (t.config.InternalIterations - 1) / t.config.ContinueAsNewAfterIter continueAsNewWorkflows = continueAsNewPerIter * completedIterations } - + completedChildWorkflows := completedIterations * t.config.InternalIterations completedWorkflows := completedIterations + completedChildWorkflows + continueAsNewWorkflows - var sb strings.Builder - sb.WriteString("[Scenario completion summary] ") - sb.WriteString(fmt.Sprintf("Run ID: %s, ", info.RunID)) - sb.WriteString(fmt.Sprintf("Total iterations completed: %d, ", completedIterations)) - sb.WriteString(fmt.Sprintf("Total child workflows: %d (%d per iteration), ", completedChildWorkflows, t.config.InternalIterations)) - sb.WriteString(fmt.Sprintf("Total continue-as-new workflows: %d (%d per iteration), ", continueAsNewWorkflows, continueAsNewPerIter)) - sb.WriteString(fmt.Sprintf("Total workflows completed: %d", completedWorkflows)) - info.Logger.Info(sb.String()) - - // Post-scenario: verify that at least one iteration was completed. - if completedIterations == 0 { - return errors.New("No iterations completed. Either the scenario never ran, or it failed to resume correctly.") - } - - // Post-scenario: verify reported workflow completion count from Visibility. - if err := loadgen.MinVisibilityCountEventually( - ctx, - info, - &workflowservice.CountWorkflowExecutionsRequest{ - Namespace: info.Namespace, - Query: fmt.Sprintf("%s='%s'", - ThroughputStressScenarioIdSearchAttribute, info.RunID), - }, - completedWorkflows, - t.config.VisibilityVerificationTimeout, - ); err != nil { - return err - } + // Log completion summary. + info.Logger.Info(fmt.Sprintf( + "[Scenario completion summary] Run ID: %s, Total iterations completed: %d, "+ + "Total child workflows: %d (%d per iteration), Total continue-as-new workflows: %d (%d per iteration), "+ + "Total workflows completed: %d", + info.RunID, completedIterations, completedChildWorkflows, t.config.InternalIterations, + continueAsNewWorkflows, continueAsNewPerIter, completedWorkflows)) + + return nil +} - // Post-scenario: check throughput threshold +// VerifyRun implements loadgen.VerifyRunnable for post-execution verifications +func (t *tpsExecutor) VerifyRun(ctx context.Context, info loadgen.ScenarioInfo) []error { + var errors []error + + // 1. Delegate to executor's internal verifier + errors = append(errors, t.executor.VerifyRun(ctx, info)...) + + // 2. Check throughput, if configured. 
if t.config.MinThroughputPerHour > 0 { - actualThroughputPerHour := float64(completedWorkflows) / totalDuration.Hours() - - if actualThroughputPerHour < t.config.MinThroughputPerHour { - // Calculate how many workflows we expected given the duration - expectedWorkflows := int(totalDuration.Hours() * t.config.MinThroughputPerHour) - - return fmt.Errorf("insufficient throughput: %.1f workflows/hour < %.1f required "+ - "(completed %d workflows, expected %d in %v)", - actualThroughputPerHour, - t.config.MinThroughputPerHour, - completedWorkflows, - expectedWorkflows, - totalDuration.Round(time.Second)) + state := t.executor.GetState() + + // Recalculate expected workflow count for throughput check + var continueAsNewWorkflows int + if t.config.ContinueAsNewAfterIter > 0 { + continueAsNewPerIter := (t.config.InternalIterations - 1) / t.config.ContinueAsNewAfterIter + continueAsNewWorkflows = continueAsNewPerIter * state.CompletedIterations + } + completedChildWorkflows := state.CompletedIterations * t.config.InternalIterations + completedWorkflows := state.CompletedIterations + completedChildWorkflows + continueAsNewWorkflows + + // Calculate duration from executor state + var totalDuration time.Duration + if !state.StartedAt.IsZero() && !state.LastCompletedAt.IsZero() { + totalDuration = state.LastCompletedAt.Sub(state.StartedAt) + } + + if totalDuration == 0 { + errors = append(errors, fmt.Errorf("throughput check: no duration recorded (startedAt=%v, lastCompletedAt=%v)", + state.StartedAt, state.LastCompletedAt)) + } else { + actualThroughput := float64(completedWorkflows) / totalDuration.Hours() + + if actualThroughput < t.config.MinThroughputPerHour { + expectedWorkflows := int(totalDuration.Hours() * t.config.MinThroughputPerHour) + errors = append(errors, fmt.Errorf("throughput check: %.1f workflows/hour < %.1f required "+ + "(completed %d workflows, expected %d in %v)", + actualThroughput, + t.config.MinThroughputPerHour, + completedWorkflows, + expectedWorkflows, + totalDuration.Round(time.Second))) + } } } - // Post-scenario: ensure there are no failed or terminated workflows for this run. - return loadgen.VerifyNoFailedWorkflows(ctx, info, ThroughputStressScenarioIdSearchAttribute, info.RunID) + return errors } func (t *tpsExecutor) verifyFirstRun(ctx context.Context, info loadgen.ScenarioInfo, skipCleanNamespaceCheck bool) error { @@ -343,7 +370,7 @@ func (t *tpsExecutor) verifyFirstRun(ctx context.Context, info loadgen.ScenarioI } // Complain if there are already existing workflows with the provided run id; unless resuming. - workflowCountQry := fmt.Sprintf("%s='%s'", ThroughputStressScenarioIdSearchAttribute, info.RunID) + workflowCountQry := fmt.Sprintf("%s='%s'", loadgen.OmesExecutionIDSearchAttribute, info.OmesRunID()) visibilityCount, err := info.Client.CountWorkflow(ctx, &workflowservice.CountWorkflowExecutionsRequest{ Namespace: info.Namespace, Query: workflowCountQry, @@ -359,13 +386,6 @@ func (t *tpsExecutor) verifyFirstRun(ctx context.Context, info loadgen.ScenarioI return nil } -func (t *tpsExecutor) updateStateOnIterationCompletion() { - t.lock.Lock() - defer t.lock.Unlock() - t.state.CompletedIterations += 1 - t.state.LastCompletedIterationAt = time.Now() -} - func (t *tpsExecutor) createActions(run *loadgen.Run) []*ActionSet { return []*ActionSet{ { @@ -375,6 +395,15 @@ func (t *tpsExecutor) createActions(run *loadgen.Run) []*ActionSet { } } +// activityLocality returns the appropriate activity locality function based on the config. 
+// If DisableLocalActivities is true, all activities will be remote; otherwise, return the local activity function. +func (t *tpsExecutor) activityLocality() func(*ExecuteActivityAction) *Action { + if t.config.DisableLocalActivities { + return DefaultRemoteActivity + } + return DefaultLocalActivity +} + func (t *tpsExecutor) createActionsChunk( run *loadgen.Run, childCount int, @@ -395,9 +424,9 @@ func (t *tpsExecutor) createActionsChunk( // Create actions for the current chunk for i := 0; i < itersPerChunk; i++ { syncActions := []*Action{ - PayloadActivity(256, 256, DefaultLocalActivity), - PayloadActivity(0, 256, DefaultLocalActivity), - PayloadActivity(0, 256, DefaultLocalActivity), + PayloadActivity(256, 256, t.activityLocality()), + PayloadActivity(0, 256, t.activityLocality()), + PayloadActivity(0, 256, t.activityLocality()), // TODO: use local activity: server error log "failed to set query completion state to succeeded ClientActivity(ClientActions(t.createSelfQuery()), DefaultRemoteActivity), } @@ -407,11 +436,11 @@ func (t *tpsExecutor) createActionsChunk( t.createChildWorkflowAction(run, childCount), PayloadActivity(256, 256, DefaultRemoteActivity), PayloadActivity(256, 256, DefaultRemoteActivity), - PayloadActivity(0, 256, DefaultLocalActivity), - PayloadActivity(0, 256, DefaultLocalActivity), - GenericActivity("noop", DefaultLocalActivity), + PayloadActivity(0, 256, t.activityLocality()), + PayloadActivity(0, 256, t.activityLocality()), + GenericActivity("noop", t.activityLocality()), ClientActivity(ClientActions(t.createSelfQuery()), DefaultRemoteActivity), - ClientActivity(ClientActions(t.createSelfSignal()), DefaultLocalActivity), + ClientActivity(ClientActions(t.createSelfSignal()), t.activityLocality()), ClientActivity(ClientActions(t.createSelfUpdateWithTimer()), DefaultRemoteActivity), ClientActivity(ClientActions(t.createSelfUpdateWithPayload()), DefaultRemoteActivity), // TODO: use local activity: there is an 8s gap in the event history @@ -506,9 +535,9 @@ func (t *tpsExecutor) createChildWorkflowAction(run *loadgen.Run, childID int) * }, WorkflowId: fmt.Sprintf("%s/child-%d", run.DefaultStartWorkflowOptions().ID, childID), SearchAttributes: map[string]*common.Payload{ - ThroughputStressScenarioIdSearchAttribute: &common.Payload{ + loadgen.OmesExecutionIDSearchAttribute: &common.Payload{ Metadata: map[string][]byte{"encoding": []byte("json/plain"), "type": []byte("Keyword")}, - Data: []byte(fmt.Sprintf("%q", t.config.ScenarioRunID)), // quoted to be valid JSON string + Data: []byte(fmt.Sprintf("%q", run.OmesRunID())), // quoted to be valid JSON string }, }, }, @@ -592,7 +621,7 @@ func (t *tpsExecutor) createSelfUpdateWithPayloadAsLocal() *ClientAction { DoActions: &DoActionsUpdate{ Variant: &DoActionsUpdate_DoActions{ DoActions: SingleActionSet( - PayloadActivity(0, 256, DefaultLocalActivity), + PayloadActivity(0, 256, t.activityLocality()), ), }, }, diff --git a/scenarios/throughput_stress_test.go b/scenarios/throughput_stress_test.go index 47613010..c5dac149 100644 --- a/scenarios/throughput_stress_test.go +++ b/scenarios/throughput_stress_test.go @@ -22,7 +22,8 @@ func TestThroughputStress(t *testing.T) { workers.WithNexusEndpoint(taskQueueName)) scenarioInfo := loadgen.ScenarioInfo{ - RunID: runID, + RunID: runID, + ExecutionID: "test-exec-id", Configuration: loadgen.RunConfiguration{ Iterations: 2, }, @@ -42,7 +43,8 @@ func TestThroughputStress(t *testing.T) { require.NoError(t, err, "Executor should complete successfully") state := 
executor.Snapshot().(tpsState)
-	require.Equal(t, state.CompletedIterations, 2)
+	execState := state.ExecutorState.(loadgen.ExecutorState)
+	require.Equal(t, 2, execState.CompletedIterations)
 	})
 
 	t.Run("Run executor again, resuming from middle", func(t *testing.T) {
@@ -50,13 +52,19 @@ func TestThroughputStress(t *testing.T) {
 
 		err := executor.LoadState(func(v any) error {
 			s := v.(*tpsState)
-			s.CompletedIterations = 0 // execution will start from iteration 1
+			s.ExecutorState = loadgen.ExecutorState{
+				CompletedIterations: 1, // execution will resume from iteration 2
+			}
 			return nil
 		})
 		require.NoError(t, err)
 
 		_, err = env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo)
 		require.NoError(t, err, "Executor should complete successfully when resuming from middle")
+
+		state := executor.Snapshot().(tpsState)
+		execState := state.ExecutorState.(loadgen.ExecutorState)
+		require.Equal(t, 2, execState.CompletedIterations)
 	})
 
 	t.Run("Run executor again, resuming from end", func(t *testing.T) {
@@ -64,12 +72,18 @@ func TestThroughputStress(t *testing.T) {
 
 		err := executor.LoadState(func(v any) error {
 			s := v.(*tpsState)
-			s.CompletedIterations = s.CompletedIterations
+			s.ExecutorState = loadgen.ExecutorState{
+				CompletedIterations: 2,
+			}
 			return nil
 		})
 		require.NoError(t, err)
 
 		_, err = env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo)
 		require.NoError(t, err, "Executor should complete successfully when resuming from end")
+
+		state := executor.Snapshot().(tpsState)
+		execState := state.ExecutorState.(loadgen.ExecutorState)
+		require.Equal(t, 2, execState.CompletedIterations)
 	})
 }
diff --git a/scenarios/workflow_completion_checker_test.go b/scenarios/workflow_completion_checker_test.go
new file mode 100644
index 00000000..1eadbb6b
--- /dev/null
+++ b/scenarios/workflow_completion_checker_test.go
@@ -0,0 +1,76 @@
+package scenarios
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+	"github.com/temporalio/omes/cmd/clioptions"
+	"github.com/temporalio/omes/loadgen"
+	"github.com/temporalio/omes/loadgen/kitchensink"
+	"github.com/temporalio/omes/workers"
+)
+
+// Test that WorkflowCompletionChecker is able to detect a stuck workflow.
+func TestWorkflowCompletionChecker(t *testing.T) {
+	t.Parallel()
+
+	env := workers.SetupTestEnvironment(t,
+		workers.WithExecutorTimeout(5*time.Second))
+
+	scenarioInfo := loadgen.ScenarioInfo{
+		RunID:       fmt.Sprintf("stuck-%d", time.Now().Unix()),
+		ExecutionID: "test-exec-id",
+		Configuration: loadgen.RunConfiguration{
+			Iterations: 10,
+		},
+	}
+
+	executor := &loadgen.KitchenSinkExecutor{
+		TestInput: &kitchensink.TestInput{
+			WorkflowInput: &kitchensink.WorkflowInput{},
+		},
+		UpdateWorkflowOptions: func(ctx context.Context, run *loadgen.Run, options *loadgen.KitchenSinkWorkflowOptions) error {
+			// Only the first iteration should block forever.
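+			// The remaining iterations complete normally; only this first workflow is left incomplete.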
+ if run.Iteration == 1 { + options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + { + Actions: []*kitchensink.Action{ + { + Variant: &kitchensink.Action_AwaitWorkflowState{ + AwaitWorkflowState: &kitchensink.AwaitWorkflowState{ + Key: "will-never-be-set", + Value: "never", + }, + }, + }, + }, + Concurrent: false, + }, + } + } else { + options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + kitchensink.NoOpSingleActivityActionSet(), + } + } + return nil + }, + } + + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.Error(t, err, "Executor should fail because first workflow times out") + + errorMsg := err.Error() + require.True(t, + strings.Contains(errorMsg, "timeout") || + strings.Contains(errorMsg, "Timeout") || + strings.Contains(errorMsg, "deadline") || + strings.Contains(errorMsg, "DeadlineExceeded"), + "Expected timeout-related error, got: %s", errorMsg) + + execState := executor.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 9, execState.CompletedIterations, "Should complete 9 iterations") +} diff --git a/scenarios/workflow_on_many_task_queues.go b/scenarios/workflow_on_many_task_queues.go index e838832c..a4b7a96a 100644 --- a/scenarios/workflow_on_many_task_queues.go +++ b/scenarios/workflow_on_many_task_queues.go @@ -14,7 +14,7 @@ func init() { "Workers must be started with --task-queue-suffix-index-end as one less than task queue count here. " + "Additional options: task-queue-count (required).", ExecutorFn: func() loadgen.Executor { - return loadgen.KitchenSinkExecutor{ + return &loadgen.KitchenSinkExecutor{ TestInput: &kitchensink.TestInput{ WorkflowInput: &kitchensink.WorkflowInput{ InitialActions: []*kitchensink.ActionSet{ diff --git a/scenarios/workflow_with_many_actions.go b/scenarios/workflow_with_many_actions.go index 2325c18b..c7d2a362 100644 --- a/scenarios/workflow_with_many_actions.go +++ b/scenarios/workflow_with_many_actions.go @@ -2,10 +2,11 @@ package scenarios import ( "context" + "strconv" + "go.temporal.io/api/common/v1" "go.temporal.io/sdk/converter" "google.golang.org/protobuf/types/known/durationpb" - "strconv" "github.com/temporalio/omes/loadgen" "github.com/temporalio/omes/loadgen/kitchensink" @@ -16,7 +17,7 @@ func init() { Description: "Each iteration executes a single workflow with a number of child workflows and/or activities. 
" + "Additional options: children-per-workflow (default 30), activities-per-workflow (default 30).", ExecutorFn: func() loadgen.Executor { - return loadgen.KitchenSinkExecutor{ + return &loadgen.KitchenSinkExecutor{ TestInput: &kitchensink.TestInput{ WorkflowInput: &kitchensink.WorkflowInput{ InitialActions: []*kitchensink.ActionSet{}, diff --git a/scenarios/workflow_with_single_noop_activity.go b/scenarios/workflow_with_single_noop_activity.go index f859aad1..2fb81c0a 100644 --- a/scenarios/workflow_with_single_noop_activity.go +++ b/scenarios/workflow_with_single_noop_activity.go @@ -9,7 +9,7 @@ func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "Each iteration executes a single workflow with a noop activity.", ExecutorFn: func() loadgen.Executor { - return loadgen.KitchenSinkExecutor{ + return &loadgen.KitchenSinkExecutor{ TestInput: &kitchensink.TestInput{ WorkflowInput: &kitchensink.WorkflowInput{ InitialActions: []*kitchensink.ActionSet{ diff --git a/workers/go/go.mod b/workers/go/go.mod index 69457921..1a1123f0 100644 --- a/workers/go/go.mod +++ b/workers/go/go.mod @@ -13,6 +13,7 @@ require ( ) require ( + github.com/antithesishq/antithesis-sdk-go v0.5.1-0.20250924165633-f60b0222f1b6 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect diff --git a/workers/go/go.sum b/workers/go/go.sum index e8fbc88a..f89b9b63 100644 --- a/workers/go/go.sum +++ b/workers/go/go.sum @@ -1,3 +1,5 @@ +github.com/antithesishq/antithesis-sdk-go v0.5.1-0.20250924165633-f60b0222f1b6 h1:qSD74Vz3scN2SrfML8dy2Whcv0C3pNkfqYZXeL4SIq0= +github.com/antithesishq/antithesis-sdk-go v0.5.1-0.20250924165633-f60b0222f1b6/go.mod h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= diff --git a/workers/test_env.go b/workers/test_env.go index 9663b2b3..c61b712c 100644 --- a/workers/test_env.go +++ b/workers/test_env.go @@ -96,6 +96,11 @@ func SetupTestEnvironment(t *testing.T, opts ...TestEnvOption) *TestEnvironment LogLevel: "error", Stdout: &logWriter{logger: serverLogger}, Stderr: &logWriter{logger: serverLogger}, + ExtraArgs: []string{ + "--search-attribute", "OmesExecutionID=Keyword", + "--search-attribute", "KS_Int=Int", + "--search-attribute", "KS_Keyword=Keyword", + }, }) require.NoError(t, err, "Failed to start dev server") From d74bcd4c1e170c8be77876fbe70e740d1400bcbe Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 31 Oct 2025 16:15:00 -0700 Subject: [PATCH 02/66] Update ebb_and_flow.go --- scenarios/ebb_and_flow.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index 73358af9..1e71c8a5 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -64,7 +64,7 @@ type ebbAndFlowExecutor struct { completedActivities atomic.Int64 stateLock sync.Mutex state *ebbAndFlowState - completionChecker *loadgen.WorkflowCompletionChecker + completionVerifier *loadgen.WorkflowCompletionChecker executorState *loadgen.ExecutorState } @@ -172,7 +172,7 @@ func (e *ebbAndFlowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) if err != nil { return fmt.Errorf("failed to initialize completion checker: %w", err) } - e.completionChecker = checker + e.completionVerifier = 
checker var consecutiveErrCount int errCh := make(chan error, 10000) @@ -239,7 +239,7 @@ func (e *ebbAndFlowExecutor) VerifyRun(ctx context.Context, info loadgen.Scenari if e.executorState == nil { return nil } - if err := e.completionChecker.Verify(ctx, *e.executorState); err != nil { + if err := e.completionVerifier.Verify(ctx, *e.executorState); err != nil { return []error{err} } return nil From fd0a6faaf14a07d6653812a7240ad7d1493dc01d Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 31 Oct 2025 16:15:22 -0700 Subject: [PATCH 03/66] Update generic_executor.go --- loadgen/generic_executor.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/loadgen/generic_executor.go b/loadgen/generic_executor.go index 8cfee3b1..0a9ef40c 100644 --- a/loadgen/generic_executor.go +++ b/loadgen/generic_executor.go @@ -22,9 +22,9 @@ type GenericExecutor struct { Execute func(context.Context, *Run) error // State management - mu sync.Mutex - state *ExecutorState - workflowCompletionChecker *WorkflowCompletionChecker + mu sync.Mutex + state *ExecutorState + workflowCompletionVerifier *WorkflowCompletionVerifier } type genericRun struct { @@ -74,36 +74,36 @@ func (g *GenericExecutor) VerifyRun(ctx context.Context, info ScenarioInfo) []er g.state = &ExecutorState{} } state := *g.state - checker := g.workflowCompletionChecker + verifier := g.workflowCompletionVerifier g.mu.Unlock() - if checker == nil { + if verifier == nil { return nil } - if err := checker.Verify(ctx, state); err != nil { + if err := verifier.Verify(ctx, state); err != nil { return []error{err} } return nil } // EnableWorkflowCompletionCheck enables workflow completion verification for this executor. -// It initializes a checker with the given timeout and registers the required search attributes. +// It initializes a verifier with the given timeout and registers the required search attributes. // The timeout specifies how long to wait for workflow completion verification (defaults to 30 seconds if zero). // The expectedWorkflowCount function, if provided, calculates the expected number of workflows from the ExecutorState. // If nil, defaults to using state.CompletedIterations. // Returns an error if search attribute registration fails. 
func (g *GenericExecutor) EnableWorkflowCompletionCheck(ctx context.Context, info ScenarioInfo, timeout time.Duration, expectedWorkflowCount func(ExecutorState) int) error { - checker, err := NewWorkflowCompletionChecker(ctx, info, timeout) + verifier, err := NewWorkflowCompletionChecker(ctx, info, timeout) if err != nil { return err } if expectedWorkflowCount != nil { - checker.SetExpectedWorkflowCount(expectedWorkflowCount) + verifier.SetExpectedWorkflowCount(expectedWorkflowCount) } g.mu.Lock() - g.workflowCompletionChecker = checker + g.workflowCompletionVerifier = verifier g.mu.Unlock() return nil From 9c2886581ece55624265a127b1e5882a7bbf06f7 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 31 Oct 2025 16:15:27 -0700 Subject: [PATCH 04/66] drop --- cmd/cli/run_scenario.go | 4 +- loadgen/generic_executor.go | 48 +------ loadgen/helpers.go | 22 ++-- loadgen/scenario.go | 16 +-- loadgen/workflow_completion_checker.go | 107 +++++++-------- scenarios/ebb_and_flow.go | 19 ++- scenarios/throughput_stress.go | 122 ++++++++++-------- scenarios/workflow_completion_checker_test.go | 31 +++-- 8 files changed, 168 insertions(+), 201 deletions(-) diff --git a/cmd/cli/run_scenario.go b/cmd/cli/run_scenario.go index 37bb8131..1a565a11 100644 --- a/cmd/cli/run_scenario.go +++ b/cmd/cli/run_scenario.go @@ -183,8 +183,8 @@ func (r *scenarioRunner) run(ctx context.Context) error { } // 2. Run verifications - if verifiable, ok := executor.(loadgen.Verifyable); ok { - verifyErrs := verifiable.VerifyRun(ctx, scenarioInfo) + if scenario.VerifyFn != nil { + verifyErrs := scenario.VerifyFn(ctx, scenarioInfo, executor) for _, err := range verifyErrs { allErrors = append(allErrors, fmt.Errorf("post-scenario verification: %w", err)) } diff --git a/loadgen/generic_executor.go b/loadgen/generic_executor.go index 0a9ef40c..c0685feb 100644 --- a/loadgen/generic_executor.go +++ b/loadgen/generic_executor.go @@ -22,9 +22,8 @@ type GenericExecutor struct { Execute func(context.Context, *Run) error // State management - mu sync.Mutex - state *ExecutorState - workflowCompletionVerifier *WorkflowCompletionVerifier + mu sync.Mutex + state *ExecutorState } type genericRun struct { @@ -68,47 +67,6 @@ func (g *GenericExecutor) RecordError(err error) { defer g.mu.Unlock() } -func (g *GenericExecutor) VerifyRun(ctx context.Context, info ScenarioInfo) []error { - g.mu.Lock() - if g.state == nil { - g.state = &ExecutorState{} - } - state := *g.state - verifier := g.workflowCompletionVerifier - g.mu.Unlock() - - if verifier == nil { - return nil - } - if err := verifier.Verify(ctx, state); err != nil { - return []error{err} - } - return nil -} - -// EnableWorkflowCompletionCheck enables workflow completion verification for this executor. -// It initializes a verifier with the given timeout and registers the required search attributes. -// The timeout specifies how long to wait for workflow completion verification (defaults to 30 seconds if zero). -// The expectedWorkflowCount function, if provided, calculates the expected number of workflows from the ExecutorState. -// If nil, defaults to using state.CompletedIterations. -// Returns an error if search attribute registration fails. 
-func (g *GenericExecutor) EnableWorkflowCompletionCheck(ctx context.Context, info ScenarioInfo, timeout time.Duration, expectedWorkflowCount func(ExecutorState) int) error { - verifier, err := NewWorkflowCompletionChecker(ctx, info, timeout) - if err != nil { - return err - } - - if expectedWorkflowCount != nil { - verifier.SetExpectedWorkflowCount(expectedWorkflowCount) - } - - g.mu.Lock() - g.workflowCompletionVerifier = verifier - g.mu.Unlock() - - return nil -} - // GetState returns a copy of the current state func (g *GenericExecutor) GetState() ExecutorState { g.mu.Lock() @@ -314,7 +272,7 @@ func (g *genericRun) Run(ctx context.Context) error { for runErr == nil && currentlyRunning > 0 { waitOne(ctx) if ctx.Err() != nil { - return fmt.Errorf("timeout while waiting for runs to complete: %w", ctx.Err()) + return fmt.Errorf("timed out while waiting for runs to complete: %w", ctx.Err()) } } if runErr != nil { diff --git a/loadgen/helpers.go b/loadgen/helpers.go index 54273053..55952f09 100644 --- a/loadgen/helpers.go +++ b/loadgen/helpers.go @@ -103,9 +103,9 @@ func MinVisibilityCountEventually( return nil } -// GetNonCompletedWorkflows queries and returns details about non-completed workflows for debugging purposes. -// Returns a formatted string with up to the specified number of workflow details, or an error if the query fails. -func GetNonCompletedWorkflows(ctx context.Context, info ScenarioInfo, searchAttribute, runID string, limit int32) (string, error) { +// GetNonCompletedWorkflows queries and returns an error for each non-completed workflow. +// Returns a list of errors (one per non-completed workflow) with workflow details, or a query error if the list fails. +func GetNonCompletedWorkflows(ctx context.Context, info ScenarioInfo, searchAttribute, runID string, limit int32) []error { nonCompletedQuery := fmt.Sprintf( "%s='%s' AND ExecutionStatus != 'Completed'", searchAttribute, @@ -119,22 +119,22 @@ func GetNonCompletedWorkflows(ctx context.Context, info ScenarioInfo, searchAttr }) if err != nil { - return "", fmt.Errorf("failed to list non-completed workflows: %w", err) + return []error{fmt.Errorf("failed to list non-completed workflows: %w", err)} } if len(resp.Executions) == 0 { - return "", nil + return nil } - var workflowDetails string - for i, exec := range resp.Executions { - workflowDetails += fmt.Sprintf("\n %d. WorkflowID: %s, RunID: %s, Status: %s", - i+1, + var workflowErrors []error + for _, exec := range resp.Executions { + workflowErrors = append(workflowErrors, fmt.Errorf( + "non-completed workflow: WorkflowID=%s, RunID=%s, Status=%s", exec.Execution.WorkflowId, exec.Execution.RunId, - exec.Status.String()) + exec.Status.String())) } - return workflowDetails, nil + return workflowErrors } // VerifyNoFailedWorkflows verifies that there are no failed or terminated workflows for the given search attribute. diff --git a/loadgen/scenario.go b/loadgen/scenario.go index 8dd036b9..d99e5338 100644 --- a/loadgen/scenario.go +++ b/loadgen/scenario.go @@ -26,6 +26,7 @@ import ( type Scenario struct { Description string ExecutorFn func() Executor + VerifyFn func(context.Context, ScenarioInfo, Executor) []error } // Executor for a scenario. @@ -68,10 +69,11 @@ type Configurable interface { Configure(ScenarioInfo) error } -// Verifyable is an optional interface that executors can implement to perform verifications after Run() completes. -type Verifyable interface { +// Verifier performs post-execution verifications and returns a list of errors. 
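+// Implementations receive the executor's final state from the caller, typically a scenario's VerifyFn.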
+type Verifier interface { // VerifyRun performs post-execution verifications and returns a list of errors. - VerifyRun(context.Context, ScenarioInfo) []error + // The ExecutorState is provided by the caller. + VerifyRun(context.Context, ScenarioInfo, ExecutorState) []error } // ExecutorFunc is an [Executor] implementation for a function @@ -143,12 +145,6 @@ type ScenarioInfo struct { RootPath string } -// OmesRunID returns the full OmesRunID value that combines RunID with ExecutionID -// to ensure no two executions with the same RunID collide. -func (s *ScenarioInfo) OmesRunID() string { - return s.RunID + "-" + s.ExecutionID -} - func (s *ScenarioInfo) ScenarioOptionInt(name string, defaultValue int) int { v := s.ScenarioOptions[name] if v == "" { @@ -356,7 +352,7 @@ func (r *Run) DefaultStartWorkflowOptions() client.StartWorkflowOptions { // Always return error so that Executor can handle it and record starts accurately. WorkflowExecutionErrorWhenAlreadyStarted: true, TypedSearchAttributes: temporal.NewSearchAttributes( - temporal.NewSearchAttributeKeyString(OmesExecutionIDSearchAttribute).ValueSet(r.OmesRunID()), + temporal.NewSearchAttributeKeyString(OmesExecutionIDSearchAttribute).ValueSet(r.ExecutionID), ), } } diff --git a/loadgen/workflow_completion_checker.go b/loadgen/workflow_completion_checker.go index 8c6c894a..cc4e16e1 100644 --- a/loadgen/workflow_completion_checker.go +++ b/loadgen/workflow_completion_checker.go @@ -2,7 +2,6 @@ package loadgen import ( "context" - "errors" "fmt" "time" @@ -11,8 +10,8 @@ import ( const OmesExecutionIDSearchAttribute = "OmesExecutionID" -// WorkflowCompletionChecker allows verifying the workflow completion count after a scenario completed. -type WorkflowCompletionChecker struct { +// WorkflowCompletionVerifier allows verifying the workflow completion count after a scenario completed. +type WorkflowCompletionVerifier struct { // expectedWorkflowCount is an optional function to calculate the expected number of workflows // from the ExecutorState. If nil, defaults to using state.CompletedIterations. expectedWorkflowCount func(ExecutorState) int @@ -24,29 +23,19 @@ type WorkflowCompletionChecker struct { // SetExpectedWorkflowCount sets a custom function to calculate the expected number of workflows. // If not set, defaults to using state.CompletedIterations. -func (wct *WorkflowCompletionChecker) SetExpectedWorkflowCount(fn func(ExecutorState) int) { +func (wct *WorkflowCompletionVerifier) SetExpectedWorkflowCount(fn func(ExecutorState) int) { wct.expectedWorkflowCount = fn } -// GetExpectedWorkflowCount returns the expected workflow count for the given state. -// If a custom function was set via SetExpectedWorkflowCount, it uses that. -// Otherwise, defaults to state.CompletedIterations. -func (wct *WorkflowCompletionChecker) GetExpectedWorkflowCount(state ExecutorState) int { - if wct.expectedWorkflowCount != nil { - return wct.expectedWorkflowCount(state) - } - return state.CompletedIterations -} - // NewWorkflowCompletionChecker creates a new checker with the given timeout. // If timeout is zero, it uses a default of 30 seconds. // Call this before the scenario is started to initialize and register search attributes. 
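+// The check is driven by visibility queries on the OmesExecutionID search attribute.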
-func NewWorkflowCompletionChecker(ctx context.Context, info ScenarioInfo, timeout time.Duration) (*WorkflowCompletionChecker, error) { +func NewWorkflowCompletionChecker(ctx context.Context, info ScenarioInfo, timeout time.Duration) (*WorkflowCompletionVerifier, error) { if timeout == 0 { timeout = 30 * time.Second } - checker := &WorkflowCompletionChecker{ + checker := &WorkflowCompletionVerifier{ timeout: timeout, } @@ -57,7 +46,7 @@ func NewWorkflowCompletionChecker(ctx context.Context, info ScenarioInfo, timeou return checker, nil } -func (wct *WorkflowCompletionChecker) init(ctx context.Context, info ScenarioInfo) error { +func (wct *WorkflowCompletionVerifier) init(ctx context.Context, info ScenarioInfo) error { // Store the scenario info for later use wct.info = info @@ -72,8 +61,14 @@ func (wct *WorkflowCompletionChecker) init(ctx context.Context, info ScenarioInf return nil } +// VerifyRun implements the Verifier interface. +// It checks that the expected number of workflows have completed using the provided state. +func (wct *WorkflowCompletionVerifier) VerifyRun(ctx context.Context, info ScenarioInfo, state ExecutorState) []error { + return wct.Verify(ctx, state) +} + // Verify checks that the expected number of workflows have completed. -func (wct *WorkflowCompletionChecker) Verify(ctx context.Context, state ExecutorState) error { +func (wct *WorkflowCompletionVerifier) Verify(ctx context.Context, state ExecutorState) []error { var allErrors []error // Calculate expected workflow count @@ -82,58 +77,52 @@ func (wct *WorkflowCompletionChecker) Verify(ctx context.Context, state Executor expectedCount = wct.expectedWorkflowCount(state) } - // Check that we have completions to verify + // (1) Verify that we have completions at all. if expectedCount == 0 { - return fmt.Errorf("no workflows completed") - } - - query := fmt.Sprintf( - "%s='%s' AND ExecutionStatus = 'Completed'", - OmesExecutionIDSearchAttribute, - wct.info.OmesRunID(), - ) - - verifyCtx, cancel := context.WithTimeout(ctx, wct.timeout) - defer cancel() - - err := MinVisibilityCountEventually( - verifyCtx, - wct.info, - &workflowservice.CountWorkflowExecutionsRequest{ - Namespace: wct.info.Namespace, - Query: query, - }, - expectedCount, - wct.timeout, - ) - - if err != nil { - allErrors = append(allErrors, err) - } - - // If verification failed, query for non-completed workflows to aid debugging - if err != nil { - workflowDetails, listErr := GetNonCompletedWorkflows( - ctx, - wct.info, + allErrors = append(allErrors, fmt.Errorf("no workflows completed")) + } else { + // (2) Verify that all completed workflows have indeed completed. + verifyCtx, cancel := context.WithTimeout(ctx, wct.timeout) + defer cancel() + + query := fmt.Sprintf( + "%s='%s' AND ExecutionStatus = 'Completed'", OmesExecutionIDSearchAttribute, - wct.info.OmesRunID(), - 10, + wct.info.ExecutionID, ) - if listErr != nil { - allErrors = append(allErrors, fmt.Errorf("failed to list non-completed workflows: %w", listErr)) - } else if workflowDetails != "" { - allErrors = append(allErrors, fmt.Errorf("non-completed workflows found:%s", workflowDetails)) + err := MinVisibilityCountEventually( + verifyCtx, + wct.info, + &workflowservice.CountWorkflowExecutionsRequest{ + Namespace: wct.info.Namespace, + Query: query, + }, + expectedCount, + wct.timeout, + ) + if err != nil { + allErrors = append(allErrors, err) } } - return errors.Join(allErrors...) + // (3) Verify that all started workflows have completed. 
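+	// Executions that are still not in 'Completed' status are reported as individual errors (up to the query limit).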
+ nonCompletedErrs := GetNonCompletedWorkflows( + ctx, + wct.info, + OmesExecutionIDSearchAttribute, + wct.info.ExecutionID, + 10, + ) + allErrors = append(allErrors, nonCompletedErrs...) + + return allErrors } +// TODO: remove this // VerifyNoRunningWorkflows waits until there are no running workflows on the task queue for the given run ID. // This is useful for scenarios that want to ensure all started workflows have completed. -func (wct *WorkflowCompletionChecker) VerifyNoRunningWorkflows(ctx context.Context) error { +func (wct *WorkflowCompletionVerifier) VerifyNoRunningWorkflows(ctx context.Context) error { query := fmt.Sprintf("TaskQueue = %q and ExecutionStatus = 'Running'", TaskQueueForRun(wct.info.RunID)) diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index 1e71c8a5..d08c5bad 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -79,6 +79,13 @@ func init() { " control-interval, max-consecutive-errors, backlog-log-interval.\n" + "Duration must be set.", ExecutorFn: func() loadgen.Executor { return newEbbAndFlowExecutor() }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*ebbAndFlowExecutor) + if e.completionVerifier == nil || e.executorState == nil { + return nil + } + return e.completionVerifier.VerifyRun(ctx, info, *e.executorState) + }, }) } @@ -144,7 +151,7 @@ func (e *ebbAndFlowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) } e.ScenarioInfo = info - e.id = fmt.Sprintf("ebb_and_flow_%s", e.OmesRunID()) + e.id = fmt.Sprintf("ebb_and_flow_%s", e.ExecutionID) e.rng = rand.New(rand.NewSource(time.Now().UnixNano())) e.startTime = time.Now() @@ -235,16 +242,6 @@ func (e *ebbAndFlowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) return nil } -func (e *ebbAndFlowExecutor) VerifyRun(ctx context.Context, info loadgen.ScenarioInfo) []error { - if e.executorState == nil { - return nil - } - if err := e.completionVerifier.Verify(ctx, *e.executorState); err != nil { - return []error{err} - } - return nil -} - // Snapshot returns a snapshot of the current state. func (e *ebbAndFlowExecutor) Snapshot() any { e.stateLock.Lock() diff --git a/scenarios/throughput_stress.go b/scenarios/throughput_stress.go index 5c4d5b86..85ed360c 100644 --- a/scenarios/throughput_stress.go +++ b/scenarios/throughput_stress.go @@ -62,6 +62,7 @@ type tpsConfig struct { type tpsExecutor struct { executor *loadgen.KitchenSinkExecutor + verifier *tpsVerifier lock sync.Mutex state *tpsState config *tpsConfig @@ -79,6 +80,14 @@ func init() { "Throughput stress scenario. 
Use --option with '%s', '%s' to control internal parameters", IterFlag, ContinueAsNewAfterIterFlag), ExecutorFn: func() loadgen.Executor { return newThroughputStressExecutor() }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + t := executor.(*tpsExecutor) + if t.verifier == nil || t.executor == nil { + return nil + } + state := t.executor.GetState() + return t.verifier.VerifyRun(ctx, info, state) + }, }) } @@ -272,9 +281,18 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error return completedIterations + completedChildWorkflows + continueAsNewWorkflows } - if err := t.executor.EnableWorkflowCompletionCheck(ctx, info, timeout, expectedWorkflowCount); err != nil { + // Initialize workflow completion checker + completionVerifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, timeout) + if err != nil { return fmt.Errorf("failed to initialize workflow completion checker: %w", err) } + completionVerifier.SetExpectedWorkflowCount(expectedWorkflowCount) + + // Create verifier that combines workflow completion and throughput checking + t.verifier = &tpsVerifier{ + completionVerifier: completionVerifier, + config: t.config, + } if err := t.executor.Run(ctx, info); err != nil { return err @@ -315,54 +333,6 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error return nil } -// VerifyRun implements loadgen.VerifyRunnable for post-execution verifications -func (t *tpsExecutor) VerifyRun(ctx context.Context, info loadgen.ScenarioInfo) []error { - var errors []error - - // 1. Delegate to executor's internal verifier - errors = append(errors, t.executor.VerifyRun(ctx, info)...) - - // 2. Check throughput, if configured. - if t.config.MinThroughputPerHour > 0 { - state := t.executor.GetState() - - // Recalculate expected workflow count for throughput check - var continueAsNewWorkflows int - if t.config.ContinueAsNewAfterIter > 0 { - continueAsNewPerIter := (t.config.InternalIterations - 1) / t.config.ContinueAsNewAfterIter - continueAsNewWorkflows = continueAsNewPerIter * state.CompletedIterations - } - completedChildWorkflows := state.CompletedIterations * t.config.InternalIterations - completedWorkflows := state.CompletedIterations + completedChildWorkflows + continueAsNewWorkflows - - // Calculate duration from executor state - var totalDuration time.Duration - if !state.StartedAt.IsZero() && !state.LastCompletedAt.IsZero() { - totalDuration = state.LastCompletedAt.Sub(state.StartedAt) - } - - if totalDuration == 0 { - errors = append(errors, fmt.Errorf("throughput check: no duration recorded (startedAt=%v, lastCompletedAt=%v)", - state.StartedAt, state.LastCompletedAt)) - } else { - actualThroughput := float64(completedWorkflows) / totalDuration.Hours() - - if actualThroughput < t.config.MinThroughputPerHour { - expectedWorkflows := int(totalDuration.Hours() * t.config.MinThroughputPerHour) - errors = append(errors, fmt.Errorf("throughput check: %.1f workflows/hour < %.1f required "+ - "(completed %d workflows, expected %d in %v)", - actualThroughput, - t.config.MinThroughputPerHour, - completedWorkflows, - expectedWorkflows, - totalDuration.Round(time.Second))) - } - } - } - - return errors -} - func (t *tpsExecutor) verifyFirstRun(ctx context.Context, info loadgen.ScenarioInfo, skipCleanNamespaceCheck bool) error { if skipCleanNamespaceCheck { info.Logger.Info("Skipping check to verify if the namespace is clean") @@ -370,7 +340,7 @@ func (t *tpsExecutor) verifyFirstRun(ctx 
context.Context, info loadgen.ScenarioI } // Complain if there are already existing workflows with the provided run id; unless resuming. - workflowCountQry := fmt.Sprintf("%s='%s'", loadgen.OmesExecutionIDSearchAttribute, info.OmesRunID()) + workflowCountQry := fmt.Sprintf("%s='%s'", loadgen.OmesExecutionIDSearchAttribute, info.ExecutionID) visibilityCount, err := info.Client.CountWorkflow(ctx, &workflowservice.CountWorkflowExecutionsRequest{ Namespace: info.Namespace, Query: workflowCountQry, @@ -537,7 +507,7 @@ func (t *tpsExecutor) createChildWorkflowAction(run *loadgen.Run, childID int) * SearchAttributes: map[string]*common.Payload{ loadgen.OmesExecutionIDSearchAttribute: &common.Payload{ Metadata: map[string][]byte{"encoding": []byte("json/plain"), "type": []byte("Keyword")}, - Data: []byte(fmt.Sprintf("%q", run.OmesRunID())), // quoted to be valid JSON string + Data: []byte(fmt.Sprintf("%q", run.ExecutionID)), // quoted to be valid JSON string }, }, }, @@ -680,3 +650,53 @@ func (t *tpsExecutor) maybeWithStart(likelihood float64) bool { defer t.lock.Unlock() return t.rng.Float64() <= likelihood } + +type tpsVerifier struct { + completionVerifier *loadgen.WorkflowCompletionVerifier + config *tpsConfig +} + +func (v *tpsVerifier) VerifyRun(ctx context.Context, info loadgen.ScenarioInfo, state loadgen.ExecutorState) []error { + var errors []error + + // 1. Delegate to completion verifier + errors = append(errors, v.completionVerifier.VerifyRun(ctx, info, state)...) + + // 2. Check throughput, if configured. + if v.config.MinThroughputPerHour > 0 { + // Recalculate expected workflow count for throughput check + var continueAsNewWorkflows int + if v.config.ContinueAsNewAfterIter > 0 { + continueAsNewPerIter := (v.config.InternalIterations - 1) / v.config.ContinueAsNewAfterIter + continueAsNewWorkflows = continueAsNewPerIter * state.CompletedIterations + } + completedChildWorkflows := state.CompletedIterations * v.config.InternalIterations + completedWorkflows := state.CompletedIterations + completedChildWorkflows + continueAsNewWorkflows + + // Calculate duration from executor state + var totalDuration time.Duration + if !state.StartedAt.IsZero() && !state.LastCompletedAt.IsZero() { + totalDuration = state.LastCompletedAt.Sub(state.StartedAt) + } + + if totalDuration == 0 { + errors = append(errors, fmt.Errorf("throughput check: no duration recorded (startedAt=%v, lastCompletedAt=%v)", + state.StartedAt, state.LastCompletedAt)) + } else { + actualThroughput := float64(completedWorkflows) / totalDuration.Hours() + + if actualThroughput < v.config.MinThroughputPerHour { + expectedWorkflows := int(totalDuration.Hours() * v.config.MinThroughputPerHour) + errors = append(errors, fmt.Errorf("throughput check: %.1f workflows/hour < %.1f required "+ + "(completed %d workflows, expected %d in %v)", + actualThroughput, + v.config.MinThroughputPerHour, + completedWorkflows, + expectedWorkflows, + totalDuration.Round(time.Second))) + } + } + } + + return errors +} diff --git a/scenarios/workflow_completion_checker_test.go b/scenarios/workflow_completion_checker_test.go index 1eadbb6b..0e0f7f99 100644 --- a/scenarios/workflow_completion_checker_test.go +++ b/scenarios/workflow_completion_checker_test.go @@ -3,7 +3,6 @@ package scenarios import ( "context" "fmt" - "strings" "testing" "time" @@ -12,6 +11,7 @@ import ( "github.com/temporalio/omes/loadgen" "github.com/temporalio/omes/loadgen/kitchensink" "github.com/temporalio/omes/workers" + "go.uber.org/zap/zaptest" ) // Test that 
WorkflowCompletionChecker is able to detect a stuck workflow. @@ -21,14 +21,23 @@ func TestWorkflowCompletionChecker(t *testing.T) { env := workers.SetupTestEnvironment(t, workers.WithExecutorTimeout(5*time.Second)) + testLogger := zaptest.NewLogger(t).Sugar() + scenarioInfo := loadgen.ScenarioInfo{ RunID: fmt.Sprintf("stuck-%d", time.Now().Unix()), ExecutionID: "test-exec-id", Configuration: loadgen.RunConfiguration{ Iterations: 10, }, + Client: env.TemporalClient(), + Namespace: "default", + Logger: testLogger, } + // Create workflow completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(t.Context(), scenarioInfo, 30*time.Second) + require.NoError(t, err, "failed to create verifier") + executor := &loadgen.KitchenSinkExecutor{ TestInput: &kitchensink.TestInput{ WorkflowInput: &kitchensink.WorkflowInput{}, @@ -60,17 +69,15 @@ func TestWorkflowCompletionChecker(t *testing.T) { }, } - _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) - require.Error(t, err, "Executor should fail because first workflow times out") - - errorMsg := err.Error() - require.True(t, - strings.Contains(errorMsg, "timeout") || - strings.Contains(errorMsg, "Timeout") || - strings.Contains(errorMsg, "deadline") || - strings.Contains(errorMsg, "DeadlineExceeded"), - "Expected timeout-related error, got: %s", errorMsg) + _, err = env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.Error(t, err, "executor should fail because first iteration times out") + require.Contains(t, err.Error(), "deadline exceeded", "should report timed out iteration") execState := executor.Snapshot().(loadgen.ExecutorState) - require.Equal(t, 9, execState.CompletedIterations, "Should complete 9 iterations") + require.Equal(t, 9, execState.CompletedIterations, "should complete 9 iterations") + + // Verify using the verifier - pass the state directly + verifyErrs := verifier.VerifyRun(t.Context(), scenarioInfo, execState) + require.NotEmpty(t, verifyErrs) + require.Contains(t, verifyErrs[0].Error(), "non-completed workflow: WorkflowID=w-stuck-") } From 71ee24a4864cf7f835f8c91bb37d378a3fe95271 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 3 Nov 2025 07:43:04 -0800 Subject: [PATCH 05/66] include CAN Signed-off-by: Stephan Behnke --- loadgen/helpers.go | 2 +- scenarios/workflow_completion_checker_test.go | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/loadgen/helpers.go b/loadgen/helpers.go index 55952f09..a57efd9b 100644 --- a/loadgen/helpers.go +++ b/loadgen/helpers.go @@ -107,7 +107,7 @@ func MinVisibilityCountEventually( // Returns a list of errors (one per non-completed workflow) with workflow details, or a query error if the list fails. 
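+// Continued-as-new executions are excluded here; they are counted as completed.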
func GetNonCompletedWorkflows(ctx context.Context, info ScenarioInfo, searchAttribute, runID string, limit int32) []error { nonCompletedQuery := fmt.Sprintf( - "%s='%s' AND ExecutionStatus != 'Completed'", + "%s='%s' AND ExecutionStatus != 'Completed' AND ExecutionStatus != 'ContinuedAsNew'", searchAttribute, runID, ) diff --git a/scenarios/workflow_completion_checker_test.go b/scenarios/workflow_completion_checker_test.go index 0e0f7f99..dbc0d168 100644 --- a/scenarios/workflow_completion_checker_test.go +++ b/scenarios/workflow_completion_checker_test.go @@ -11,6 +11,7 @@ import ( "github.com/temporalio/omes/loadgen" "github.com/temporalio/omes/loadgen/kitchensink" "github.com/temporalio/omes/workers" + "go.temporal.io/api/common/v1" "go.uber.org/zap/zaptest" ) @@ -57,7 +58,21 @@ func TestWorkflowCompletionChecker(t *testing.T) { }, }, }, - Concurrent: false, + }, + } + } else if run.Iteration%2 == 0 { + // Have some Continue-As-New. + options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + { + Actions: []*kitchensink.Action{ + { + Variant: &kitchensink.Action_ContinueAsNew{ + ContinueAsNew: &kitchensink.ContinueAsNewAction{ + Arguments: []*common.Payload{}, + }, + }, + }, + }, }, } } else { @@ -74,7 +89,7 @@ func TestWorkflowCompletionChecker(t *testing.T) { require.Contains(t, err.Error(), "deadline exceeded", "should report timed out iteration") execState := executor.Snapshot().(loadgen.ExecutorState) - require.Equal(t, 9, execState.CompletedIterations, "should complete 9 iterations") + require.Equal(t, 4, execState.CompletedIterations, "should complete 4 iterations") // Verify using the verifier - pass the state directly verifyErrs := verifier.VerifyRun(t.Context(), scenarioInfo, execState) From 8124a50321bc51ada99d7801b6507bace1df3cd8 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 3 Nov 2025 09:13:24 -0800 Subject: [PATCH 06/66] reuse Signed-off-by: Stephan Behnke --- scenarios/throughput_stress.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scenarios/throughput_stress.go b/scenarios/throughput_stress.go index 85ed360c..3f2bb343 100644 --- a/scenarios/throughput_stress.go +++ b/scenarios/throughput_stress.go @@ -12,6 +12,7 @@ import ( "github.com/temporalio/omes/loadgen" . 
"github.com/temporalio/omes/loadgen/kitchensink" "go.temporal.io/api/common/v1" + "go.temporal.io/api/enums/v1" "go.temporal.io/api/workflowservice/v1" "google.golang.org/protobuf/types/known/emptypb" ) @@ -503,7 +504,8 @@ func (t *tpsExecutor) createChildWorkflowAction(run *loadgen.Run, childID int) * }, }), }, - WorkflowId: fmt.Sprintf("%s/child-%d", run.DefaultStartWorkflowOptions().ID, childID), + WorkflowId: fmt.Sprintf("%s/child-%d", run.DefaultStartWorkflowOptions().ID, childID), + WorkflowIdReusePolicy: enums.WorkflowIdReusePolicy(enums.WORKFLOW_ID_CONFLICT_POLICY_USE_EXISTING), SearchAttributes: map[string]*common.Payload{ loadgen.OmesExecutionIDSearchAttribute: &common.Payload{ Metadata: map[string][]byte{"encoding": []byte("json/plain"), "type": []byte("Keyword")}, From a02a954a6cbffa02f27a351d92eeab804b8e7235 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 3 Nov 2025 09:50:38 -0800 Subject: [PATCH 07/66] add log Signed-off-by: Stephan Behnke --- cmd/cli/run_scenario.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/cli/run_scenario.go b/cmd/cli/run_scenario.go index 1a565a11..9915a69e 100644 --- a/cmd/cli/run_scenario.go +++ b/cmd/cli/run_scenario.go @@ -137,6 +137,8 @@ func (r *scenarioRunner) run(ctx context.Context) error { } // Wait 300ms and try again time.Sleep(300 * time.Millisecond) + + r.logger.Error("Failed to dial, retrying ...", zap.Error(err)) } defer client.Close() From 60fe1fdc121d40ed50bd6b8e1ece0f0e92493c5e Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Wed, 5 Nov 2025 07:31:39 -0800 Subject: [PATCH 08/66] bump timeout Signed-off-by: Stephan Behnke --- loadgen/workflow_completion_checker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loadgen/workflow_completion_checker.go b/loadgen/workflow_completion_checker.go index cc4e16e1..cd8805f8 100644 --- a/loadgen/workflow_completion_checker.go +++ b/loadgen/workflow_completion_checker.go @@ -32,7 +32,7 @@ func (wct *WorkflowCompletionVerifier) SetExpectedWorkflowCount(fn func(Executor // Call this before the scenario is started to initialize and register search attributes. func NewWorkflowCompletionChecker(ctx context.Context, info ScenarioInfo, timeout time.Duration) (*WorkflowCompletionVerifier, error) { if timeout == 0 { - timeout = 30 * time.Second + timeout = 3 * time.Minute // TODO: set back to 30s } checker := &WorkflowCompletionVerifier{ From cb02c5c432130c4eec20526063bb9eb3c41e4cda Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Wed, 5 Nov 2025 07:35:49 -0800 Subject: [PATCH 09/66] max-consecutive-errors Signed-off-by: Stephan Behnke --- scenarios/state_transitions_steady.go | 42 ++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/scenarios/state_transitions_steady.go b/scenarios/state_transitions_steady.go index 332f7fb9..167343da 100644 --- a/scenarios/state_transitions_steady.go +++ b/scenarios/state_transitions_steady.go @@ -10,6 +10,15 @@ import ( "github.com/temporalio/omes/loadgen/kitchensink" ) +type steadyStateConfig struct { + MaxConsecutiveErrors int +} + +type stateTransitionsSteadyExecutor struct { + loadgen.ScenarioInfo + config *steadyStateConfig +} + func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "Run a certain number of state transitions per second. 
This requires duration option to be set " + @@ -17,16 +26,34 @@ func init() { "example, can be run with: run-scenario-with-worker --scenario state_transitions_steady --language go " + "--embedded-server --duration 5m --option state-transitions-per-second=3", ExecutorFn: func() loadgen.Executor { - return loadgen.ExecutorFunc(func(ctx context.Context, runOptions loadgen.ScenarioInfo) error { - return (&stateTransitionsSteady{runOptions}).run(ctx) - }) + return &stateTransitionsSteadyExecutor{} }, }) } -type stateTransitionsSteady struct{ loadgen.ScenarioInfo } +var _ loadgen.Configurable = (*stateTransitionsSteadyExecutor)(nil) + +// Configure initializes the steadyStateConfig by reading scenario options +func (s *stateTransitionsSteadyExecutor) Configure(info loadgen.ScenarioInfo) error { + s.ScenarioInfo = info + s.config = &steadyStateConfig{ + MaxConsecutiveErrors: s.ScenarioOptionInt(MaxConsecutiveErrorsFlag, 5), + } + if s.config.MaxConsecutiveErrors < 1 { + return fmt.Errorf("%s must be at least 1, got %d", MaxConsecutiveErrorsFlag, s.config.MaxConsecutiveErrors) + } + return nil +} + +// Run executes the state transitions steady scenario +func (s *stateTransitionsSteadyExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + if err := s.Configure(info); err != nil { + return fmt.Errorf("failed to parse scenario configuration: %w", err) + } + return s.run(ctx) +} -func (s *stateTransitionsSteady) run(ctx context.Context) error { +func (s *stateTransitionsSteadyExecutor) run(ctx context.Context) error { // The goal here is to meet a certain number of state transitions per second. // For us this means a certain number of workflows per second. So we must // first execute a basic workflow (i.e. with a simple activity) and get the @@ -88,7 +115,6 @@ func (s *stateTransitionsSteady) run(ctx context.Context) error { // Start a workflow every X interval until duration reached or there are N // start failures in a row - const maxConsecutiveErrors = 5 errCh := make(chan error, 10000) ticker := time.NewTicker(workflowStartInterval) defer ticker.Stop() @@ -104,8 +130,8 @@ func (s *stateTransitionsSteady) run(ctx context.Context) error { consecutiveErrCount = 0 } else { consecutiveErrCount++ - if consecutiveErrCount >= maxConsecutiveErrors { - return fmt.Errorf("got %v consecutive errors, most recent: %w", maxConsecutiveErrors, err) + if consecutiveErrCount >= s.config.MaxConsecutiveErrors { + return fmt.Errorf("got %v consecutive errors, most recent: %w", s.config.MaxConsecutiveErrors, err) } } case <-ticker.C: From 40f750427ae029f31fb735e5c6c7f8e8dde424f9 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Thu, 6 Nov 2025 07:16:58 -0800 Subject: [PATCH 10/66] assert.Unreachable Signed-off-by: Stephan Behnke --- cmd/cli/run_scenario.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cmd/cli/run_scenario.go b/cmd/cli/run_scenario.go index 9915a69e..a43f5e5b 100644 --- a/cmd/cli/run_scenario.go +++ b/cmd/cli/run_scenario.go @@ -10,6 +10,7 @@ import ( "strings" "time" + "github.com/antithesishq/antithesis-sdk-go/assert" "github.com/spf13/cobra" "github.com/spf13/pflag" "github.com/temporalio/omes/cmd/clioptions" @@ -181,14 +182,16 @@ func (r *scenarioRunner) run(ctx context.Context) error { // Collect all errors var allErrors []error if scenarioErr != nil { - allErrors = append(allErrors, fmt.Errorf("scenario execution: %w", scenarioErr)) + allErrors = append(allErrors, fmt.Errorf("scenario execution failed: %w", scenarioErr)) + assert.Unreachable("scenario 
execution failed", map[string]any{"error": scenarioErr}) } // 2. Run verifications if scenario.VerifyFn != nil { verifyErrs := scenario.VerifyFn(ctx, scenarioInfo, executor) for _, err := range verifyErrs { - allErrors = append(allErrors, fmt.Errorf("post-scenario verification: %w", err)) + allErrors = append(allErrors, fmt.Errorf("post-scenario verification failed: %w", err)) + assert.Unreachable("post-scenario verification failed", map[string]any{"error": err}) } } From 4b2526941ef4422bbb344bfa686d768267b7de67 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 7 Nov 2025 15:55:46 -0800 Subject: [PATCH 11/66] Update test_env.go Signed-off-by: Stephan Behnke --- workers/test_env.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/workers/test_env.go b/workers/test_env.go index c61b712c..2296e79d 100644 --- a/workers/test_env.go +++ b/workers/test_env.go @@ -100,6 +100,9 @@ func SetupTestEnvironment(t *testing.T, opts ...TestEnvOption) *TestEnvironment "--search-attribute", "OmesExecutionID=Keyword", "--search-attribute", "KS_Int=Int", "--search-attribute", "KS_Keyword=Keyword", + "--dynamic-config-value", "frontend.workerVersioningDataAPIs=true", + "--dynamic-config-value", "frontend.workerVersioningWorkflowAPIs=true", + "--dynamic-config-value", "frontend.workerVersioningRuleAPIs=true", }, }) require.NoError(t, err, "Failed to start dev server") From 562594d34905ce0e794866b458bb49e5e9aab6ec Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 7 Nov 2025 15:56:05 -0800 Subject: [PATCH 12/66] wip Signed-off-by: Stephan Behnke --- scenarios/ebb_and_flow.go | 2 +- scenarios/versioning_pinned_workflows.go | 632 ++++++++++++++++++ scenarios/versioning_pinned_workflows_test.go | 115 ++++ workers/test_workers.go | 4 + 4 files changed, 752 insertions(+), 1 deletion(-) create mode 100644 scenarios/versioning_pinned_workflows.go create mode 100644 scenarios/versioning_pinned_workflows_test.go diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index d08c5bad..9c9a40b2 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -64,7 +64,7 @@ type ebbAndFlowExecutor struct { completedActivities atomic.Int64 stateLock sync.Mutex state *ebbAndFlowState - completionVerifier *loadgen.WorkflowCompletionChecker + completionVerifier *loadgen.WorkflowCompletionVerifier executorState *loadgen.ExecutorState } diff --git a/scenarios/versioning_pinned_workflows.go b/scenarios/versioning_pinned_workflows.go new file mode 100644 index 00000000..235233fb --- /dev/null +++ b/scenarios/versioning_pinned_workflows.go @@ -0,0 +1,632 @@ +package scenarios + +// versioning_pinned_workflows implements a scenario for testing worker versioning with pinned workflows. +// +// This scenario uses the Worker Deployment APIs for worker versioning (non-deprecated). +// See: https://docs.temporal.io/develop/go/versioning +// +// Implementation approach: +// - Manages Go SDK workers directly within the scenario (not via OMES worker infrastructure) +// - Uses DeploymentOptions to configure workers with deployment names and build IDs +// - Starts multiple workers concurrently with different build IDs to support version bumping +// - Old workers remain running to handle pinned workflows while new workers handle new traffic +// +// The scenario: +// 1. Starts N workflows pinned to an initial version (default: 1) +// 2. Signals all workflows on each iteration +// 3. Bumps the version every N iterations by starting new workers and setting them as current +// 4. 
Verifies that workflow build IDs always move forward, never backward + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/loadgen/kitchensink" + commonpb "go.temporal.io/api/common/v1" + "go.temporal.io/api/enums/v1" + "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/sdk/activity" + "go.temporal.io/sdk/client" + "go.temporal.io/sdk/worker" + "go.temporal.io/sdk/workflow" +) + +const ( + // NumWorkflowsFlag controls how many workflows to start on iteration 0 + NumWorkflowsFlag = "num-workflows" + // VersionBumpIntervalFlag controls how many iterations between version bumps + VersionBumpIntervalFlag = "version-bump-interval" + // InitialVersionFlag is the initial version number to pin workflows to (default: 1) + InitialVersionFlag = "initial-version" +) + +type versioningPinnedState struct { + WorkflowIDs []string `json:"workflowIds"` + CurrentVersion string `json:"currentVersion"` + VersionSequence []string `json:"versionSequence"` +} + +type versioningPinnedConfig struct { + NumWorkflows int + VersionBumpInterval int + InitialVersion string +} + +type versioningPinnedExecutor struct { + lock sync.Mutex + state *versioningPinnedState + config *versioningPinnedConfig + workers []worker.Worker // All active workers (one per version) + deploymentName string +} + +var _ loadgen.Configurable = (*versioningPinnedExecutor)(nil) + +// noopActivity is a simple activity for testing +func noopActivity(_ context.Context) error { + return nil +} + +// simpleKitchenSinkWorkflow is a simplified kitchensink workflow for this scenario +// It executes a single activity and then waits indefinitely (until cancelled/terminated) +func simpleKitchenSinkWorkflow(ctx workflow.Context, params *kitchensink.WorkflowInput) (*commonpb.Payload, error) { + // Execute a simple activity to generate history with build ID + ao := workflow.ActivityOptions{ + StartToCloseTimeout: 10 * time.Second, + } + activityCtx := workflow.WithActivityOptions(ctx, ao) + if err := workflow.ExecuteActivity(activityCtx, "noop").Get(activityCtx, nil); err != nil { + return nil, err + } + + // Wait for signals indefinitely + signalChan := workflow.GetSignalChannel(ctx, "do_signal") + selector := workflow.NewSelector(ctx) + + // Keep workflow alive by continuously waiting for signals + for { + selector.AddReceive(signalChan, func(c workflow.ReceiveChannel, more bool) { + // Receive signal with the correct type (kitchensink.DoSignal) + var signal kitchensink.DoSignal + c.Receive(ctx, &signal) + + // Execute another activity when signaled + ao := workflow.ActivityOptions{ + StartToCloseTimeout: 10 * time.Second, + } + activityCtx := workflow.WithActivityOptions(ctx, ao) + _ = workflow.ExecuteActivity(activityCtx, "noop").Get(activityCtx, nil) + }) + + selector.Select(ctx) + + // Create a new selector for the next iteration + selector = workflow.NewSelector(ctx) + } +} + +func init() { + loadgen.MustRegisterScenario(loadgen.Scenario{ + Description: fmt.Sprintf( + "Worker versioning scenario with pinned workflows. Starts n workflows pinned to version 1, "+ + "signals them each iteration, and bumps versions every n iterations. 
"+ + "Use --option with '%s' (default: 10), '%s' (default: 5), '%s' (default: 1)", + NumWorkflowsFlag, VersionBumpIntervalFlag, InitialVersionFlag), + ExecutorFn: func() loadgen.Executor { return newVersioningPinnedExecutor() }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*versioningPinnedExecutor) + return e.Verify(ctx, info) + }, + }) +} + +func newVersioningPinnedExecutor() *versioningPinnedExecutor { + return &versioningPinnedExecutor{ + state: &versioningPinnedState{ + WorkflowIDs: []string{}, + CurrentVersion: "", + VersionSequence: []string{}, + }, + } +} + +// Configure initializes the executor configuration from scenario options. +func (e *versioningPinnedExecutor) Configure(info loadgen.ScenarioInfo) error { + initialVersionNum := info.ScenarioOptionInt(InitialVersionFlag, 1) + + config := &versioningPinnedConfig{ + NumWorkflows: info.ScenarioOptionInt(NumWorkflowsFlag, 10), + VersionBumpInterval: info.ScenarioOptionInt(VersionBumpIntervalFlag, 5), + InitialVersion: fmt.Sprintf("%d", initialVersionNum), + } + + if config.NumWorkflows <= 0 { + return fmt.Errorf("%s must be positive, got %d", NumWorkflowsFlag, config.NumWorkflows) + } + + if config.VersionBumpInterval <= 0 { + return fmt.Errorf("%s must be positive, got %d", VersionBumpIntervalFlag, config.VersionBumpInterval) + } + + if initialVersionNum <= 0 { + return fmt.Errorf("%s must be positive, got %d", InitialVersionFlag, initialVersionNum) + } + + e.config = config + return nil +} + +// startWorker creates and starts a new worker with the specified build ID and deployment options. +func (e *versioningPinnedExecutor) startWorker(ctx context.Context, info loadgen.ScenarioInfo, buildID string) (worker.Worker, error) { + taskQueue := info.RunID + ".local" + + // Create worker with deployment options + w := worker.New(info.Client, taskQueue, worker.Options{ + BuildID: buildID, + UseBuildIDForVersioning: true, + DeploymentOptions: worker.DeploymentOptions{ + UseVersioning: true, + Version: worker.WorkerDeploymentVersion{ + DeploymentName: e.deploymentName, + BuildID: buildID, + }, + // Use Pinned behavior by default - workflows stay on the version they started with + DefaultVersioningBehavior: workflow.VersioningBehaviorPinned, + }, + }) + + // Register workflow and activities + w.RegisterWorkflowWithOptions(simpleKitchenSinkWorkflow, workflow.RegisterOptions{Name: "kitchenSink"}) + w.RegisterActivityWithOptions(noopActivity, activity.RegisterOptions{Name: "noop"}) + + // Start the worker + if err := w.Start(); err != nil { + return nil, fmt.Errorf("failed to start worker with build ID %s: %w", buildID, err) + } + + info.Logger.Infof("Started worker with build ID %s on task queue %s", buildID, taskQueue) + return w, nil +} + +// stopAllWorkers stops all running workers. +func (e *versioningPinnedExecutor) stopAllWorkers() { + e.lock.Lock() + workers := e.workers + e.workers = nil + e.lock.Unlock() + + for _, w := range workers { + if w != nil { + w.Stop() + } + } +} + +// Run executes the versioning scenario. 
+func (e *versioningPinnedExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + if err := e.Configure(info); err != nil { + return fmt.Errorf("failed to configure scenario: %w", err) + } + + e.lock.Lock() + e.state.CurrentVersion = e.config.InitialVersion + e.state.VersionSequence = []string{e.config.InitialVersion} + e.deploymentName = fmt.Sprintf("omes-deployment-%s", info.RunID) + e.lock.Unlock() + + // Ensure all workers are stopped when we exit + defer e.stopAllWorkers() + + // Calculate total iterations + totalIterations := info.Configuration.Iterations + if totalIterations == 0 && info.Configuration.Duration > 0 { + // Estimate iterations based on duration (assuming ~1 iteration per second) + totalIterations = int(info.Configuration.Duration.Seconds()) + } + + for iteration := 0; iteration < totalIterations; iteration++ { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + if iteration == 0 { + // Iteration 0: Start worker with initial version and start workflows + w, err := e.startWorker(ctx, info, e.state.CurrentVersion) + if err != nil { + return fmt.Errorf("failed to start initial worker: %w", err) + } + e.lock.Lock() + e.workers = append(e.workers, w) + e.lock.Unlock() + + // Wait for worker to be ready + time.Sleep(1 * time.Second) + + // Start n kitchensink workflows + if err := e.startWorkflows(ctx, info, iteration); err != nil { + return fmt.Errorf("failed to start workflows on iteration 0: %w", err) + } + info.Logger.Infof("Started %d workflows pinned to version %s", e.config.NumWorkflows, e.state.CurrentVersion) + } else { + // Check if we need to bump the version + if iteration > 0 && iteration%e.config.VersionBumpInterval == 0 { + if err := e.bumpVersion(ctx, info); err != nil { + return fmt.Errorf("failed to bump version on iteration %d: %w", iteration, err) + } + } + + // Send signals to all workflows + if err := e.signalAllWorkflows(ctx, info, iteration); err != nil { + // Log signal failures but don't fail the scenario (as per requirements) + info.Logger.Warnf("Some signals failed on iteration %d: %v", iteration, err) + } + } + + // Add a small delay between iterations to avoid overwhelming the system + time.Sleep(100 * time.Millisecond) + } + + // After all iterations, terminate workflows to complete the scenario + info.Logger.Info("Terminating workflows after scenario completion") + e.lock.Lock() + workflowIDs := make([]string, len(e.state.WorkflowIDs)) + copy(workflowIDs, e.state.WorkflowIDs) + e.lock.Unlock() + + for _, workflowID := range workflowIDs { + err := info.Client.TerminateWorkflow(ctx, workflowID, "", "scenario completed") + if err != nil { + info.Logger.Warnf("Failed to terminate workflow %s: %v", workflowID, err) + } + } + + return nil +} + +// startWorkflows starts n kitchensink workflows pinned to the current version. 
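+// Workflows are started concurrently and their IDs are recorded for later signaling and verification.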
+func (e *versioningPinnedExecutor) startWorkflows(ctx context.Context, info loadgen.ScenarioInfo, iteration int) error { + e.lock.Lock() + currentVersion := e.state.CurrentVersion + deploymentName := e.deploymentName + e.lock.Unlock() + + taskQueue := info.RunID + ".local" + + // Set the current version as the deployment's current version + // The worker has already registered the deployment, now we set it as current + if err := e.setupVersioning(ctx, info.Client, info.Namespace, deploymentName, currentVersion); err != nil { + return fmt.Errorf("failed to setup versioning: %w", err) + } + + var wg sync.WaitGroup + errChan := make(chan error, e.config.NumWorkflows) + + for i := 0; i < e.config.NumWorkflows; i++ { + wg.Add(1) + go func(workflowNum int) { + defer wg.Done() + + workflowID := fmt.Sprintf("%s-versioned-%d", info.RunID, workflowNum) + + // Create a long-running workflow that waits for signals + testInput := &kitchensink.TestInput{ + WorkflowInput: &kitchensink.WorkflowInput{ + InitialActions: []*kitchensink.ActionSet{ + { + Actions: []*kitchensink.Action{ + // Set a workflow state to track initialization + kitchensink.NewSetWorkflowStateAction(fmt.Sprintf("workflow-%d-started", workflowNum), "true"), + // Execute a noop activity to generate some history with build ID + kitchensink.GenericActivity("noop", kitchensink.DefaultRemoteActivity), + // Wait for completion signal (this state will be set by a final signal) + kitchensink.NewAwaitWorkflowStateAction(fmt.Sprintf("workflow-%d-complete", workflowNum), "true"), + }, + Concurrent: false, + }, + }, + }, + } + + options := client.StartWorkflowOptions{ + ID: workflowID, + TaskQueue: taskQueue, + WorkflowExecutionTimeout: 24 * time.Hour, + SearchAttributes: map[string]any{ + loadgen.OmesExecutionIDSearchAttribute: info.ExecutionID, + }, + } + + _, err := info.Client.ExecuteWorkflow( + ctx, + options, + "kitchenSink", + testInput.WorkflowInput, + ) + if err != nil { + errChan <- fmt.Errorf("failed to start workflow %s: %w", workflowID, err) + return + } + + e.lock.Lock() + e.state.WorkflowIDs = append(e.state.WorkflowIDs, workflowID) + e.lock.Unlock() + }(i) + } + + wg.Wait() + close(errChan) + + // Collect any errors + var errs []error + for err := range errChan { + errs = append(errs, err) + } + + if len(errs) > 0 { + return fmt.Errorf("failed to start %d workflows: %v", len(errs), errs[0]) + } + + return nil +} + +// setupVersioning configures the worker versioning for the deployment using Worker Deployment APIs. +func (e *versioningPinnedExecutor) setupVersioning(ctx context.Context, c client.Client, namespace, deploymentName, buildID string) error { + // Set the build ID as the current deployment version + _, err := c.WorkflowService().SetWorkerDeploymentCurrentVersion(ctx, &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ + Namespace: namespace, + DeploymentName: deploymentName, + BuildId: buildID, + }) + if err != nil { + return fmt.Errorf("failed to set version %s as current for deployment %s: %w", buildID, deploymentName, err) + } + + return nil +} + +// bumpVersion increases the version, starts a new worker with the new build ID, and sets it as current. 
+func (e *versioningPinnedExecutor) bumpVersion(ctx context.Context, info loadgen.ScenarioInfo) error { + e.lock.Lock() + // Parse current version (e.g., "1" -> 1) and increment + var versionNum int + _, err := fmt.Sscanf(e.state.CurrentVersion, "%d", &versionNum) + if err != nil { + e.lock.Unlock() + return fmt.Errorf("failed to parse version %s: %w", e.state.CurrentVersion, err) + } + + versionNum++ + newVersion := fmt.Sprintf("%d", versionNum) + e.lock.Unlock() + + // Start a new worker with the new build ID + // This keeps the old worker running to handle pinned workflows + w, err := e.startWorker(ctx, info, newVersion) + if err != nil { + return fmt.Errorf("failed to start worker for version %s: %w", newVersion, err) + } + + e.lock.Lock() + e.workers = append(e.workers, w) + deploymentName := e.deploymentName + e.lock.Unlock() + + // Wait for worker to be ready + time.Sleep(1 * time.Second) + + // Set the new version as the current deployment version + _, err = info.Client.WorkflowService().SetWorkerDeploymentCurrentVersion(ctx, &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ + Namespace: info.Namespace, + DeploymentName: deploymentName, + BuildId: newVersion, + }) + if err != nil { + return fmt.Errorf("failed to set version %s as current: %w", newVersion, err) + } + + e.lock.Lock() + info.Logger.Infof("Bumped version from %s to %s", e.state.CurrentVersion, newVersion) + e.state.CurrentVersion = newVersion + e.state.VersionSequence = append(e.state.VersionSequence, newVersion) + e.lock.Unlock() + + return nil +} + +// signalAllWorkflows sends a signal to all tracked workflows. +func (e *versioningPinnedExecutor) signalAllWorkflows(ctx context.Context, info loadgen.ScenarioInfo, iteration int) error { + e.lock.Lock() + workflowIDs := make([]string, len(e.state.WorkflowIDs)) + copy(workflowIDs, e.state.WorkflowIDs) + e.lock.Unlock() + + var wg sync.WaitGroup + errChan := make(chan error, len(workflowIDs)) + + for _, workflowID := range workflowIDs { + wg.Add(1) + go func(wfID string) { + defer wg.Done() + + // Send a signal that executes a simple action + signalAction := &kitchensink.DoSignal{ + Variant: &kitchensink.DoSignal_DoSignalActions_{ + DoSignalActions: &kitchensink.DoSignal_DoSignalActions{ + Variant: &kitchensink.DoSignal_DoSignalActions_DoActions{ + DoActions: kitchensink.SingleActionSet( + // Execute a noop activity as part of signal processing + kitchensink.GenericActivity("noop", kitchensink.DefaultLocalActivity), + ), + }, + }, + }, + } + + err := info.Client.SignalWorkflow( + ctx, + wfID, + "", + "do_signal", + signalAction, + ) + if err != nil { + // As per requirements, we ignore signal failures + info.Logger.Warnf("Signal failed for workflow %s: %v", wfID, err) + } + }(workflowID) + } + + wg.Wait() + close(errChan) + + return nil +} + +// Verify checks that each workflow's build ID always moved forward and never backward. +func (e *versioningPinnedExecutor) Verify(ctx context.Context, info loadgen.ScenarioInfo) []error { + e.lock.Lock() + workflowIDs := make([]string, len(e.state.WorkflowIDs)) + copy(workflowIDs, e.state.WorkflowIDs) + e.lock.Unlock() + + var errors []error + var errorsMutex sync.Mutex + + // Check each workflow's history + var wg sync.WaitGroup + for _, workflowID := range workflowIDs { + wg.Add(1) + go func(wfID string) { + defer wg.Done() + + violations := e.checkWorkflowHistory(ctx, info, wfID) + if len(violations) > 0 { + errorsMutex.Lock() + errors = append(errors, violations...) 
+ errorsMutex.Unlock() + } + }(workflowID) + } + + wg.Wait() + + if len(errors) == 0 { + info.Logger.Infof("Verification passed: All %d workflows maintained forward-only version progression", len(workflowIDs)) + } else { + info.Logger.Errorf("Verification failed: Found %d version progression violations", len(errors)) + } + + return errors +} + +// checkWorkflowHistory checks a workflow's versioning info for build ID violations. +func (e *versioningPinnedExecutor) checkWorkflowHistory(ctx context.Context, info loadgen.ScenarioInfo, workflowID string) []error { + var errors []error + + // Get workflow execution description to access versioning info + describeResp, err := info.Client.WorkflowService().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: info.Namespace, + Execution: &commonpb.WorkflowExecution{ + WorkflowId: workflowID, + }, + }) + if err != nil { + errors = append(errors, fmt.Errorf("workflow %s: failed to describe execution: %w", workflowID, err)) + return errors + } + + versioningInfo := describeResp.WorkflowExecutionInfo.GetVersioningInfo() + if versioningInfo == nil { + errors = append(errors, fmt.Errorf("workflow %s: no versioning info found", workflowID)) + return errors + } + + // Get workflow history to track build ID sequence + historyIter := info.Client.GetWorkflowHistory( + ctx, + workflowID, + "", + false, + enums.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT, + ) + + var buildIDSequence []string + buildIDVersionMap := make(map[string]int) // Map build IDs to version numbers + + // Parse version numbers from version sequence + e.lock.Lock() + for _, version := range e.state.VersionSequence { + var versionNum int + fmt.Sscanf(version, "%d", &versionNum) + buildIDVersionMap[version] = versionNum + } + e.lock.Unlock() + + // Iterate through history events to track build ID progression + // Use Started events (not deprecated) instead of Completed events + for historyIter.HasNext() { + event, err := historyIter.Next() + if err != nil { + errors = append(errors, fmt.Errorf("workflow %s: failed to read history: %w", workflowID, err)) + return errors + } + + // Check for build ID in Started events (GetWorkerVersion on Started events is not deprecated) + var buildID string + switch event.EventType { + case enums.EVENT_TYPE_WORKFLOW_TASK_STARTED: + if event.GetWorkflowTaskStartedEventAttributes() != nil && + event.GetWorkflowTaskStartedEventAttributes().GetWorkerVersion() != nil { + buildID = event.GetWorkflowTaskStartedEventAttributes().GetWorkerVersion().GetBuildId() + } + case enums.EVENT_TYPE_ACTIVITY_TASK_STARTED: + if event.GetActivityTaskStartedEventAttributes() != nil && + event.GetActivityTaskStartedEventAttributes().GetWorkerVersion() != nil { + buildID = event.GetActivityTaskStartedEventAttributes().GetWorkerVersion().GetBuildId() + } + } + + // If we found a build ID, track it (avoid duplicates) + if buildID != "" { + if len(buildIDSequence) == 0 || buildIDSequence[len(buildIDSequence)-1] != buildID { + buildIDSequence = append(buildIDSequence, buildID) + } + } + } + + // Check that build IDs never moved backward + for i := 1; i < len(buildIDSequence); i++ { + prevBuildID := buildIDSequence[i-1] + currBuildID := buildIDSequence[i] + + prevVersion, prevExists := buildIDVersionMap[prevBuildID] + currVersion, currExists := buildIDVersionMap[currBuildID] + + if !prevExists { + errors = append(errors, fmt.Errorf( + "workflow %s: unknown build ID '%s' at position %d in sequence", + workflowID, prevBuildID, i-1)) + continue + } + + if 
!currExists { + errors = append(errors, fmt.Errorf( + "workflow %s: unknown build ID '%s' at position %d in sequence", + workflowID, currBuildID, i)) + continue + } + + if currVersion < prevVersion { + errors = append(errors, fmt.Errorf( + "workflow %s: build ID moved backward from %s (%d) to %s (%d) at history position %d", + workflowID, prevBuildID, prevVersion, currBuildID, currVersion, i)) + } + } + + return errors +} diff --git a/scenarios/versioning_pinned_workflows_test.go b/scenarios/versioning_pinned_workflows_test.go new file mode 100644 index 00000000..d18f058e --- /dev/null +++ b/scenarios/versioning_pinned_workflows_test.go @@ -0,0 +1,115 @@ +package scenarios + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/temporalio/omes/cmd/clioptions" + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/workers" + "go.temporal.io/api/enums/v1" + "go.uber.org/zap/zaptest" +) + +func TestVersioningPinnedWorkflows(t *testing.T) { + t.Parallel() + + runID := fmt.Sprintf("vpw-%d", time.Now().Unix()) + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(2*time.Minute)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: runID, + ExecutionID: "test-exec-id", + Configuration: loadgen.RunConfiguration{ + Iterations: 12, // 0 (start) + 11 iterations, will bump versions at 5 and 10 + }, + ScenarioOptions: map[string]string{ + NumWorkflowsFlag: "3", // Start 3 workflows + VersionBumpIntervalFlag: "5", // Bump every 5 iterations + InitialVersionFlag: "1", // Start with version 1 + }, + } + + t.Run("Run executor", func(t *testing.T) { + executor := newVersioningPinnedExecutor() + + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "Executor should complete successfully") + + executor.lock.Lock() + state := *executor.state + executor.lock.Unlock() + + require.Len(t, state.WorkflowIDs, 3, "Should have started 3 workflows") + require.Equal(t, "3", state.CurrentVersion, "Should have bumped to 3 (1->2 at iter 5, 2->3 at iter 10)") + require.Equal(t, []string{"1", "2", "3"}, state.VersionSequence, "Should track all versions") + }) + + t.Run("Verify checks build ID progression", func(t *testing.T) { + executor := newVersioningPinnedExecutor() + + // Run a simple scenario + shortScenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("vpw-verify-%d", time.Now().Unix()), + ExecutionID: "test-verify-exec-id", + Configuration: loadgen.RunConfiguration{ + Iterations: 7, // 0 (start) + 6 iterations, will bump at iteration 5 + }, + ScenarioOptions: map[string]string{ + NumWorkflowsFlag: "2", + VersionBumpIntervalFlag: "5", + InitialVersionFlag: "1", // Start with version 1 + }, + } + + _, err := env.RunExecutorTest(t, executor, shortScenarioInfo, clioptions.LangGo) + require.NoError(t, err, "Executor should complete successfully") + + // Now verify the workflows + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + // Create a new scenario info for verification with proper client setup + verifyInfo := loadgen.ScenarioInfo{ + RunID: shortScenarioInfo.RunID, + ExecutionID: shortScenarioInfo.ExecutionID, + Client: env.TemporalClient(), + Logger: zaptest.NewLogger(t).Sugar(), + Namespace: "default", + } + + errors := executor.Verify(ctx, verifyInfo) + require.Empty(t, errors, "Verification should pass with no errors") + + executor.lock.Lock() + state := *executor.state + executor.lock.Unlock() + + require.Len(t, state.WorkflowIDs, 
2) + require.Equal(t, "2", state.CurrentVersion, "Should have bumped to 2") + + // Verify we can read the workflow histories + for _, workflowID := range state.WorkflowIDs { + iter := env.TemporalClient().GetWorkflowHistory( + ctx, + workflowID, + "", + false, + enums.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT, + ) + + eventCount := 0 + for iter.HasNext() { + _, err := iter.Next() + require.NoError(t, err) + eventCount++ + } + require.Greater(t, eventCount, 0, "Should have history events for workflow %s", workflowID) + } + }) +} diff --git a/workers/test_workers.go b/workers/test_workers.go index 9f4bdcfc..e72251a1 100644 --- a/workers/test_workers.go +++ b/workers/test_workers.go @@ -114,6 +114,10 @@ func (w *workerPool) startWorker( PreparedLogger: logger.Named(fmt.Sprintf("%s-worker", sdk)), }, } + // Configure build ID for versioning if specified in scenario options + if buildID, ok := scenarioInfo.ScenarioOptions["worker-build-id"]; ok && buildID != "" { + runner.WorkerOptions.FlagSet("worker-").Set("worker-build-id", buildID) + } runner.ClientOptions.FlagSet().Set("server-address", w.env.DevServerAddress()) runner.ClientOptions.FlagSet().Set("namespace", testNamespace) workerDone <- runner.Run(ctx, baseDir) From e0d2d1d44feeef2617a6239d68d8159d35356fa9 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 9 Nov 2025 11:58:08 -0800 Subject: [PATCH 13/66] Update run_scenario_with_worker.go Signed-off-by: Stephan Behnke --- cmd/cli/run_scenario_with_worker.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/cli/run_scenario_with_worker.go b/cmd/cli/run_scenario_with_worker.go index 327e6485..9fd2f5bb 100644 --- a/cmd/cli/run_scenario_with_worker.go +++ b/cmd/cli/run_scenario_with_worker.go @@ -80,6 +80,7 @@ func (r *workerWithScenarioRunner) run(ctx context.Context) error { maxIterationsPerSecond: r.maxIterationsPerSecond, scenarioOptions: r.scenarioOptions, timeout: r.timeout, + verificationTimeout: r.verificationTimeout, doNotRegisterSearchAttributes: r.doNotRegisterSearchAttributes, }, clientOptions: r.ClientOptions, From 481e51fa79d724b5a6d073b7ec9102c6a73f064f Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 9 Nov 2025 12:07:55 -0800 Subject: [PATCH 14/66] Update run_scenario.go Signed-off-by: Stephan Behnke --- cmd/cli/run_scenario.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cmd/cli/run_scenario.go b/cmd/cli/run_scenario.go index a43f5e5b..66e8b7e2 100644 --- a/cmd/cli/run_scenario.go +++ b/cmd/cli/run_scenario.go @@ -60,6 +60,7 @@ type scenarioRunConfig struct { scenarioOptions []string timeout time.Duration doNotRegisterSearchAttributes bool + verificationTimeout time.Duration } func (r *scenarioRunner) addCLIFlags(fs *pflag.FlagSet) { @@ -85,6 +86,8 @@ func (r *scenarioRunConfig) addCLIFlags(fs *pflag.FlagSet) { fs.BoolVar(&r.doNotRegisterSearchAttributes, "do-not-register-search-attributes", false, "Do not register the default search attributes used by scenarios. 
"+ "If the search attributes are not registed by the scenario they must be registered through some other method") + fs.DurationVar(&r.verificationTimeout, "verification-timeout", 2*time.Minute, + "Maximum duration to wait for post-scenario verification (default 2m).") } func (r *scenarioRunner) preRun() { @@ -99,6 +102,8 @@ func (r *scenarioRunner) run(ctx context.Context) error { return fmt.Errorf("run ID not found") } else if r.iterations > 0 && r.duration > 0 { return fmt.Errorf("cannot provide both iterations and duration") + } else if r.verificationTimeout <= 0 { + return fmt.Errorf("verification-timeout must be greater than 0") } // Parse options @@ -186,9 +191,12 @@ func (r *scenarioRunner) run(ctx context.Context) error { assert.Unreachable("scenario execution failed", map[string]any{"error": scenarioErr}) } + verifyCtx, verifyCancel := context.WithTimeout(ctx, r.verificationTimeout) + defer verifyCancel() + // 2. Run verifications if scenario.VerifyFn != nil { - verifyErrs := scenario.VerifyFn(ctx, scenarioInfo, executor) + verifyErrs := scenario.VerifyFn(verifyCtx, scenarioInfo, executor) for _, err := range verifyErrs { allErrors = append(allErrors, fmt.Errorf("post-scenario verification failed: %w", err)) assert.Unreachable("post-scenario verification failed", map[string]any{"error": err}) From 2a36807f3ff9e89f246dddf0f027890553657748 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 9 Nov 2025 12:11:02 -0800 Subject: [PATCH 15/66] fix Signed-off-by: Stephan Behnke --- loadgen/helpers.go | 38 +++++++++++------------ loadgen/workflow_completion_checker.go | 42 +++++++++++++++----------- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/loadgen/helpers.go b/loadgen/helpers.go index a57efd9b..5becd126 100644 --- a/loadgen/helpers.go +++ b/loadgen/helpers.go @@ -49,10 +49,7 @@ func MinVisibilityCountEventually( info ScenarioInfo, request *workflowservice.CountWorkflowExecutionsRequest, minCount int, - waitAtMost time.Duration, ) error { - timeoutCtx, cancel := context.WithTimeout(ctx, waitAtMost) - defer cancel() countTicker := time.NewTicker(3 * time.Second) defer countTicker.Stop() @@ -61,46 +58,45 @@ func MinVisibilityCountEventually( defer printTicker.Stop() var lastVisibilityCount int64 - done := false check := func() error { - visibilityCount, err := info.Client.CountWorkflow(timeoutCtx, request) + visibilityCount, err := info.Client.CountWorkflow(ctx, request) if err != nil { return fmt.Errorf("failed to count workflows in visibility: %w", err) } lastVisibilityCount = visibilityCount.Count - if lastVisibilityCount >= int64(minCount) { - done = true - } return nil } - // Initial check before entering the loop. + // Initial check if err := check(); err != nil { return err } + if lastVisibilityCount >= int64(minCount) { + return nil + } - // Loop until we reach the desired count or timeout. - for !done { + for { select { - case <-timeoutCtx.Done(): - return fmt.Errorf( - "expected at least %d workflows in visibility, got %d after waiting %v", - minCount, lastVisibilityCount, waitAtMost, - ) - + case <-ctx.Done(): + // Context ended (deadline or cancellation). Return success only if min reached. 
+ if lastVisibilityCount >= int64(minCount) { + return nil + } + return fmt.Errorf("expected at least %d workflows in visibility, got %d (context done)", + minCount, lastVisibilityCount) case <-printTicker.C: - info.Logger.Infof("current visibility count: %d (expected at least: %d)\n", + info.Logger.Infof("current visibility count: %d (expected at least: %d)", lastVisibilityCount, minCount) - case <-countTicker.C: if err := check(); err != nil { return err } + if lastVisibilityCount >= int64(minCount) { + return nil + } } } - - return nil } // GetNonCompletedWorkflows queries and returns an error for each non-completed workflow. diff --git a/loadgen/workflow_completion_checker.go b/loadgen/workflow_completion_checker.go index cd8805f8..ed1b968d 100644 --- a/loadgen/workflow_completion_checker.go +++ b/loadgen/workflow_completion_checker.go @@ -15,8 +15,7 @@ type WorkflowCompletionVerifier struct { // expectedWorkflowCount is an optional function to calculate the expected number of workflows // from the ExecutorState. If nil, defaults to using state.CompletedIterations. expectedWorkflowCount func(ExecutorState) int - // timeout is the maximum time to wait for workflow completion verification in visibility. - timeout time.Duration + // info is the scenario information stored during initialization. info ScenarioInfo } @@ -31,13 +30,7 @@ func (wct *WorkflowCompletionVerifier) SetExpectedWorkflowCount(fn func(Executor // If timeout is zero, it uses a default of 30 seconds. // Call this before the scenario is started to initialize and register search attributes. func NewWorkflowCompletionChecker(ctx context.Context, info ScenarioInfo, timeout time.Duration) (*WorkflowCompletionVerifier, error) { - if timeout == 0 { - timeout = 3 * time.Minute // TODO: set back to 30s - } - - checker := &WorkflowCompletionVerifier{ - timeout: timeout, - } + checker := &WorkflowCompletionVerifier{} if err := checker.init(ctx, info); err != nil { return nil, err @@ -82,8 +75,6 @@ func (wct *WorkflowCompletionVerifier) Verify(ctx context.Context, state Executo allErrors = append(allErrors, fmt.Errorf("no workflows completed")) } else { // (2) Verify that all completed workflows have indeed completed. - verifyCtx, cancel := context.WithTimeout(ctx, wct.timeout) - defer cancel() query := fmt.Sprintf( "%s='%s' AND ExecutionStatus = 'Completed'", @@ -91,15 +82,24 @@ func (wct *WorkflowCompletionVerifier) Verify(ctx context.Context, state Executo wct.info.ExecutionID, ) + // Bound waits to the parent context's deadline; otherwise allow up to 24h. + var waitAtMost time.Duration + if dl, ok := ctx.Deadline(); ok { + waitAtMost = time.Until(dl) + if waitAtMost < 0 { + waitAtMost = 0 + } + } else { + waitAtMost = 24 * time.Hour + } err := MinVisibilityCountEventually( - verifyCtx, + ctx, wct.info, &workflowservice.CountWorkflowExecutionsRequest{ Namespace: wct.info.Namespace, Query: query, }, expectedCount, - wct.timeout, ) if err != nil { allErrors = append(allErrors, err) @@ -126,17 +126,23 @@ func (wct *WorkflowCompletionVerifier) VerifyNoRunningWorkflows(ctx context.Cont query := fmt.Sprintf("TaskQueue = %q and ExecutionStatus = 'Running'", TaskQueueForRun(wct.info.RunID)) - verifyCtx, cancel := context.WithTimeout(ctx, wct.timeout) - defer cancel() - + // Bound waits to the parent context's deadline; otherwise allow up to 24h. 
+ var waitAtMost time.Duration + if dl, ok := ctx.Deadline(); ok { + waitAtMost = time.Until(dl) + if waitAtMost < 0 { + waitAtMost = 0 + } + } else { + waitAtMost = 24 * time.Hour + } return MinVisibilityCountEventually( - verifyCtx, + ctx, wct.info, &workflowservice.CountWorkflowExecutionsRequest{ Namespace: wct.info.Namespace, Query: query, }, 0, - wct.timeout, ) } From 9c265a55b3a823a6d9d7c51486cfda8d5e195c87 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 9 Nov 2025 12:12:54 -0800 Subject: [PATCH 16/66] Update versioning_pinned_workflows.go Signed-off-by: Stephan Behnke --- scenarios/versioning_pinned_workflows.go | 37 +++++++++++++++++------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/scenarios/versioning_pinned_workflows.go b/scenarios/versioning_pinned_workflows.go index 235233fb..3f201dcd 100644 --- a/scenarios/versioning_pinned_workflows.go +++ b/scenarios/versioning_pinned_workflows.go @@ -376,17 +376,34 @@ func (e *versioningPinnedExecutor) startWorkflows(ctx context.Context, info load // setupVersioning configures the worker versioning for the deployment using Worker Deployment APIs. func (e *versioningPinnedExecutor) setupVersioning(ctx context.Context, c client.Client, namespace, deploymentName, buildID string) error { - // Set the build ID as the current deployment version - _, err := c.WorkflowService().SetWorkerDeploymentCurrentVersion(ctx, &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ - Namespace: namespace, - DeploymentName: deploymentName, - BuildId: buildID, - }) - if err != nil { - return fmt.Errorf("failed to set version %s as current for deployment %s: %w", buildID, deploymentName, err) - } + // Retry indefinitely until ctx is done + backoff := 1 * time.Second + for { + _, err := c.WorkflowService().SetWorkerDeploymentCurrentVersion(ctx, &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ + Namespace: namespace, + DeploymentName: deploymentName, + BuildId: buildID, + }) + if err == nil { + return nil + } - return nil + // Wait for backoff or exit if context is done + select { + case <-ctx.Done(): + // Return the last observed error to preserve original cause (e.g., "Not enough hosts...") + return fmt.Errorf("failed to set version %s as current for deployment %s: %w", buildID, deploymentName, err) + case <-time.After(backoff): + } + + // Exponential backoff capped at 30s + if backoff < 30*time.Second { + backoff *= 2 + if backoff > 30*time.Second { + backoff = 30 * time.Second + } + } + } } // bumpVersion increases the version, starts a new worker with the new build ID, and sets it as current. 
From e6a885b2630dfecd5121a13a652be8a3b406ff24 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 9 Nov 2025 12:13:06 -0800 Subject: [PATCH 17/66] Update versioning_pinned_workflows.go Signed-off-by: Stephan Behnke --- scenarios/versioning_pinned_workflows.go | 34 ++++++++++++++++++------ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/scenarios/versioning_pinned_workflows.go b/scenarios/versioning_pinned_workflows.go index 3f201dcd..a8995cfc 100644 --- a/scenarios/versioning_pinned_workflows.go +++ b/scenarios/versioning_pinned_workflows.go @@ -436,14 +436,32 @@ func (e *versioningPinnedExecutor) bumpVersion(ctx context.Context, info loadgen // Wait for worker to be ready time.Sleep(1 * time.Second) - // Set the new version as the current deployment version - _, err = info.Client.WorkflowService().SetWorkerDeploymentCurrentVersion(ctx, &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ - Namespace: info.Namespace, - DeploymentName: deploymentName, - BuildId: newVersion, - }) - if err != nil { - return fmt.Errorf("failed to set version %s as current: %w", newVersion, err) + // Retry indefinitely until ctx is done when setting the new current version + backoff := 1 * time.Second + for { + _, err = info.Client.WorkflowService().SetWorkerDeploymentCurrentVersion(ctx, &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ + Namespace: info.Namespace, + DeploymentName: deploymentName, + BuildId: newVersion, + }) + if err == nil { + break + } + + // Wait for backoff or exit if context is done + select { + case <-ctx.Done(): + return fmt.Errorf("failed to set version %s as current: %w", newVersion, err) + case <-time.After(backoff): + } + + // Exponential backoff capped at 30s + if backoff < 30*time.Second { + backoff *= 2 + if backoff > 30*time.Second { + backoff = 30 * time.Second + } + } } e.lock.Lock() From c71259d9468d51af35a8926588a856ea5db7605f Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 9 Nov 2025 12:21:01 -0800 Subject: [PATCH 18/66] Update versioning_pinned_workflows.go Signed-off-by: Stephan Behnke --- scenarios/versioning_pinned_workflows.go | 147 +++++++++++++---------- 1 file changed, 85 insertions(+), 62 deletions(-) diff --git a/scenarios/versioning_pinned_workflows.go b/scenarios/versioning_pinned_workflows.go index a8995cfc..f751b5ed 100644 --- a/scenarios/versioning_pinned_workflows.go +++ b/scenarios/versioning_pinned_workflows.go @@ -27,6 +27,8 @@ import ( "github.com/temporalio/omes/loadgen/kitchensink" commonpb "go.temporal.io/api/common/v1" "go.temporal.io/api/enums/v1" + historypb "go.temporal.io/api/history/v1" + "go.temporal.io/api/serviceerror" "go.temporal.io/api/workflowservice/v1" "go.temporal.io/sdk/activity" "go.temporal.io/sdk/client" @@ -34,6 +36,32 @@ import ( "go.temporal.io/sdk/workflow" ) +// retryUntilCtx retries the given function until it reports done or the context is done. +// Backoff starts at 1s and is capped at 10s. 
+func retryUntilCtx(ctx context.Context, fn func(context.Context) (bool, error)) error { + backoff := 1 * time.Second + for { + done, err := fn(ctx) + if done { + return err + } + select { + case <-ctx.Done(): + if err != nil { + return err + } + return ctx.Err() + case <-time.After(backoff): + } + if backoff < 10*time.Second { + backoff *= 2 + if backoff > 10*time.Second { + backoff = 10 * time.Second + } + } + } +} + const ( // NumWorkflowsFlag controls how many workflows to start on iteration 0 NumWorkflowsFlag = "num-workflows" @@ -182,8 +210,13 @@ func (e *versioningPinnedExecutor) startWorker(ctx context.Context, info loadgen w.RegisterWorkflowWithOptions(simpleKitchenSinkWorkflow, workflow.RegisterOptions{Name: "kitchenSink"}) w.RegisterActivityWithOptions(noopActivity, activity.RegisterOptions{Name: "noop"}) - // Start the worker - if err := w.Start(); err != nil { + // Start the worker with retry until context done + if err := retryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + if err := w.Start(); err != nil { + return false, err + } + return true, nil + }); err != nil { return nil, fmt.Errorf("failed to start worker with build ID %s: %w", buildID, err) } @@ -341,14 +374,24 @@ func (e *versioningPinnedExecutor) startWorkflows(ctx context.Context, info load }, } - _, err := info.Client.ExecuteWorkflow( - ctx, - options, - "kitchenSink", - testInput.WorkflowInput, - ) - if err != nil { - errChan <- fmt.Errorf("failed to start workflow %s: %w", workflowID, err) + var startErr error + if err := retryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + _, startErr = info.Client.ExecuteWorkflow( + ctx, + options, + "kitchenSink", + testInput.WorkflowInput, + ) + if startErr == nil { + return true, nil + } + // Treat AlreadyStarted as success for idempotency + if _, ok := startErr.(*serviceerror.WorkflowExecutionAlreadyStarted); ok { + return true, nil + } + return false, startErr + }); err != nil { + errChan <- fmt.Errorf("failed to start workflow %s: %w", workflowID, startErr) return } @@ -376,34 +419,17 @@ func (e *versioningPinnedExecutor) startWorkflows(ctx context.Context, info load // setupVersioning configures the worker versioning for the deployment using Worker Deployment APIs. func (e *versioningPinnedExecutor) setupVersioning(ctx context.Context, c client.Client, namespace, deploymentName, buildID string) error { - // Retry indefinitely until ctx is done - backoff := 1 * time.Second - for { + if err := retryUntilCtx(ctx, func(ctx context.Context) (bool, error) { _, err := c.WorkflowService().SetWorkerDeploymentCurrentVersion(ctx, &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ Namespace: namespace, DeploymentName: deploymentName, BuildId: buildID, }) - if err == nil { - return nil - } - - // Wait for backoff or exit if context is done - select { - case <-ctx.Done(): - // Return the last observed error to preserve original cause (e.g., "Not enough hosts...") - return fmt.Errorf("failed to set version %s as current for deployment %s: %w", buildID, deploymentName, err) - case <-time.After(backoff): - } - - // Exponential backoff capped at 30s - if backoff < 30*time.Second { - backoff *= 2 - if backoff > 30*time.Second { - backoff = 30 * time.Second - } - } + return err == nil, err + }); err != nil { + return fmt.Errorf("failed to set version %s as current for deployment %s: %w", buildID, deploymentName, err) } + return nil } // bumpVersion increases the version, starts a new worker with the new build ID, and sets it as current. 
@@ -437,31 +463,16 @@ func (e *versioningPinnedExecutor) bumpVersion(ctx context.Context, info loadgen time.Sleep(1 * time.Second) // Retry indefinitely until ctx is done when setting the new current version - backoff := 1 * time.Second - for { + // Set the new version as the current deployment version (retry until ctx done) + if err := retryUntilCtx(ctx, func(ctx context.Context) (bool, error) { _, err = info.Client.WorkflowService().SetWorkerDeploymentCurrentVersion(ctx, &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ Namespace: info.Namespace, DeploymentName: deploymentName, BuildId: newVersion, }) - if err == nil { - break - } - - // Wait for backoff or exit if context is done - select { - case <-ctx.Done(): - return fmt.Errorf("failed to set version %s as current: %w", newVersion, err) - case <-time.After(backoff): - } - - // Exponential backoff capped at 30s - if backoff < 30*time.Second { - backoff *= 2 - if backoff > 30*time.Second { - backoff = 30 * time.Second - } - } + return err == nil, err + }); err != nil { + return fmt.Errorf("failed to set version %s as current: %w", newVersion, err) } e.lock.Lock() @@ -563,15 +574,20 @@ func (e *versioningPinnedExecutor) Verify(ctx context.Context, info loadgen.Scen func (e *versioningPinnedExecutor) checkWorkflowHistory(ctx context.Context, info loadgen.ScenarioInfo, workflowID string) []error { var errors []error - // Get workflow execution description to access versioning info - describeResp, err := info.Client.WorkflowService().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ - Namespace: info.Namespace, - Execution: &commonpb.WorkflowExecution{ - WorkflowId: workflowID, - }, + // Get workflow execution description to access versioning info (with retry) + var describeResp *workflowservice.DescribeWorkflowExecutionResponse + var derr error + _ = retryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + describeResp, derr = info.Client.WorkflowService().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: info.Namespace, + Execution: &commonpb.WorkflowExecution{ + WorkflowId: workflowID, + }, + }) + return derr == nil, derr }) - if err != nil { - errors = append(errors, fmt.Errorf("workflow %s: failed to describe execution: %w", workflowID, err)) + if derr != nil { + errors = append(errors, fmt.Errorf("workflow %s: failed to describe execution: %w", workflowID, derr)) return errors } @@ -605,8 +621,15 @@ func (e *versioningPinnedExecutor) checkWorkflowHistory(ctx context.Context, inf // Iterate through history events to track build ID progression // Use Started events (not deprecated) instead of Completed events for historyIter.HasNext() { - event, err := historyIter.Next() - if err != nil { + var event *historypb.HistoryEvent + if err := retryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + var err error + event, err = historyIter.Next() + if err != nil { + return false, err + } + return true, nil + }); err != nil { errors = append(errors, fmt.Errorf("workflow %s: failed to read history: %w", workflowID, err)) return errors } From 6eac4e87a2e44217019f4954bd317078b3fa9807 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 9 Nov 2025 12:52:25 -0800 Subject: [PATCH 19/66] backoff Signed-off-by: Stephan Behnke --- loadgen/retry.go | 39 ++++++++++++++++++++ loadgen/scenario.go | 45 +++++++++++++----------- scenarios/versioning_pinned_workflows.go | 36 ++++--------------- 3 files changed, 70 insertions(+), 50 deletions(-) create mode 
100644 loadgen/retry.go diff --git a/loadgen/retry.go b/loadgen/retry.go new file mode 100644 index 00000000..a64d8211 --- /dev/null +++ b/loadgen/retry.go @@ -0,0 +1,39 @@ +package loadgen + +import ( + "context" + "time" +) + +// RetryUntilCtx repeatedly invokes fn until it reports completion or the context is done. +// - fn should return (true, nil) when the operation has succeeded and no further retries are needed. +// - If fn returns (true, err), the retry loop stops and err is returned. +// - If fn returns (false, err), the function will be retried after a backoff delay. +// Backoff starts at 1s and doubles each time up to a maximum of 10s. +// If the context is canceled or its deadline expires, the last non-nil error from fn is returned if present; +// otherwise, the context error is returned. +func RetryUntilCtx(ctx context.Context, fn func(context.Context) (bool, error)) error { + backoff := 1 * time.Second + for { + done, err := fn(ctx) + if done { + return err + } + + select { + case <-ctx.Done(): + if err != nil { + return err + } + return ctx.Err() + case <-time.After(backoff): + } + + if backoff < 10*time.Second { + backoff *= 2 + if backoff > 10*time.Second { + backoff = 10 * time.Second + } + } + } +} diff --git a/loadgen/scenario.go b/loadgen/scenario.go index d99e5338..43e8d866 100644 --- a/loadgen/scenario.go +++ b/loadgen/scenario.go @@ -302,36 +302,39 @@ func (s *ScenarioInfo) NewRun(iteration int) *Run { func (s *ScenarioInfo) RegisterDefaultSearchAttributes(ctx context.Context) error { if s.Client == nil { - // No client in some unit tests. Ideally this would be mocked but no mock operator service - // client is readily available. return nil } - // Ensure custom search attributes are registered that many scenarios rely on - _, err := s.Client.OperatorService().AddSearchAttributes(ctx, &operatorservice.AddSearchAttributesRequest{ - SearchAttributes: map[string]enums.IndexedValueType{ - "KS_Int": enums.INDEXED_VALUE_TYPE_INT, - "KS_Keyword": enums.INDEXED_VALUE_TYPE_KEYWORD, - OmesExecutionIDSearchAttribute: enums.INDEXED_VALUE_TYPE_KEYWORD, - }, - Namespace: s.Namespace, - }) - // Throw an error if the attributes could not be registered, but ignore already exists errs + + attrs := map[string]enums.IndexedValueType{ + "KS_Int": enums.INDEXED_VALUE_TYPE_INT, + "KS_Keyword": enums.INDEXED_VALUE_TYPE_KEYWORD, + OmesExecutionIDSearchAttribute: enums.INDEXED_VALUE_TYPE_KEYWORD, + } + alreadyExistsStrings := []string{ "already exists", "attributes mapping unavailble", } - if err != nil { - isAlreadyExistsErr := false - for _, s := range alreadyExistsStrings { - if strings.Contains(err.Error(), s) { - isAlreadyExistsErr = true - break - } + + var lastErr error + if err := RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + _, lastErr = s.Client.OperatorService().AddSearchAttributes(ctx, &operatorservice.AddSearchAttributesRequest{ + SearchAttributes: attrs, + Namespace: s.Namespace, + }) + if lastErr == nil { + return true, nil } - if !isAlreadyExistsErr { - return fmt.Errorf("failed to register search attributes: %w", err) + for _, substr := range alreadyExistsStrings { + if strings.Contains(lastErr.Error(), substr) { + return true, nil + } } + return false, lastErr + }); err != nil { + return fmt.Errorf("failed to register search attributes: %w", err) } + return nil } diff --git a/scenarios/versioning_pinned_workflows.go b/scenarios/versioning_pinned_workflows.go index f751b5ed..a96650b3 100644 --- a/scenarios/versioning_pinned_workflows.go +++ 
b/scenarios/versioning_pinned_workflows.go @@ -38,29 +38,7 @@ import ( // retryUntilCtx retries the given function until it reports done or the context is done. // Backoff starts at 1s and is capped at 10s. -func retryUntilCtx(ctx context.Context, fn func(context.Context) (bool, error)) error { - backoff := 1 * time.Second - for { - done, err := fn(ctx) - if done { - return err - } - select { - case <-ctx.Done(): - if err != nil { - return err - } - return ctx.Err() - case <-time.After(backoff): - } - if backoff < 10*time.Second { - backoff *= 2 - if backoff > 10*time.Second { - backoff = 10 * time.Second - } - } - } -} +// Using loadgen.RetryUntilCtx (removed local helper) const ( // NumWorkflowsFlag controls how many workflows to start on iteration 0 @@ -211,7 +189,7 @@ func (e *versioningPinnedExecutor) startWorker(ctx context.Context, info loadgen w.RegisterActivityWithOptions(noopActivity, activity.RegisterOptions{Name: "noop"}) // Start the worker with retry until context done - if err := retryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + if err := loadgen.RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { if err := w.Start(); err != nil { return false, err } @@ -375,7 +353,7 @@ func (e *versioningPinnedExecutor) startWorkflows(ctx context.Context, info load } var startErr error - if err := retryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + if err := loadgen.RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { _, startErr = info.Client.ExecuteWorkflow( ctx, options, @@ -419,7 +397,7 @@ func (e *versioningPinnedExecutor) startWorkflows(ctx context.Context, info load // setupVersioning configures the worker versioning for the deployment using Worker Deployment APIs. func (e *versioningPinnedExecutor) setupVersioning(ctx context.Context, c client.Client, namespace, deploymentName, buildID string) error { - if err := retryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + if err := loadgen.RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { _, err := c.WorkflowService().SetWorkerDeploymentCurrentVersion(ctx, &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ Namespace: namespace, DeploymentName: deploymentName, @@ -464,7 +442,7 @@ func (e *versioningPinnedExecutor) bumpVersion(ctx context.Context, info loadgen // Retry indefinitely until ctx is done when setting the new current version // Set the new version as the current deployment version (retry until ctx done) - if err := retryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + if err := loadgen.RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { _, err = info.Client.WorkflowService().SetWorkerDeploymentCurrentVersion(ctx, &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ Namespace: info.Namespace, DeploymentName: deploymentName, @@ -577,7 +555,7 @@ func (e *versioningPinnedExecutor) checkWorkflowHistory(ctx context.Context, inf // Get workflow execution description to access versioning info (with retry) var describeResp *workflowservice.DescribeWorkflowExecutionResponse var derr error - _ = retryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + _ = loadgen.RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { describeResp, derr = info.Client.WorkflowService().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ Namespace: info.Namespace, Execution: &commonpb.WorkflowExecution{ @@ -622,7 +600,7 @@ func (e *versioningPinnedExecutor) checkWorkflowHistory(ctx context.Context, inf // Use Started events 
(not deprecated) instead of Completed events for historyIter.HasNext() { var event *historypb.HistoryEvent - if err := retryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + if err := loadgen.RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { var err error event, err = historyIter.Next() if err != nil { From bdaacd24f82b9bc870c71764f1539f6bb056e029 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 10 Nov 2025 11:24:43 -0800 Subject: [PATCH 20/66] retry full checks Signed-off-by: Stephan Behnke --- loadgen/helpers.go | 53 ++-------- loadgen/workflow_completion_checker.go | 140 ++++++++++++++++--------- 2 files changed, 99 insertions(+), 94 deletions(-) diff --git a/loadgen/helpers.go b/loadgen/helpers.go index 5becd126..25cfa575 100644 --- a/loadgen/helpers.go +++ b/loadgen/helpers.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "strings" - "time" "go.temporal.io/api/enums/v1" "go.temporal.io/api/operatorservice/v1" @@ -44,59 +43,23 @@ func InitSearchAttribute( return nil } -func MinVisibilityCountEventually( +func MinVisibilityCount( ctx context.Context, info ScenarioInfo, request *workflowservice.CountWorkflowExecutionsRequest, minCount int, ) error { - - countTicker := time.NewTicker(3 * time.Second) - defer countTicker.Stop() - - printTicker := time.NewTicker(30 * time.Second) - defer printTicker.Stop() - - var lastVisibilityCount int64 - - check := func() error { - visibilityCount, err := info.Client.CountWorkflow(ctx, request) - if err != nil { - return fmt.Errorf("failed to count workflows in visibility: %w", err) - } - lastVisibilityCount = visibilityCount.Count - return nil + visibilityCount, err := info.Client.CountWorkflow(ctx, request) + if err != nil { + return fmt.Errorf("failed to count workflows in visibility: %w", err) } - // Initial check - if err := check(); err != nil { - return err - } - if lastVisibilityCount >= int64(minCount) { - return nil + if visibilityCount.Count < int64(minCount) { + return fmt.Errorf("expected at least %d workflows in visibility, got %d", + minCount, visibilityCount.Count) } - for { - select { - case <-ctx.Done(): - // Context ended (deadline or cancellation). Return success only if min reached. - if lastVisibilityCount >= int64(minCount) { - return nil - } - return fmt.Errorf("expected at least %d workflows in visibility, got %d (context done)", - minCount, lastVisibilityCount) - case <-printTicker.C: - info.Logger.Infof("current visibility count: %d (expected at least: %d)", - lastVisibilityCount, minCount) - case <-countTicker.C: - if err := check(); err != nil { - return err - } - if lastVisibilityCount >= int64(minCount) { - return nil - } - } - } + return nil } // GetNonCompletedWorkflows queries and returns an error for each non-completed workflow. diff --git a/loadgen/workflow_completion_checker.go b/loadgen/workflow_completion_checker.go index ed1b968d..d3c967ba 100644 --- a/loadgen/workflow_completion_checker.go +++ b/loadgen/workflow_completion_checker.go @@ -61,9 +61,8 @@ func (wct *WorkflowCompletionVerifier) VerifyRun(ctx context.Context, info Scena } // Verify checks that the expected number of workflows have completed. +// It retries all checks until the context deadline is reached. 
func (wct *WorkflowCompletionVerifier) Verify(ctx context.Context, state ExecutorState) []error { - var allErrors []error - // Calculate expected workflow count expectedCount := state.CompletedIterations if wct.expectedWorkflowCount != nil { @@ -72,27 +71,30 @@ func (wct *WorkflowCompletionVerifier) Verify(ctx context.Context, state Executo // (1) Verify that we have completions at all. if expectedCount == 0 { - allErrors = append(allErrors, fmt.Errorf("no workflows completed")) - } else { - // (2) Verify that all completed workflows have indeed completed. + return []error{fmt.Errorf("no workflows completed")} + } - query := fmt.Sprintf( - "%s='%s' AND ExecutionStatus = 'Completed'", - OmesExecutionIDSearchAttribute, - wct.info.ExecutionID, - ) + // Setup retry loop + checkTicker := time.NewTicker(15 * time.Second) + defer checkTicker.Stop() - // Bound waits to the parent context's deadline; otherwise allow up to 24h. - var waitAtMost time.Duration - if dl, ok := ctx.Deadline(); ok { - waitAtMost = time.Until(dl) - if waitAtMost < 0 { - waitAtMost = 0 - } - } else { - waitAtMost = 24 * time.Hour - } - err := MinVisibilityCountEventually( + printTicker := time.NewTicker(30 * time.Second) + defer printTicker.Stop() + + query := fmt.Sprintf( + "%s='%s' AND ExecutionStatus = 'Completed'", + OmesExecutionIDSearchAttribute, + wct.info.ExecutionID, + ) + + var lastErrors []error + + // Function to perform all checks + performChecks := func() []error { + var allErrors []error + + // (2) Verify that all completed workflows have indeed completed. + err := MinVisibilityCount( ctx, wct.info, &workflowservice.CountWorkflowExecutionsRequest{ @@ -104,45 +106,85 @@ func (wct *WorkflowCompletionVerifier) Verify(ctx context.Context, state Executo if err != nil { allErrors = append(allErrors, err) } - } - // (3) Verify that all started workflows have completed. - nonCompletedErrs := GetNonCompletedWorkflows( - ctx, - wct.info, - OmesExecutionIDSearchAttribute, - wct.info.ExecutionID, - 10, - ) - allErrors = append(allErrors, nonCompletedErrs...) + // (3) Verify that all started workflows have completed. + nonCompletedErrs := GetNonCompletedWorkflows( + ctx, + wct.info, + OmesExecutionIDSearchAttribute, + wct.info.ExecutionID, + 10, + ) + allErrors = append(allErrors, nonCompletedErrs...) + + return allErrors + } - return allErrors + for { + select { + case <-ctx.Done(): + // Context ended (deadline or cancellation). Return last errors. + return lastErrors + case <-printTicker.C: + wct.info.Logger.Infof("verification still has %d error(s), retrying until deadline...", len(lastErrors)) + case <-checkTicker.C: + lastErrors = performChecks() + if len(lastErrors) == 0 { + return nil + } + } + } } // TODO: remove this // VerifyNoRunningWorkflows waits until there are no running workflows on the task queue for the given run ID. // This is useful for scenarios that want to ensure all started workflows have completed. +// It retries the check until the context deadline is reached. func (wct *WorkflowCompletionVerifier) VerifyNoRunningWorkflows(ctx context.Context) error { query := fmt.Sprintf("TaskQueue = %q and ExecutionStatus = 'Running'", TaskQueueForRun(wct.info.RunID)) - // Bound waits to the parent context's deadline; otherwise allow up to 24h. 
- var waitAtMost time.Duration - if dl, ok := ctx.Deadline(); ok { - waitAtMost = time.Until(dl) - if waitAtMost < 0 { - waitAtMost = 0 + // Setup retry loop + checkTicker := time.NewTicker(3 * time.Second) + defer checkTicker.Stop() + + printTicker := time.NewTicker(30 * time.Second) + defer printTicker.Stop() + + var lastError error + + // Function to perform check + performCheck := func() error { + return MinVisibilityCount( + ctx, + wct.info, + &workflowservice.CountWorkflowExecutionsRequest{ + Namespace: wct.info.Namespace, + Query: query, + }, + 0, + ) + } + + // Initial check + lastError = performCheck() + if lastError == nil { + return nil + } + + // Retry loop until context deadline + for { + select { + case <-ctx.Done(): + // Context ended (deadline or cancellation). Return last error. + return lastError + case <-printTicker.C: + wct.info.Logger.Infof("still waiting for running workflows to complete, retrying until deadline...") + case <-checkTicker.C: + lastError = performCheck() + if lastError == nil { + return nil + } } - } else { - waitAtMost = 24 * time.Hour } - return MinVisibilityCountEventually( - ctx, - wct.info, - &workflowservice.CountWorkflowExecutionsRequest{ - Namespace: wct.info.Namespace, - Query: query, - }, - 0, - ) } From f418d11cb4a6de8860ed16ed4bf59435dbbdae62 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 10 Nov 2025 12:00:18 -0800 Subject: [PATCH 21/66] fix Signed-off-by: Stephan Behnke --- loadgen/workflow_completion_checker.go | 14 ++++++++++++++ scenarios/workflow_completion_checker_test.go | 5 ++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/loadgen/workflow_completion_checker.go b/loadgen/workflow_completion_checker.go index d3c967ba..351deeb4 100644 --- a/loadgen/workflow_completion_checker.go +++ b/loadgen/workflow_completion_checker.go @@ -120,6 +120,12 @@ func (wct *WorkflowCompletionVerifier) Verify(ctx context.Context, state Executo return allErrors } + // Initial check + lastErrors = performChecks() + if len(lastErrors) == 0 { + return nil + } + for { select { case <-ctx.Done(): @@ -128,6 +134,10 @@ func (wct *WorkflowCompletionVerifier) Verify(ctx context.Context, state Executo case <-printTicker.C: wct.info.Logger.Infof("verification still has %d error(s), retrying until deadline...", len(lastErrors)) case <-checkTicker.C: + // Don't perform checks if context is already done + if ctx.Err() != nil { + return lastErrors + } lastErrors = performChecks() if len(lastErrors) == 0 { return nil @@ -181,6 +191,10 @@ func (wct *WorkflowCompletionVerifier) VerifyNoRunningWorkflows(ctx context.Cont case <-printTicker.C: wct.info.Logger.Infof("still waiting for running workflows to complete, retrying until deadline...") case <-checkTicker.C: + // Don't perform check if context is already done + if ctx.Err() != nil { + return lastError + } lastError = performCheck() if lastError == nil { return nil diff --git a/scenarios/workflow_completion_checker_test.go b/scenarios/workflow_completion_checker_test.go index dbc0d168..fe9aef35 100644 --- a/scenarios/workflow_completion_checker_test.go +++ b/scenarios/workflow_completion_checker_test.go @@ -92,7 +92,10 @@ func TestWorkflowCompletionChecker(t *testing.T) { require.Equal(t, 4, execState.CompletedIterations, "should complete 4 iterations") // Verify using the verifier - pass the state directly - verifyErrs := verifier.VerifyRun(t.Context(), scenarioInfo, execState) + // Use a timeout that allows for visibility to catch up and retries to occur + verifyCtx, cancel := 
context.WithTimeout(t.Context(), 10*time.Second) + defer cancel() + verifyErrs := verifier.VerifyRun(verifyCtx, scenarioInfo, execState) require.NotEmpty(t, verifyErrs) require.Contains(t, verifyErrs[0].Error(), "non-completed workflow: WorkflowID=w-stuck-") } From fe0995b17d0f8a86ca46cd54b51e1bdfc2063a89 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Tue, 11 Nov 2025 13:39:11 -0800 Subject: [PATCH 22/66] Update throughput_stress.go Signed-off-by: Stephan Behnke --- scenarios/throughput_stress.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scenarios/throughput_stress.go b/scenarios/throughput_stress.go index 3f2bb343..9340a0a9 100644 --- a/scenarios/throughput_stress.go +++ b/scenarios/throughput_stress.go @@ -477,6 +477,12 @@ func (t *tpsExecutor) createActionsChunk( }, }), }, + SearchAttributes: map[string]*common.Payload{ + loadgen.OmesExecutionIDSearchAttribute: &common.Payload{ + Metadata: map[string][]byte{"encoding": []byte("json/plain"), "type": []byte("Keyword")}, + Data: []byte(fmt.Sprintf("%q", run.ExecutionID)), // quoted to be valid JSON string + }, + }, }, }, }) From 657ecffffd3c4f45b0b612f2409b482d946794a2 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Thu, 13 Nov 2025 11:55:29 -0800 Subject: [PATCH 23/66] not needed Signed-off-by: Stephan Behnke --- scenarios/throughput_stress.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scenarios/throughput_stress.go b/scenarios/throughput_stress.go index 9340a0a9..3f2bb343 100644 --- a/scenarios/throughput_stress.go +++ b/scenarios/throughput_stress.go @@ -477,12 +477,6 @@ func (t *tpsExecutor) createActionsChunk( }, }), }, - SearchAttributes: map[string]*common.Payload{ - loadgen.OmesExecutionIDSearchAttribute: &common.Payload{ - Metadata: map[string][]byte{"encoding": []byte("json/plain"), "type": []byte("Keyword")}, - Data: []byte(fmt.Sprintf("%q", run.ExecutionID)), // quoted to be valid JSON string - }, - }, }, }, }) From 556eec5943bd6fb5cdafe7629ff677c58a5dc3ba Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Thu, 13 Nov 2025 15:36:04 -0800 Subject: [PATCH 24/66] Update generic_executor.go Signed-off-by: Stephan Behnke --- loadgen/generic_executor.go | 1 + 1 file changed, 1 insertion(+) diff --git a/loadgen/generic_executor.go b/loadgen/generic_executor.go index c0685feb..afe9a06a 100644 --- a/loadgen/generic_executor.go +++ b/loadgen/generic_executor.go @@ -205,6 +205,7 @@ func (g *genericRun) Run(ctx context.Context) error { case doneCh <- err: if err == nil && !isSkipIteration { g.executor.RecordCompletion() + fmt.Printf("✅ Workflow completed: iteration %d\n", run.Iteration) if g.config.OnCompletion != nil { g.config.OnCompletion(ctx, run) } From 7df03fb5fdff0f7640327fe7caa707ef6a8db7be Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Thu, 13 Nov 2025 15:39:01 -0800 Subject: [PATCH 25/66] Update generic_executor.go Signed-off-by: Stephan Behnke --- loadgen/generic_executor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loadgen/generic_executor.go b/loadgen/generic_executor.go index afe9a06a..54f72796 100644 --- a/loadgen/generic_executor.go +++ b/loadgen/generic_executor.go @@ -205,7 +205,7 @@ func (g *genericRun) Run(ctx context.Context) error { case doneCh <- err: if err == nil && !isSkipIteration { g.executor.RecordCompletion() - fmt.Printf("✅ Workflow completed: iteration %d\n", run.Iteration) + g.logger.Debugf("✅ Workflow completed: iteration %v", run.Iteration) if g.config.OnCompletion != nil { g.config.OnCompletion(ctx, run) } From 
a22b226e90b0d9229f3857e96591c46b06f7c29b Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 14 Nov 2025 09:47:34 -0800 Subject: [PATCH 26/66] rename Signed-off-by: Stephan Behnke --- ...flow_completion_checker.go => workflow_completion_verifier.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename loadgen/{workflow_completion_checker.go => workflow_completion_verifier.go} (100%) diff --git a/loadgen/workflow_completion_checker.go b/loadgen/workflow_completion_verifier.go similarity index 100% rename from loadgen/workflow_completion_checker.go rename to loadgen/workflow_completion_verifier.go From 90df150d3ff10b97b6731e7c29a9a33c664359b5 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 14 Nov 2025 09:49:05 -0800 Subject: [PATCH 27/66] Update workflow_completion_verifier.go Signed-off-by: Stephan Behnke --- loadgen/workflow_completion_verifier.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loadgen/workflow_completion_verifier.go b/loadgen/workflow_completion_verifier.go index 351deeb4..af44202e 100644 --- a/loadgen/workflow_completion_verifier.go +++ b/loadgen/workflow_completion_verifier.go @@ -132,7 +132,7 @@ func (wct *WorkflowCompletionVerifier) Verify(ctx context.Context, state Executo // Context ended (deadline or cancellation). Return last errors. return lastErrors case <-printTicker.C: - wct.info.Logger.Infof("verification still has %d error(s), retrying until deadline...", len(lastErrors)) + wct.info.Logger.Infof("verification still has error(s), retrying until deadline: %v", lastErrors) case <-checkTicker.C: // Don't perform checks if context is already done if ctx.Err() != nil { From 37a704cdd02d00fbc4985bafce2229b58adf4735 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 14 Nov 2025 16:52:06 -0800 Subject: [PATCH 28/66] kitchensink Signed-off-by: Stephan Behnke --- loadgen/ebbandflow/ebb_and_flow.go | 19 ------ scenarios/ebb_and_flow.go | 38 ++++++++--- workers/go/ebbandflow/activities.go | 31 --------- workers/go/ebbandflow/workflow.go | 87 -------------------------- workers/go/kitchensink/kitchen_sink.go | 4 +- workers/go/worker/worker.go | 4 -- 6 files changed, 30 insertions(+), 153 deletions(-) delete mode 100644 loadgen/ebbandflow/ebb_and_flow.go delete mode 100644 workers/go/ebbandflow/activities.go delete mode 100644 workers/go/ebbandflow/workflow.go diff --git a/loadgen/ebbandflow/ebb_and_flow.go b/loadgen/ebbandflow/ebb_and_flow.go deleted file mode 100644 index 8122a41f..00000000 --- a/loadgen/ebbandflow/ebb_and_flow.go +++ /dev/null @@ -1,19 +0,0 @@ -package ebbandflow - -import ( - "time" - - "github.com/temporalio/omes/loadgen" -) - -type WorkflowParams struct { - SleepActivities *loadgen.SleepActivityConfig `json:"sleepActivities"` -} - -type WorkflowOutput struct { - Timings []ActivityTiming `json:"timings"` -} - -type ActivityTiming struct { - ScheduleToStart time.Duration `json:"d"` -} diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index 9c9a40b2..627a6ea5 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -10,7 +10,7 @@ import ( "time" "github.com/temporalio/omes/loadgen" - "github.com/temporalio/omes/loadgen/ebbandflow" + . "github.com/temporalio/omes/loadgen/kitchensink" ) const ( @@ -280,29 +280,47 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( Groups: template.Groups, } - // Start workflow. 
+ // Sample activities from the configuration + rng := rand.New(rand.NewSource(time.Now().UnixNano())) + activities := config.Sample(rng) + + // Build actions for the kitchensink workflow + var actions []*Action + for _, activityAction := range activities { + actions = append(actions, &Action{ + Variant: &Action_ExecActivity{ + ExecActivity: activityAction, + }, + }) + } + + // Start workflow using kitchensink. run := e.NewRun(int(iteration)) options := run.DefaultStartWorkflowOptions() options.ID = fmt.Sprintf("%s-track-%d", e.id, iteration) options.WorkflowExecutionErrorWhenAlreadyStarted = false // TypedSearchAttributes are already set by DefaultStartWorkflowOptions() - workflowInput := &ebbandflow.WorkflowParams{ - SleepActivities: &config, + workflowInput := &WorkflowInput{ + InitialActions: []*ActionSet{ + { + Actions: actions, + Concurrent: true, + }, + }, } - // Start workflow to track activity timings. - wf, err := e.Client.ExecuteWorkflow(ctx, options, "ebbAndFlowTrack", workflowInput) + // Start workflow using kitchensink. + wf, err := e.Client.ExecuteWorkflow(ctx, options, "kitchenSink", workflowInput) if err != nil { - return fmt.Errorf("failed to start ebbAndFlowTrack workflow for iteration %d: %w", iteration, err) + return fmt.Errorf("failed to start kitchensink workflow for iteration %d: %w", iteration, err) } e.scheduledActivities.Add(activities) // Wait for workflow completion - var result ebbandflow.WorkflowOutput - err = wf.Get(ctx, &result) + err = wf.Get(ctx, nil) if err != nil { - e.Logger.Errorf("ebbAndFlowTrack workflow failed for iteration %d: %v", iteration, err) + e.Logger.Errorf("kitchensink workflow failed for iteration %d: %v", iteration, err) } e.completedActivities.Add(activities) e.incrementTotalCompletedWorkflow() diff --git a/workers/go/ebbandflow/activities.go b/workers/go/ebbandflow/activities.go deleted file mode 100644 index 5295bce5..00000000 --- a/workers/go/ebbandflow/activities.go +++ /dev/null @@ -1,31 +0,0 @@ -package ebbandflow - -import ( - "context" - "time" - - "github.com/temporalio/omes/loadgen/kitchensink" - "go.temporal.io/sdk/activity" -) - -type Activities struct{} - -type ActivityExecutionResult struct { - ScheduledTime time.Time `json:"scheduledTime"` - ActualStartTime time.Time `json:"actualStartTime"` -} - -func (a Activities) MeasureLatencyActivity( - ctx context.Context, - activityAction *kitchensink.ExecuteActivityAction, -) (ActivityExecutionResult, error) { - if delay := activityAction.GetDelay(); delay != nil { - time.Sleep(delay.AsDuration()) - } - - activityInfo := activity.GetInfo(ctx) - return ActivityExecutionResult{ - ScheduledTime: activityInfo.ScheduledTime, - ActualStartTime: activityInfo.StartedTime, - }, nil -} diff --git a/workers/go/ebbandflow/workflow.go b/workers/go/ebbandflow/workflow.go deleted file mode 100644 index fc67fe91..00000000 --- a/workers/go/ebbandflow/workflow.go +++ /dev/null @@ -1,87 +0,0 @@ -package ebbandflow - -import ( - "fmt" - "math/rand" - "sync" - "time" - - "github.com/temporalio/omes/loadgen/ebbandflow" - "github.com/temporalio/omes/workers/go/workflowutils" - "go.temporal.io/sdk/temporal" - "go.temporal.io/sdk/workflow" -) - -var activityStub = Activities{} - -// EbbAndFlowTrackWorkflow executes activities and returns their schedule-to-start times with fairness data -func EbbAndFlowTrackWorkflow(ctx workflow.Context, params *ebbandflow.WorkflowParams) (*ebbandflow.WorkflowOutput, error) { - rng := rand.New(rand.NewSource(workflow.Now(ctx).UnixNano())) - activities := 
params.SleepActivities.Sample(rng) - - if len(activities) == 0 { - return &ebbandflow.WorkflowOutput{Timings: []ebbandflow.ActivityTiming{}}, nil - } - - var results []ebbandflow.ActivityTiming - var resultsMutex sync.Mutex - - var activityFuncs []func(workflow.Context) error - for _, activity := range activities { - activityFuncs = append(activityFuncs, func(ctx workflow.Context) error { - // Set up activity options - opts := workflow.ActivityOptions{ - StartToCloseTimeout: 1 * time.Minute, - RetryPolicy: &temporal.RetryPolicy{}, - } - - // Set priority, if specified - if activity.Priority != nil { - opts.Priority.PriorityKey = int(activity.Priority.PriorityKey) - } - - // Set fairness, if specified - fairnessKey := activity.GetFairnessKey() - fairnessWeight := activity.GetFairnessWeight() - if fairnessKey != "" { - opts.Priority.FairnessKey = fairnessKey - opts.Priority.FairnessWeight = fairnessWeight - } - - // Execute activity - var activityResult ActivityExecutionResult - actCtx := workflow.WithActivityOptions(ctx, opts) - err := workflow.ExecuteActivity(actCtx, activityStub.MeasureLatencyActivity, activity).Get(ctx, &activityResult) - if err != nil { - workflow.GetLogger(ctx).Error("Activity execution failed", "error", err) - return err - } - - // Calculate schedule-to-start time using accurate activity timing - scheduleToStartMS := activityResult.ActualStartTime.Sub(activityResult.ScheduledTime) - - result := ebbandflow.ActivityTiming{ - ScheduleToStart: scheduleToStartMS, - } - - // Thread-safe append to results - resultsMutex.Lock() - results = append(results, result) - resultsMutex.Unlock() - - return nil - }) - } - - err := workflowutils.RunConcurrently(ctx, activityFuncs...) - if err != nil { - workflow.GetLogger(ctx).Error("Failed to execute activities concurrently", "error", err) - } - - // Check if all activities failed - if len(results) == 0 { - return nil, fmt.Errorf("failed to start any of the %d activities", len(activities)) - } - - return &ebbandflow.WorkflowOutput{Timings: results}, nil -} diff --git a/workers/go/kitchensink/kitchen_sink.go b/workers/go/kitchensink/kitchen_sink.go index f78244f4..186a28d8 100644 --- a/workers/go/kitchensink/kitchen_sink.go +++ b/workers/go/kitchensink/kitchen_sink.go @@ -386,10 +386,10 @@ func launchActivity(ctx workflow.Context, act *kitchensink.ExecuteActivityAction priority.PriorityKey = int(prio.PriorityKey) } if fk := act.GetFairnessKey(); fk != "" { - return fmt.Errorf("fairness key is not supported yet") + priority.FairnessKey = fk } if fw := act.GetFairnessWeight(); fw > 0 { - return fmt.Errorf("fairness weight is not supported yet") + priority.FairnessWeight = fw } opts := workflow.ActivityOptions{ diff --git a/workers/go/worker/worker.go b/workers/go/worker/worker.go index 81d2595a..4b53a558 100644 --- a/workers/go/worker/worker.go +++ b/workers/go/worker/worker.go @@ -6,7 +6,6 @@ import ( "github.com/nexus-rpc/sdk-go/nexus" "github.com/spf13/cobra" "github.com/temporalio/omes/cmd/clioptions" - "github.com/temporalio/omes/workers/go/ebbandflow" "github.com/temporalio/omes/workers/go/kitchensink" "go.temporal.io/sdk/activity" "go.temporal.io/sdk/client" @@ -68,7 +67,6 @@ func makePollerBehavior(simple, auto int) worker.PollerBehavior { func runWorkers(client client.Client, taskQueues []string, options clioptions.WorkerOptions) error { errCh := make(chan error, len(taskQueues)) - ebbFlowActivities := ebbandflow.Activities{} clientActivities := kitchensink.ClientActivities{ Client: client, } @@ -103,8 +101,6 @@ func 
runWorkers(client client.Client, taskQueues []string, options clioptions.Wo w.RegisterActivityWithOptions(clientActivities.ExecuteClientActivity, activity.RegisterOptions{Name: "client"}) w.RegisterWorkflow(kitchensink.EchoWorkflow) w.RegisterWorkflow(kitchensink.WaitForCancelWorkflow) - w.RegisterWorkflowWithOptions(ebbandflow.EbbAndFlowTrackWorkflow, workflow.RegisterOptions{Name: "ebbAndFlowTrack"}) - w.RegisterActivity(&ebbFlowActivities) w.RegisterNexusService(service) errCh <- w.Run(worker.InterruptCh()) }() From 4a802f3b29d0f288390893b665aba8ae99672a9b Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 14 Nov 2025 17:00:35 -0800 Subject: [PATCH 29/66] fairness Signed-off-by: Stephan Behnke --- .../dotnet/Temporalio.Omes/KitchenSinkWorkflow.cs | 14 ++++---------- .../io/temporal/omes/KitchenSinkWorkflowImpl.java | 4 ++-- workers/python/kitchen_sink.py | 10 ++++++++-- workers/typescript/src/workflows/kitchen_sink.ts | 12 +++++++++++- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/workers/dotnet/Temporalio.Omes/KitchenSinkWorkflow.cs b/workers/dotnet/Temporalio.Omes/KitchenSinkWorkflow.cs index 5cf1286c..1b7c682d 100644 --- a/workers/dotnet/Temporalio.Omes/KitchenSinkWorkflow.cs +++ b/workers/dotnet/Temporalio.Omes/KitchenSinkWorkflow.cs @@ -412,17 +412,11 @@ private static Temporalio.Common.RetryPolicy RetryPolicyFromProto(RetryPolicy pr private static Temporalio.Common.Priority PriorityFromProto(ExecuteActivityAction eaa) { - if (eaa.FairnessKey != null) + return new Temporalio.Common.Priority { - throw new ApplicationFailureException("FairnessKey is not supported yet"); - } - if (eaa.FairnessWeight > 0) - { - throw new ApplicationFailureException("FairnessWeight is not supported yet"); - } - return new() - { - PriorityKey = eaa.Priority.PriorityKey + PriorityKey = eaa.Priority.PriorityKey, + FairnessKey = !string.IsNullOrEmpty(eaa.FairnessKey) ? eaa.FairnessKey : null, + FairnessWeight = eaa.FairnessWeight > 0 ? 
eaa.FairnessWeight : null, }; } diff --git a/workers/java/io/temporal/omes/KitchenSinkWorkflowImpl.java b/workers/java/io/temporal/omes/KitchenSinkWorkflowImpl.java index af30c1e9..808f0cc6 100644 --- a/workers/java/io/temporal/omes/KitchenSinkWorkflowImpl.java +++ b/workers/java/io/temporal/omes/KitchenSinkWorkflowImpl.java @@ -357,10 +357,10 @@ private void launchActivity(KitchenSink.ExecuteActivityAction executeActivity) { prio.setPriorityKey(priority.getPriorityKey()); } if (executeActivity.getFairnessKey() != "") { - throw new IllegalArgumentException("FairnessKey is not supported"); + prio.setFairnessKey(executeActivity.getFairnessKey()); } if (executeActivity.getFairnessWeight() > 0) { - throw new IllegalArgumentException("FairnessWeight is not supported"); + prio.setFairnessWeight(executeActivity.getFairnessWeight()); } if (executeActivity.hasIsLocal()) { diff --git a/workers/python/kitchen_sink.py b/workers/python/kitchen_sink.py index 56398a52..92638940 100644 --- a/workers/python/kitchen_sink.py +++ b/workers/python/kitchen_sink.py @@ -233,8 +233,13 @@ def launch_activity(execute_activity: ExecuteActivityAction) -> ActivityHandle: # TODO: cancel type can be in local ) else: - if execute_activity.HasField("priority"): - raise NotImplementedError("priority is not supported yet") + priority = None + if execute_activity.HasField("priority") or execute_activity.fairness_key or execute_activity.fairness_weight > 0: + priority = Priority( + priority_key=execute_activity.priority.priority_key if execute_activity.HasField("priority") else 0, + fairness_key=execute_activity.fairness_key if execute_activity.fairness_key else None, + fairness_weight=execute_activity.fairness_weight if execute_activity.fairness_weight > 0 else None, + ) activity_task = workflow.start_activity( activity=act_type, @@ -256,6 +261,7 @@ def launch_activity(execute_activity: ExecuteActivityAction) -> ActivityHandle: cancellation_type=convert_act_cancel_type( execute_activity.remote.cancellation_type ), + priority=priority, ) return activity_task diff --git a/workers/typescript/src/workflows/kitchen_sink.ts b/workers/typescript/src/workflows/kitchen_sink.ts index e55c6411..864d3957 100644 --- a/workers/typescript/src/workflows/kitchen_sink.ts +++ b/workers/typescript/src/workflows/kitchen_sink.ts @@ -290,12 +290,22 @@ function launchActivity(execActivity: IExecuteActivityAction): Promise actType = 'client'; args.push(execActivity.client); } + // Build priority object with fairness key and weight + let priority = decodePriority(execActivity.priority); + if (execActivity.fairnessKey || (execActivity.fairnessWeight && execActivity.fairnessWeight > 0)) { + priority = { + ...priority, + fairnessKey: execActivity.fairnessKey || undefined, + fairnessWeight: (execActivity.fairnessWeight && execActivity.fairnessWeight > 0) ? 
execActivity.fairnessWeight : undefined, + }; + } + const actArgs: ActivityOptions | LocalActivityOptions = { scheduleToCloseTimeout: durationConvertMaybeUndefined(execActivity.scheduleToCloseTimeout), startToCloseTimeout: durationConvertMaybeUndefined(execActivity.startToCloseTimeout), scheduleToStartTimeout: durationConvertMaybeUndefined(execActivity.scheduleToStartTimeout), retry: decompileRetryPolicy(execActivity.retryPolicy), - priority: decodePriority(execActivity.priority), + priority: priority, }; if (execActivity.isLocal) { From d59648efd2f7146af17f7b21ad6215fb0d76f891 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 14 Nov 2025 17:22:25 -0800 Subject: [PATCH 30/66] Update versions.env Signed-off-by: Stephan Behnke --- versions.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.env b/versions.env index 04ff047c..91a863e9 100644 --- a/versions.env +++ b/versions.env @@ -14,4 +14,4 @@ DOTNET_SDK_VERSION=1.9.0 GO_SDK_VERSION=1.37.0 JAVA_SDK_VERSION=1.31.0 PYTHON_SDK_VERSION=1.19.0 -TYPESCRIPT_SDK_VERSION=1.12.1 +TYPESCRIPT_SDK_VERSION=1.13.2 From 307cf1bb73a39e602c7f86b4554c786fc3d9798d Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 14 Nov 2025 17:32:14 -0800 Subject: [PATCH 31/66] Update kitchen_sink.py Signed-off-by: Stephan Behnke --- workers/python/kitchen_sink.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/workers/python/kitchen_sink.py b/workers/python/kitchen_sink.py index 92638940..bdbf4a53 100644 --- a/workers/python/kitchen_sink.py +++ b/workers/python/kitchen_sink.py @@ -234,11 +234,21 @@ def launch_activity(execute_activity: ExecuteActivityAction) -> ActivityHandle: ) else: priority = None - if execute_activity.HasField("priority") or execute_activity.fairness_key or execute_activity.fairness_weight > 0: - priority = Priority( - priority_key=execute_activity.priority.priority_key if execute_activity.HasField("priority") else 0, - fairness_key=execute_activity.fairness_key if execute_activity.fairness_key else None, - fairness_weight=execute_activity.fairness_weight if execute_activity.fairness_weight > 0 else None, + if ( + execute_activity.HasField("priority") + or execute_activity.fairness_key + or execute_activity.fairness_weight > 0 + ): + priority = Priority( # type: ignore[call-arg] + priority_key=execute_activity.priority.priority_key + if execute_activity.HasField("priority") + else 0, + fairness_key=execute_activity.fairness_key # type: ignore[call-arg] + if execute_activity.fairness_key + else None, + fairness_weight=execute_activity.fairness_weight # type: ignore[call-arg] + if execute_activity.fairness_weight > 0 + else None, ) activity_task = workflow.start_activity( @@ -261,7 +271,7 @@ def launch_activity(execute_activity: ExecuteActivityAction) -> ActivityHandle: cancellation_type=convert_act_cancel_type( execute_activity.remote.cancellation_type ), - priority=priority, + priority=priority, # type: ignore[arg-type] ) return activity_task From 9944d84b6948c3ebf434357531856e3808275538 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 14 Nov 2025 17:35:30 -0800 Subject: [PATCH 32/66] Update kitchen_sink.py Signed-off-by: Stephan Behnke --- workers/python/kitchen_sink.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/python/kitchen_sink.py b/workers/python/kitchen_sink.py index bdbf4a53..15a66478 100644 --- a/workers/python/kitchen_sink.py +++ b/workers/python/kitchen_sink.py @@ -251,7 +251,7 @@ def launch_activity(execute_activity: 
ExecuteActivityAction) -> ActivityHandle: else None, ) - activity_task = workflow.start_activity( + activity_task = workflow.start_activity( # type: ignore[misc] activity=act_type, args=args, task_queue=execute_activity.task_queue, From c3f48804fa895e637f06d58e9bff718fbfe92995 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 14 Nov 2025 17:35:53 -0800 Subject: [PATCH 33/66] Update kitchen_sink.ts Signed-off-by: Stephan Behnke --- .../typescript/src/workflows/kitchen_sink.ts | 56 +++++++++++-------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/workers/typescript/src/workflows/kitchen_sink.ts b/workers/typescript/src/workflows/kitchen_sink.ts index 864d3957..26533705 100644 --- a/workers/typescript/src/workflows/kitchen_sink.ts +++ b/workers/typescript/src/workflows/kitchen_sink.ts @@ -290,31 +290,41 @@ function launchActivity(execActivity: IExecuteActivityAction): Promise actType = 'client'; args.push(execActivity.client); } - // Build priority object with fairness key and weight - let priority = decodePriority(execActivity.priority); - if (execActivity.fairnessKey || (execActivity.fairnessWeight && execActivity.fairnessWeight > 0)) { - priority = { - ...priority, - fairnessKey: execActivity.fairnessKey || undefined, - fairnessWeight: (execActivity.fairnessWeight && execActivity.fairnessWeight > 0) ? execActivity.fairnessWeight : undefined, - }; - } - - const actArgs: ActivityOptions | LocalActivityOptions = { - scheduleToCloseTimeout: durationConvertMaybeUndefined(execActivity.scheduleToCloseTimeout), - startToCloseTimeout: durationConvertMaybeUndefined(execActivity.startToCloseTimeout), - scheduleToStartTimeout: durationConvertMaybeUndefined(execActivity.scheduleToStartTimeout), - retry: decompileRetryPolicy(execActivity.retryPolicy), - priority: priority, - }; - if (execActivity.isLocal) { - return scheduleLocalActivity(actType, args, actArgs); + const localArgs: LocalActivityOptions = { + scheduleToCloseTimeout: durationConvertMaybeUndefined(execActivity.scheduleToCloseTimeout), + startToCloseTimeout: durationConvertMaybeUndefined(execActivity.startToCloseTimeout), + scheduleToStartTimeout: durationConvertMaybeUndefined(execActivity.scheduleToStartTimeout), + retry: decompileRetryPolicy(execActivity.retryPolicy), + }; + return scheduleLocalActivity(actType, args, localArgs); } else { - const remoteArgs = actArgs as ActivityOptions; - remoteArgs.taskQueue = execActivity.taskQueue ?? undefined; - remoteArgs.cancellationType = convertCancelType(execActivity.remote?.cancellationType); - remoteArgs.heartbeatTimeout = durationConvert(execActivity.heartbeatTimeout); + // Build priority object with fairness key and weight + let priority = decodePriority(execActivity.priority); + if ( + execActivity.fairnessKey || + (execActivity.fairnessWeight && execActivity.fairnessWeight > 0) + ) { + priority = { + ...priority, + fairnessKey: execActivity.fairnessKey || undefined, + fairnessWeight: + execActivity.fairnessWeight && execActivity.fairnessWeight > 0 + ? execActivity.fairnessWeight + : undefined, + }; + } + + const remoteArgs: ActivityOptions = { + scheduleToCloseTimeout: durationConvertMaybeUndefined(execActivity.scheduleToCloseTimeout), + startToCloseTimeout: durationConvertMaybeUndefined(execActivity.startToCloseTimeout), + scheduleToStartTimeout: durationConvertMaybeUndefined(execActivity.scheduleToStartTimeout), + retry: decompileRetryPolicy(execActivity.retryPolicy), + priority, + taskQueue: execActivity.taskQueue ?? 
undefined, + cancellationType: convertCancelType(execActivity.remote?.cancellationType), + heartbeatTimeout: durationConvert(execActivity.heartbeatTimeout), + }; return scheduleActivity(actType, args, remoteArgs); } } From 30a2eadff242b67d506463f9487bba2b0ae6c660 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 14 Nov 2025 19:56:30 -0800 Subject: [PATCH 34/66] install Signed-off-by: Stephan Behnke --- workers/typescript/package-lock.json | 129 +++++++++++++++++---------- workers/typescript/package.json | 8 +- 2 files changed, 85 insertions(+), 52 deletions(-) diff --git a/workers/typescript/package-lock.json b/workers/typescript/package-lock.json index b158d5fc..6f801d9e 100644 --- a/workers/typescript/package-lock.json +++ b/workers/typescript/package-lock.json @@ -8,10 +8,10 @@ "name": "omes", "version": "0.1.0", "dependencies": { - "@temporalio/activity": "^1.12.1", - "@temporalio/client": "^1.12.1", - "@temporalio/worker": "^1.12.1", - "@temporalio/workflow": "^1.12.1", + "@temporalio/activity": "^1.13.2", + "@temporalio/client": "^1.13.2", + "@temporalio/worker": "^1.13.2", + "@temporalio/workflow": "^1.13.2", "commander": "^11.1.0", "long": "^5.2.3", "winston": "^3.11.0" @@ -652,12 +652,13 @@ "integrity": "sha512-myfUej5naTBWnqOCc/MdVOLVjXUXtIA+NpDrDBKJtLLg2shUjBu3cZmB/85RyitKc55+lUUyl7oRfLOvkr2hsw==" }, "node_modules/@temporalio/activity": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/activity/-/activity-1.12.1.tgz", - "integrity": "sha512-EPPIR5J0A6OxWTr5HGyeM2Lwh3US8S73N3ZFelCPaJwOq2Fh7qrLiwYp2wCwGYhhYI9Xppo3xE45MWUxayBa3Q==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/activity/-/activity-1.13.2.tgz", + "integrity": "sha512-Mp0pAGNKGeIlZEy6ToLCt1gJdrumu64xHF1yAc1gsOVeqo4a3ISGFbCSpM56bokwtj9jpFK/Z1f3zCFnif2ogg==", "license": "MIT", "dependencies": { - "@temporalio/common": "1.12.1", + "@temporalio/client": "1.13.2", + "@temporalio/common": "1.13.2", "abort-controller": "^3.0.0" }, "engines": { @@ -665,31 +666,32 @@ } }, "node_modules/@temporalio/client": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/client/-/client-1.12.1.tgz", - "integrity": "sha512-m89isGb6I4BBeCbhkvXbpjeRZZUa3E2R06J/I+t2JWgv0Tg+PoNPusvU9UBd6LN7f7AetsQvAZKU6eQHyWxSEA==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/client/-/client-1.13.2.tgz", + "integrity": "sha512-gyptINv/i6DTG4sRgE6S10vsO6V56iQQujDFaVIwg5pcRsRqqHIwoOldI4j1RqrEoEy7J4prRBGNwOd5H3Yf8A==", "license": "MIT", "dependencies": { "@grpc/grpc-js": "^1.12.4", - "@temporalio/common": "1.12.1", - "@temporalio/proto": "1.12.1", + "@temporalio/common": "1.13.2", + "@temporalio/proto": "1.13.2", "abort-controller": "^3.0.0", "long": "^5.2.3", - "uuid": "^9.0.1" + "uuid": "^11.1.0" }, "engines": { "node": ">= 18.0.0" } }, "node_modules/@temporalio/common": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/common/-/common-1.12.1.tgz", - "integrity": "sha512-gMVNYh49qGNFPKN22BPXtQlgvcD8rxUoP0QO0ePeaz9TyHG6+3TURGhc8xybJA7zHnpfW8TH8XHMWJIMzCPxtg==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/common/-/common-1.13.2.tgz", + "integrity": "sha512-qpp/1Bn+Uvbnew3jHL5u1YWRfBmNnklzfZwa5oOnQ5EBxKMWmpGzCtvh+VwaGXunbPHh1Teqy76Mqp/Uj2kmbA==", "license": "MIT", "dependencies": { - "@temporalio/proto": "1.12.1", + "@temporalio/proto": "1.13.2", "long": "^5.2.3", - "ms": "^3.0.0-canary.1", + "ms": "3.0.0-canary.1", + "nexus-rpc": "^0.0.1", "proto3-json-serializer": "^2.0.0" }, 
"engines": { @@ -697,14 +699,14 @@ } }, "node_modules/@temporalio/core-bridge": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/core-bridge/-/core-bridge-1.12.1.tgz", - "integrity": "sha512-JOLavcVhzLf4QDK7S/SAZjTbbtiYRoZoJCvJsl6T9s6MJFyeT1ih+4jeAN3UUmhLvaP++sqEuFSfRVJ0ZFoFNA==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/core-bridge/-/core-bridge-1.13.2.tgz", + "integrity": "sha512-zwYZqeWypi1YHTeoYwBYgIVmWNg4+/T+CCcOwtyNUvA25wim85p9JOCB9tKgG4e8Hu1Nptd7yEjPaZtLPmJjjg==", "hasInstallScript": true, "license": "MIT", "dependencies": { "@grpc/grpc-js": "^1.12.4", - "@temporalio/common": "1.12.1", + "@temporalio/common": "1.13.2", "arg": "^5.0.2", "cargo-cp-artifact": "^0.1.8", "which": "^4.0.0" @@ -713,10 +715,26 @@ "node": ">= 18.0.0" } }, + "node_modules/@temporalio/nexus": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/nexus/-/nexus-1.13.2.tgz", + "integrity": "sha512-oG+yZcgUiDCNU08aI7q5dKvRyeUtzJH7Woz66dx4QlhEIvRoUeEFqjLHySMf2r/3l1pbhZ5G2z12HcL4pVE5Eg==", + "license": "MIT", + "dependencies": { + "@temporalio/client": "1.13.2", + "@temporalio/common": "1.13.2", + "@temporalio/proto": "1.13.2", + "long": "^5.2.3", + "nexus-rpc": "^0.0.1" + }, + "engines": { + "node": ">= 18.0.0" + } + }, "node_modules/@temporalio/proto": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/proto/-/proto-1.12.1.tgz", - "integrity": "sha512-hW5jvxBuoKdh3CwbGT/AQoPMFoGG8xcPcHRMCTta/HZGFHRDibbr0aDfPS6ke7oYtcpWF0A8d6jRAHEXyPUvUQ==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/proto/-/proto-1.13.2.tgz", + "integrity": "sha512-V8agtFxM2KkKOtUjcCZFaIdOV64j86VrUQ4bvOZtzwmWGyp5ZCebskoaTTL8UMkRx4bTIeEKOckLrXo8VeorWg==", "license": "MIT", "dependencies": { "long": "^5.2.3", @@ -727,22 +745,25 @@ } }, "node_modules/@temporalio/worker": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/worker/-/worker-1.12.1.tgz", - "integrity": "sha512-jI3UxPAVbuM2MJO0c27iNV59KNHgAlx6yoJOpcE+jdGAmoN52MHdSt3qedrWtWINgZDbZg9dPC8KoDbXr9kP6g==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/worker/-/worker-1.13.2.tgz", + "integrity": "sha512-UEyHDjY/xJsTIg6DEwla6wncenOrmOGu13HnjwwqY2iUNJdoQUSHlqMK7Cc7hK0zpeAb7qLOCi2A1bSYVncAHg==", "license": "MIT", "dependencies": { "@grpc/grpc-js": "^1.12.4", "@swc/core": "^1.3.102", - "@temporalio/activity": "1.12.1", - "@temporalio/client": "1.12.1", - "@temporalio/common": "1.12.1", - "@temporalio/core-bridge": "1.12.1", - "@temporalio/proto": "1.12.1", - "@temporalio/workflow": "1.12.1", + "@temporalio/activity": "1.13.2", + "@temporalio/client": "1.13.2", + "@temporalio/common": "1.13.2", + "@temporalio/core-bridge": "1.13.2", + "@temporalio/nexus": "1.13.2", + "@temporalio/proto": "1.13.2", + "@temporalio/workflow": "1.13.2", "abort-controller": "^3.0.0", - "heap-js": "^2.3.0", + "heap-js": "^2.6.0", "memfs": "^4.6.0", + "nexus-rpc": "^0.0.1", + "proto3-json-serializer": "^2.0.0", "protobufjs": "^7.2.5", "rxjs": "^7.8.1", "source-map": "^0.7.4", @@ -772,13 +793,14 @@ } }, "node_modules/@temporalio/workflow": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/workflow/-/workflow-1.12.1.tgz", - "integrity": "sha512-r2d2tzEf6zJENewZMku1ge53QO52ZTN8bJXp8zzerPYyMx9Iqhg3Ck1ckrdpxpDw9gxBYZsRbwS2vpiq53ZKRQ==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/workflow/-/workflow-1.13.2.tgz", + "integrity": 
"sha512-vK8s0iCTMGNLtUZeKiFVfmLd4nVUDaJ4aS0yCy8WvMUpgqBTpaaOWPAy7KiH0grKB7zIskiWljEMtpt3ce586w==", "license": "MIT", "dependencies": { - "@temporalio/common": "1.12.1", - "@temporalio/proto": "1.12.1" + "@temporalio/common": "1.13.2", + "@temporalio/proto": "1.13.2", + "nexus-rpc": "^0.0.1" }, "engines": { "node": ">= 18.0.0" @@ -2421,9 +2443,10 @@ } }, "node_modules/heap-js": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/heap-js/-/heap-js-2.3.0.tgz", - "integrity": "sha512-E5303mzwQ+4j/n2J0rDvEPBN7GKjhis10oHiYOgjxsmxYgqG++hz9NyLLOXttzH8as/DyiBHYpUrJTZWYaMo8Q==", + "version": "2.7.1", + "resolved": "https://registry.npmjs.org/heap-js/-/heap-js-2.7.1.tgz", + "integrity": "sha512-EQfezRg0NCZGNlhlDR3Evrw1FVL2G3LhU7EgPoxufQKruNBSYA8MiRPHeWbU+36o+Fhel0wMwM+sLEiBAlNLJA==", + "license": "BSD-3-Clause", "engines": { "node": ">=10.0.0" } @@ -3028,6 +3051,15 @@ "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==" }, + "node_modules/nexus-rpc": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/nexus-rpc/-/nexus-rpc-0.0.1.tgz", + "integrity": "sha512-hAWn8Hh2eewpB5McXR5EW81R3pR/ziuGhKCF3wFyUVCklanPqrIgMNr7jKCbzXeNVad0nUDfWpFRqh2u+zxQtw==", + "license": "MIT", + "engines": { + "node": ">= 18.0.0" + } + }, "node_modules/node-releases": { "version": "2.0.19", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz", @@ -4237,15 +4269,16 @@ "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==" }, "node_modules/uuid": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", - "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-11.1.0.tgz", + "integrity": "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==", "funding": [ "https://github.com/sponsors/broofa", "https://github.com/sponsors/ctavan" ], + "license": "MIT", "bin": { - "uuid": "dist/bin/uuid" + "uuid": "dist/esm/bin/uuid" } }, "node_modules/v8-compile-cache": { diff --git a/workers/typescript/package.json b/workers/typescript/package.json index ea99d42c..4042baca 100644 --- a/workers/typescript/package.json +++ b/workers/typescript/package.json @@ -21,10 +21,10 @@ ] }, "dependencies": { - "@temporalio/activity": "^1.12.1", - "@temporalio/client": "^1.12.1", - "@temporalio/worker": "^1.12.1", - "@temporalio/workflow": "^1.12.1", + "@temporalio/activity": "^1.13.2", + "@temporalio/client": "^1.13.2", + "@temporalio/worker": "^1.13.2", + "@temporalio/workflow": "^1.13.2", "commander": "^11.1.0", "long": "^5.2.3", "winston": "^3.11.0" From bc84f7300ba776b0931d0f821b99278ae7bc6ffc Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 14 Nov 2025 20:10:24 -0800 Subject: [PATCH 35/66] wip Signed-off-by: Stephan Behnke --- workers/go/go.mod | 1 - workers/go/go.sum | 4 ---- 2 files changed, 5 deletions(-) diff --git a/workers/go/go.mod b/workers/go/go.mod index 1a1123f0..69457921 100644 --- a/workers/go/go.mod +++ b/workers/go/go.mod @@ -13,7 +13,6 @@ require ( ) require ( - github.com/antithesishq/antithesis-sdk-go v0.5.1-0.20250924165633-f60b0222f1b6 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect 
diff --git a/workers/go/go.sum b/workers/go/go.sum index f89b9b63..ff9d801f 100644 --- a/workers/go/go.sum +++ b/workers/go/go.sum @@ -1,5 +1,3 @@ -github.com/antithesishq/antithesis-sdk-go v0.5.1-0.20250924165633-f60b0222f1b6 h1:qSD74Vz3scN2SrfML8dy2Whcv0C3pNkfqYZXeL4SIq0= -github.com/antithesishq/antithesis-sdk-go v0.5.1-0.20250924165633-f60b0222f1b6/go.mod h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= @@ -56,8 +54,6 @@ github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/temporalio/features v0.0.0-20251113235102-ac7c92445a59 h1:+k/VNVoVeoe1rvX+9qoedknC5UkdA3BbL8TFKWtaZMU= -github.com/temporalio/features v0.0.0-20251113235102-ac7c92445a59/go.mod h1:Ew0bBvTHCGcs2fX+iyoUqoj78x5eS7BznJTPg8wd35I= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= From 0c36f166aa1e63d15fe75ea52927abea68b72d86 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 12:12:50 -0800 Subject: [PATCH 36/66] Update run.go Signed-off-by: Stephan Behnke --- workers/run.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/workers/run.go b/workers/run.go index 13ba36d0..a7526b97 100644 --- a/workers/run.go +++ b/workers/run.go @@ -190,10 +190,17 @@ func passthrough(fs *pflag.FlagSet, prefix string) (flags []string) { if !f.Changed { return } - flags = append(flags, fmt.Sprintf("--%s=%s", - strings.TrimPrefix(f.Name, prefix), - f.Value.String(), - )) + + flagName := strings.TrimPrefix(f.Name, prefix) + + if f.Value.Type() == "bool" { + // Some SDKs like Python don't like `--tls=true` + if f.Value.String() == "true" { + flags = append(flags, fmt.Sprintf("--%s", flagName)) + } + } else { + flags = append(flags, fmt.Sprintf("--%s=%s", flagName, f.Value.String())) + } }) return } From 166aae56bc8f1f3ba4700fda0ed0f1c884a95983 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 16:53:15 -0800 Subject: [PATCH 37/66] Update throughput_stress.go Signed-off-by: Stephan Behnke --- scenarios/throughput_stress.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/scenarios/throughput_stress.go b/scenarios/throughput_stress.go index 3f2bb343..49020a61 100644 --- a/scenarios/throughput_stress.go +++ b/scenarios/throughput_stress.go @@ -210,6 +210,13 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error } } + // Initialize workflow completion checker + timeout := info.ScenarioOptionDuration(VisibilityVerificationTimeoutFlag, 30*time.Second) + completionVerifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, timeout) + if err != nil { + return fmt.Errorf("failed to initialize workflow completion checker: %w", err) + } + // Start the scenario run. // // NOTE: When resuming, it can happen that there are no more iterations/time left to run more iterations. 
@@ -261,8 +268,6 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error } } - timeout := info.ScenarioOptionDuration(VisibilityVerificationTimeoutFlag, 30*time.Second) - // Configure expected workflow count function based on scenario config expectedWorkflowCount := func(state loadgen.ExecutorState) int { completedIterations := state.CompletedIterations @@ -281,12 +286,6 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error // Total: parent + children + continue-as-new return completedIterations + completedChildWorkflows + continueAsNewWorkflows } - - // Initialize workflow completion checker - completionVerifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, timeout) - if err != nil { - return fmt.Errorf("failed to initialize workflow completion checker: %w", err) - } completionVerifier.SetExpectedWorkflowCount(expectedWorkflowCount) // Create verifier that combines workflow completion and throughput checking From 124a8ec7abc2aa9e957db2163ecdc19021074c16 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 16:55:28 -0800 Subject: [PATCH 38/66] Update throughput_stress.go Signed-off-by: Stephan Behnke --- scenarios/throughput_stress.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scenarios/throughput_stress.go b/scenarios/throughput_stress.go index 49020a61..8bde6be1 100644 --- a/scenarios/throughput_stress.go +++ b/scenarios/throughput_stress.go @@ -199,6 +199,13 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error currentState := *t.state t.lock.Unlock() + // Initialize workflow completion checker + timeout := info.ScenarioOptionDuration(VisibilityVerificationTimeoutFlag, 30*time.Second) + completionVerifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, timeout) + if err != nil { + return fmt.Errorf("failed to initialize workflow completion checker: %w", err) + } + if isResuming { info.Logger.Info(fmt.Sprintf("Resuming scenario from state: %#v", currentState)) if execState, ok := currentState.ExecutorState.(loadgen.ExecutorState); ok { @@ -210,13 +217,6 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error } } - // Initialize workflow completion checker - timeout := info.ScenarioOptionDuration(VisibilityVerificationTimeoutFlag, 30*time.Second) - completionVerifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, timeout) - if err != nil { - return fmt.Errorf("failed to initialize workflow completion checker: %w", err) - } - // Start the scenario run. // // NOTE: When resuming, it can happen that there are no more iterations/time left to run more iterations. 
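For reference, the expected-workflow-count logic that the surrounding throughput_stress patches introduce and then relocate can be read in isolation as follows. This is an illustrative sketch only, assuming the same accounting the diffs show (one parent workflow per iteration, one child per internal iteration, and continue-as-new runs derived from ContinueAsNewAfterIter); the standalone helper name is hypothetical and does not exist in the repository.

    // expectedWorkflowClosures sketches how many closed workflows the
    // completion checker should expect for a given number of completed
    // throughput_stress iterations, under the assumptions in the diffs above.
    func expectedWorkflowClosures(completedIterations, internalIterations, continueAsNewAfterIter int) int {
        continueAsNew := 0
        if continueAsNewAfterIter > 0 {
            // The last internal iteration does not trigger a continue-as-new.
            continueAsNew = ((internalIterations - 1) / continueAsNewAfterIter) * completedIterations
        }
        children := completedIterations * internalIterations
        // Total: parents + children + continue-as-new runs.
        return completedIterations + children + continueAsNew
    }
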
From 76cac5f97d79d3fb14295c79e25da0e653a136e0 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 17:15:31 -0800 Subject: [PATCH 39/66] Update throughput_stress.go Signed-off-by: Stephan Behnke --- scenarios/throughput_stress.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/scenarios/throughput_stress.go b/scenarios/throughput_stress.go index 8bde6be1..80494e5b 100644 --- a/scenarios/throughput_stress.go +++ b/scenarios/throughput_stress.go @@ -62,8 +62,8 @@ type tpsConfig struct { } type tpsExecutor struct { + *tpsVerifier executor *loadgen.KitchenSinkExecutor - verifier *tpsVerifier lock sync.Mutex state *tpsState config *tpsConfig @@ -205,6 +205,10 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error if err != nil { return fmt.Errorf("failed to initialize workflow completion checker: %w", err) } + t.tpsVerifier = &tpsVerifier{ + completionVerifier: completionVerifier, + config: t.config, + } if isResuming { info.Logger.Info(fmt.Sprintf("Resuming scenario from state: %#v", currentState)) @@ -288,12 +292,6 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error } completionVerifier.SetExpectedWorkflowCount(expectedWorkflowCount) - // Create verifier that combines workflow completion and throughput checking - t.verifier = &tpsVerifier{ - completionVerifier: completionVerifier, - config: t.config, - } - if err := t.executor.Run(ctx, info); err != nil { return err } From e805cabac7befe8e9a7b0fe4a46833db7591db83 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 17:24:48 -0800 Subject: [PATCH 40/66] Update throughput_stress.go Signed-off-by: Stephan Behnke --- scenarios/throughput_stress.go | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/scenarios/throughput_stress.go b/scenarios/throughput_stress.go index 80494e5b..f6f0134c 100644 --- a/scenarios/throughput_stress.go +++ b/scenarios/throughput_stress.go @@ -62,14 +62,14 @@ type tpsConfig struct { } type tpsExecutor struct { - *tpsVerifier - executor *loadgen.KitchenSinkExecutor - lock sync.Mutex - state *tpsState - config *tpsConfig - isResuming bool - runID string - rng *rand.Rand + executor *loadgen.KitchenSinkExecutor + tpsVerifier *tpsVerifier + lock sync.Mutex + state *tpsState + config *tpsConfig + isResuming bool + runID string + rng *rand.Rand } var _ loadgen.Resumable = (*tpsExecutor)(nil) @@ -83,11 +83,11 @@ func init() { ExecutorFn: func() loadgen.Executor { return newThroughputStressExecutor() }, VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { t := executor.(*tpsExecutor) - if t.verifier == nil || t.executor == nil { + if t.tpsVerifier == nil || t.executor == nil { return nil } state := t.executor.GetState() - return t.verifier.VerifyRun(ctx, info, state) + return t.VerifyRun(ctx, info, state) }, }) } @@ -126,6 +126,14 @@ func (t *tpsExecutor) LoadState(loader func(any) error) error { return nil } +// VerifyRun implements the Verifier interface. +func (t *tpsExecutor) VerifyRun(ctx context.Context, info loadgen.ScenarioInfo, state loadgen.ExecutorState) []error { + if t.tpsVerifier == nil || t.executor == nil { + return nil + } + return t.tpsVerifier.VerifyRun(ctx, info, state) +} + // Configure initializes tpsConfig. 
Largely, it reads and validates throughput_stress scenario options func (t *tpsExecutor) Configure(info loadgen.ScenarioInfo) error { config := &tpsConfig{ From fcf4f6aeba0f277898c355b74ec4928399c7a418 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 17:24:52 -0800 Subject: [PATCH 41/66] Update ebb_and_flow.go Signed-off-by: Stephan Behnke --- scenarios/ebb_and_flow.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index 627a6ea5..b3f02344 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -144,6 +144,14 @@ func (e *ebbAndFlowExecutor) Configure(info loadgen.ScenarioInfo) error { return nil } +// VerifyRun implements the Verifier interface. +func (e *ebbAndFlowExecutor) VerifyRun(ctx context.Context, info loadgen.ScenarioInfo, state loadgen.ExecutorState) []error { + if e.completionVerifier == nil || e.executorState == nil { + return nil + } + return e.completionVerifier.VerifyRun(ctx, info, *e.executorState) +} + // Run executes the ebb and flow scenario. func (e *ebbAndFlowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { if err := e.Configure(info); err != nil { From 4f8d9c6d04ba3a13ead3b02d099c482a17824ed0 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 17:26:10 -0800 Subject: [PATCH 42/66] Update test_env.go Signed-off-by: Stephan Behnke --- workers/test_env.go | 50 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/workers/test_env.go b/workers/test_env.go index 2296e79d..3ba268b2 100644 --- a/workers/test_env.go +++ b/workers/test_env.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "path/filepath" + "reflect" "testing" "time" @@ -171,7 +172,7 @@ func (env *TestEnvironment) createNexusEndpoint(ctx context.Context, taskQueueNa return endpointName, nil } -// RunExecutorTest runs an executor with a specific SDK and server address +// RunExecutorTest runs an executor with a specific SDK and server address. func (env *TestEnvironment) RunExecutorTest( t *testing.T, executor loadgen.Executor, @@ -202,6 +203,48 @@ func (env *TestEnvironment) RunExecutorTest( execErr := executor.Run(testCtx, scenarioInfo) + // Run verification if executor implements Verifier interface. + // Use a fresh context for verification, not the executor context which may be canceled. 
+ var verifyErrs []error + if verifier, ok := executor.(loadgen.Verifier); ok { + if stateful, hasSnapshot := executor.(interface{ Snapshot() any }); hasSnapshot { + snapshot := stateful.Snapshot() + // Try to extract ExecutorState from the snapshot + var execState loadgen.ExecutorState + var hasState bool + + switch s := snapshot.(type) { + case loadgen.ExecutorState: + execState = s + hasState = true + default: + // For custom state types with an ExecutorState field + v := reflect.ValueOf(snapshot) + if v.Kind() == reflect.Struct { + if field := v.FieldByName("ExecutorState"); field.IsValid() { + if es, ok := field.Interface().(loadgen.ExecutorState); ok { + execState = es + hasState = true + } else if field.Kind() == reflect.Interface && !field.IsNil() { + // Handle case where ExecutorState is stored as interface{} + if es, ok := field.Elem().Interface().(loadgen.ExecutorState); ok { + execState = es + hasState = true + } + } + } + } + } + + if hasState { + // Create a fresh context for verification with appropriate timeout + verifyCtx, cancelVerify := context.WithTimeout(t.Context(), env.executorTimeout) + defer cancelVerify() + verifyErrs = verifier.VerifyRun(verifyCtx, scenarioInfo, execState) + } + } + } + // Trigger worker shutdown. cancelTestCtx() @@ -216,8 +259,9 @@ func (env *TestEnvironment) RunExecutorTest( workerErr = fmt.Errorf("timed out waiting for worker shutdown") } - return TestResult{ObservedLogs: observedLogs}, - errors.Join(execErr, workerErr) + // Combine all errors + allErrs := append([]error{execErr, workerErr}, verifyErrs...) + return TestResult{ObservedLogs: observedLogs}, errors.Join(allErrs...) } func (env *TestEnvironment) buildDirName() string { From 9af13b183800c8dac75dec72d5c5fb1e57f25931 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 17:36:26 -0800 Subject: [PATCH 43/66] Update throughput_stress.go Signed-off-by: Stephan Behnke --- scenarios/throughput_stress.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scenarios/throughput_stress.go b/scenarios/throughput_stress.go index f6f0134c..f7b34b8b 100644 --- a/scenarios/throughput_stress.go +++ b/scenarios/throughput_stress.go @@ -326,7 +326,7 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error continueAsNewWorkflows = continueAsNewPerIter * completedIterations } completedChildWorkflows := completedIterations * t.config.InternalIterations - completedWorkflows := completedIterations + completedChildWorkflows + continueAsNewWorkflows + completedWorkflows := completedIterations + completedChildWorkflows // + continueAsNewWorkflows TODO // Log completion summary. info.Logger.Info(fmt.Sprintf( From f09f5aee297dc69bafb3633428ed0bfd6fc68478 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 17:37:45 -0800 Subject: [PATCH 44/66] Update throughput_stress.go Signed-off-by: Stephan Behnke --- scenarios/throughput_stress.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scenarios/throughput_stress.go b/scenarios/throughput_stress.go index f7b34b8b..55e855e4 100644 --- a/scenarios/throughput_stress.go +++ b/scenarios/throughput_stress.go @@ -285,18 +285,18 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error completedIterations := state.CompletedIterations // Calculate continue-as-new workflows - var continueAsNewWorkflows int - if t.config.ContinueAsNewAfterIter > 0 { - // Subtract 1 because the last iteration doesn't trigger a continue-as-new. 
- continueAsNewPerIter := (t.config.InternalIterations - 1) / t.config.ContinueAsNewAfterIter - continueAsNewWorkflows = continueAsNewPerIter * completedIterations - } + // var continueAsNewWorkflows int + // if t.config.ContinueAsNewAfterIter > 0 { + // // Subtract 1 because the last iteration doesn't trigger a continue-as-new. + // continueAsNewPerIter := (t.config.InternalIterations - 1) / t.config.ContinueAsNewAfterIter + // continueAsNewWorkflows = continueAsNewPerIter * completedIterations + // } // Calculate child workflows completedChildWorkflows := completedIterations * t.config.InternalIterations // Total: parent + children + continue-as-new - return completedIterations + completedChildWorkflows + continueAsNewWorkflows + return completedIterations + completedChildWorkflows // TODO continueAsNewWorkflows } completionVerifier.SetExpectedWorkflowCount(expectedWorkflowCount) @@ -326,7 +326,7 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error continueAsNewWorkflows = continueAsNewPerIter * completedIterations } completedChildWorkflows := completedIterations * t.config.InternalIterations - completedWorkflows := completedIterations + completedChildWorkflows // + continueAsNewWorkflows TODO + completedWorkflows := completedIterations + completedChildWorkflows + continueAsNewWorkflows // Log completion summary. info.Logger.Info(fmt.Sprintf( From 73f82cb5f774873190553d2b20e3703bb7fac039 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 18:02:44 -0800 Subject: [PATCH 45/66] Update workflow_completion_checker_test.go Signed-off-by: Stephan Behnke --- scenarios/workflow_completion_checker_test.go | 85 ++++--------------- 1 file changed, 16 insertions(+), 69 deletions(-) diff --git a/scenarios/workflow_completion_checker_test.go b/scenarios/workflow_completion_checker_test.go index fe9aef35..61703da4 100644 --- a/scenarios/workflow_completion_checker_test.go +++ b/scenarios/workflow_completion_checker_test.go @@ -1,7 +1,6 @@ package scenarios import ( - "context" "fmt" "testing" "time" @@ -9,93 +8,41 @@ import ( "github.com/stretchr/testify/require" "github.com/temporalio/omes/cmd/clioptions" "github.com/temporalio/omes/loadgen" - "github.com/temporalio/omes/loadgen/kitchensink" "github.com/temporalio/omes/workers" - "go.temporal.io/api/common/v1" - "go.uber.org/zap/zaptest" ) // Test that WorkflowCompletionChecker is able to detect a stuck workflow. +// Uses the stuck_workflow scenario which has a workflow that blocks forever on iteration 1. +// The scenario's executor implements the Verifier interface, so env.RunExecutorTest +// automatically runs verification and reports errors. 
func TestWorkflowCompletionChecker(t *testing.T) { t.Parallel() env := workers.SetupTestEnvironment(t, workers.WithExecutorTimeout(5*time.Second)) - testLogger := zaptest.NewLogger(t).Sugar() - scenarioInfo := loadgen.ScenarioInfo{ RunID: fmt.Sprintf("stuck-%d", time.Now().Unix()), ExecutionID: "test-exec-id", Configuration: loadgen.RunConfiguration{ Iterations: 10, }, - Client: env.TemporalClient(), - Namespace: "default", - Logger: testLogger, } - // Create workflow completion verifier - verifier, err := loadgen.NewWorkflowCompletionChecker(t.Context(), scenarioInfo, 30*time.Second) - require.NoError(t, err, "failed to create verifier") - - executor := &loadgen.KitchenSinkExecutor{ - TestInput: &kitchensink.TestInput{ - WorkflowInput: &kitchensink.WorkflowInput{}, - }, - UpdateWorkflowOptions: func(ctx context.Context, run *loadgen.Run, options *loadgen.KitchenSinkWorkflowOptions) error { - // Only the first iteration should block forever. - if run.Iteration == 1 { - options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ - { - Actions: []*kitchensink.Action{ - { - Variant: &kitchensink.Action_AwaitWorkflowState{ - AwaitWorkflowState: &kitchensink.AwaitWorkflowState{ - Key: "will-never-be-set", - Value: "never", - }, - }, - }, - }, - }, - } - } else if run.Iteration%2 == 0 { - // Have some Continue-As-New. - options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ - { - Actions: []*kitchensink.Action{ - { - Variant: &kitchensink.Action_ContinueAsNew{ - ContinueAsNew: &kitchensink.ContinueAsNewAction{ - Arguments: []*common.Payload{}, - }, - }, - }, - }, - }, - } - } else { - options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ - kitchensink.NoOpSingleActivityActionSet(), - } - } - return nil - }, - } + // Get the stuck_workflow scenario executor + scenario := loadgen.GetScenario("stuck_workflow") + require.NotNil(t, scenario, "stuck_workflow scenario should be registered") + executor := scenario.ExecutorFn() - _, err = env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) - require.Error(t, err, "executor should fail because first iteration times out") + // RunExecutorTest will automatically run verification since the executor implements Verifier + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.Error(t, err, "should fail due to stuck workflow and verification errors") require.Contains(t, err.Error(), "deadline exceeded", "should report timed out iteration") + require.Contains(t, err.Error(), "non-completed workflow: WorkflowID=w-stuck-", "should report stuck workflow from verifier") - execState := executor.Snapshot().(loadgen.ExecutorState) - require.Equal(t, 4, execState.CompletedIterations, "should complete 4 iterations") - - // Verify using the verifier - pass the state directly - // Use a timeout that allows for visibility to catch up and retries to occur - verifyCtx, cancel := context.WithTimeout(t.Context(), 10*time.Second) - defer cancel() - verifyErrs := verifier.VerifyRun(verifyCtx, scenarioInfo, execState) - require.NotEmpty(t, verifyErrs) - require.Contains(t, verifyErrs[0].Error(), "non-completed workflow: WorkflowID=w-stuck-") + // Verify the executor state shows 9 completed iterations (all except the stuck one) + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 9, execState.CompletedIterations, "should complete 9 iterations (all 
except iteration 1 which is stuck)") } From 4b32eb91a9e78caf714902e9a4b941f5fc0dd040 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 18:02:48 -0800 Subject: [PATCH 46/66] Create stuck_workflow.go Signed-off-by: Stephan Behnke --- scenarios/stuck_workflow.go | 115 ++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 scenarios/stuck_workflow.go diff --git a/scenarios/stuck_workflow.go b/scenarios/stuck_workflow.go new file mode 100644 index 00000000..21c66efe --- /dev/null +++ b/scenarios/stuck_workflow.go @@ -0,0 +1,115 @@ +package scenarios + +import ( + "context" + "time" + + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/loadgen/kitchensink" + "go.temporal.io/api/common/v1" + "go.temporal.io/sdk/converter" +) + +// stuckWorkflowExecutor wraps KitchenSinkExecutor and implements Verifier interface +// to detect stuck workflows using WorkflowCompletionVerifier. +type stuckWorkflowExecutor struct { + *loadgen.KitchenSinkExecutor + verifier *loadgen.WorkflowCompletionVerifier +} + +var _ loadgen.Verifier = (*stuckWorkflowExecutor)(nil) +var _ loadgen.Resumable = (*stuckWorkflowExecutor)(nil) + +func (e *stuckWorkflowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create the verifier before running + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.verifier = verifier + + // Run the embedded executor + return e.KitchenSinkExecutor.Run(ctx, info) +} + +func (e *stuckWorkflowExecutor) VerifyRun(ctx context.Context, info loadgen.ScenarioInfo, state loadgen.ExecutorState) []error { + if e.verifier == nil { + return nil + } + return e.verifier.VerifyRun(ctx, info, state) +} + +func (e *stuckWorkflowExecutor) Snapshot() any { + return e.KitchenSinkExecutor.Snapshot() +} + +func (e *stuckWorkflowExecutor) LoadState(loader func(any) error) error { + return e.KitchenSinkExecutor.LoadState(loader) +} + +func init() { + loadgen.MustRegisterScenario(loadgen.Scenario{ + Description: "Test scenario where the first iteration blocks forever (stuck workflow), " + + "even iterations use Continue-As-New, and odd iterations complete normally. " + + "Used for testing workflow completion detection.", + ExecutorFn: func() loadgen.Executor { + return &stuckWorkflowExecutor{ + KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ + TestInput: &kitchensink.TestInput{ + WorkflowInput: &kitchensink.WorkflowInput{}, + }, + UpdateWorkflowOptions: func(ctx context.Context, run *loadgen.Run, options *loadgen.KitchenSinkWorkflowOptions) error { + // Only the first iteration should block forever. + if run.Iteration == 1 { + options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + { + Actions: []*kitchensink.Action{ + { + Variant: &kitchensink.Action_AwaitWorkflowState{ + AwaitWorkflowState: &kitchensink.AwaitWorkflowState{ + Key: "will-never-be-set", + Value: "never", + }, + }, + }, + }, + }, + } + } else if run.Iteration%2 == 0 { + // Have some Continue-As-New. + // ContinueAsNew needs to pass the workflow input as the first argument. + // We pass a simple completion action to make the continued workflow complete immediately. 
+ workflowInput, err := converter.GetDefaultDataConverter().ToPayload( + &kitchensink.WorkflowInput{ + InitialActions: []*kitchensink.ActionSet{ + kitchensink.NoOpSingleActivityActionSet(), + }, + }) + if err != nil { + return err + } + options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + { + Actions: []*kitchensink.Action{ + { + Variant: &kitchensink.Action_ContinueAsNew{ + ContinueAsNew: &kitchensink.ContinueAsNewAction{ + Arguments: []*common.Payload{workflowInput}, + }, + }, + }, + }, + }, + } + } else { + options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + kitchensink.NoOpSingleActivityActionSet(), + } + } + return nil + }, + }, + } + }, +}) +} From 199bd8b0548b5b3986e55ebc3152af3ba1535173 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 18:07:41 -0800 Subject: [PATCH 47/66] Update stuck_workflow.go Signed-off-by: Stephan Behnke --- scenarios/stuck_workflow.go | 94 ++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 43 deletions(-) diff --git a/scenarios/stuck_workflow.go b/scenarios/stuck_workflow.go index 21c66efe..0fa665bd 100644 --- a/scenarios/stuck_workflow.go +++ b/scenarios/stuck_workflow.go @@ -59,57 +59,65 @@ func init() { WorkflowInput: &kitchensink.WorkflowInput{}, }, UpdateWorkflowOptions: func(ctx context.Context, run *loadgen.Run, options *loadgen.KitchenSinkWorkflowOptions) error { - // Only the first iteration should block forever. - if run.Iteration == 1 { - options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ - { - Actions: []*kitchensink.Action{ - { - Variant: &kitchensink.Action_AwaitWorkflowState{ - AwaitWorkflowState: &kitchensink.AwaitWorkflowState{ - Key: "will-never-be-set", - Value: "never", + // Only the first iteration should block forever. + if run.Iteration == 1 { + options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + { + Actions: []*kitchensink.Action{ + { + Variant: &kitchensink.Action_AwaitWorkflowState{ + AwaitWorkflowState: &kitchensink.AwaitWorkflowState{ + Key: "will-never-be-set", + Value: "never", + }, }, }, }, }, - }, - } - } else if run.Iteration%2 == 0 { - // Have some Continue-As-New. - // ContinueAsNew needs to pass the workflow input as the first argument. - // We pass a simple completion action to make the continued workflow complete immediately. - workflowInput, err := converter.GetDefaultDataConverter().ToPayload( - &kitchensink.WorkflowInput{ - InitialActions: []*kitchensink.ActionSet{ - kitchensink.NoOpSingleActivityActionSet(), - }, - }) - if err != nil { - return err - } - options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ - { - Actions: []*kitchensink.Action{ - { - Variant: &kitchensink.Action_ContinueAsNew{ - ContinueAsNew: &kitchensink.ContinueAsNewAction{ - Arguments: []*common.Payload{workflowInput}, + } + } else if run.Iteration%2 == 0 { + // Have some Continue-As-New. + // ContinueAsNew needs to pass the workflow input as the first argument. + // We pass a simple completion action to make the continued workflow complete immediately. 
+ workflowInput, err := converter.GetDefaultDataConverter().ToPayload( + &kitchensink.WorkflowInput{ + InitialActions: []*kitchensink.ActionSet{ + kitchensink.NoOpSingleActivityActionSet(), + }, + }) + if err != nil { + return err + } + options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + { + Actions: []*kitchensink.Action{ + { + Variant: &kitchensink.Action_ContinueAsNew{ + ContinueAsNew: &kitchensink.ContinueAsNewAction{ + Arguments: []*common.Payload{workflowInput}, + }, }, }, }, }, - }, - } - } else { - options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ - kitchensink.NoOpSingleActivityActionSet(), + } + } else { + options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + kitchensink.NoOpSingleActivityActionSet(), + } } - } - return nil + return nil + }, }, - }, - } - }, -}) + } + }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*stuckWorkflowExecutor) + if e.verifier == nil || e.KitchenSinkExecutor == nil { + return nil + } + state := e.KitchenSinkExecutor.Snapshot().(loadgen.ExecutorState) + return e.verifier.VerifyRun(ctx, info, state) + }, + }) } From d00fd0fd94d565ad22778d0cb727e5af1a3b77c5 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sun, 16 Nov 2025 18:31:15 -0800 Subject: [PATCH 48/66] Update ebb_and_flow.go Signed-off-by: Stephan Behnke --- scenarios/ebb_and_flow.go | 1 + 1 file changed, 1 insertion(+) diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index b3f02344..ad7ee5af 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -307,6 +307,7 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( options := run.DefaultStartWorkflowOptions() options.ID = fmt.Sprintf("%s-track-%d", e.id, iteration) options.WorkflowExecutionErrorWhenAlreadyStarted = false + options.WorkflowExecutionTimeout = 30 * time.Second // TypedSearchAttributes are already set by DefaultStartWorkflowOptions() workflowInput := &WorkflowInput{ From 488b82cfae61c9113d0e51605adad55c58596ee7 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 17 Nov 2025 08:42:53 -0800 Subject: [PATCH 49/66] Update ebb_and_flow.go Signed-off-by: Stephan Behnke --- scenarios/ebb_and_flow.go | 1 - 1 file changed, 1 deletion(-) diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index ad7ee5af..b3f02344 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -307,7 +307,6 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( options := run.DefaultStartWorkflowOptions() options.ID = fmt.Sprintf("%s-track-%d", e.id, iteration) options.WorkflowExecutionErrorWhenAlreadyStarted = false - options.WorkflowExecutionTimeout = 30 * time.Second // TypedSearchAttributes are already set by DefaultStartWorkflowOptions() workflowInput := &WorkflowInput{ From 350212f32dc71c0576e1c21b955baa404cbd740b Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 17 Nov 2025 13:04:21 -0800 Subject: [PATCH 50/66] Update cli.Dockerfile Signed-off-by: Stephan Behnke --- dockerfiles/cli.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockerfiles/cli.Dockerfile b/dockerfiles/cli.Dockerfile index 07a57bf3..0c825f26 100644 --- a/dockerfiles/cli.Dockerfile +++ b/dockerfiles/cli.Dockerfile @@ -69,7 +69,7 @@ COPY --from=instrumented /app_transformed /app_transformed WORKDIR /app_transformed/customer # Build the CLI -RUN CGO_ENABLED=0 go build -o temporal-omes ./cmd +RUN CGO_ENABLED=0 go build -o temporal-omes -tags 
with_antithesis_sdk ./cmd # Install protoc-gen-go for kitchen-sink-gen build RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.31.0 From c50ae75d9e3d8923682af726a4833b620173165e Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 17 Nov 2025 17:30:44 -0800 Subject: [PATCH 51/66] Update ebb_and_flow.go Signed-off-by: Stephan Behnke --- scenarios/ebb_and_flow.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index b3f02344..caa1eed9 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -313,13 +313,22 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( InitialActions: []*ActionSet{ { Actions: actions, - Concurrent: true, + Concurrent: false, + }, + { + Actions: []*Action{ + { + Variant: &Action_ReturnResult{ + ReturnResult: &ReturnResultAction{}, + }, + }, + }, }, }, } // Start workflow using kitchensink. - wf, err := e.Client.ExecuteWorkflow(ctx, options, "kitchenSink", workflowInput) + _, err := e.Client.ExecuteWorkflow(ctx, options, "kitchenSink", workflowInput) if err != nil { return fmt.Errorf("failed to start kitchensink workflow for iteration %d: %w", iteration, err) } From 7bfeddec9ad96023a3324549195042beb4df034c Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 17 Nov 2025 17:31:29 -0800 Subject: [PATCH 52/66] Update ebb_and_flow.go Signed-off-by: Stephan Behnke --- scenarios/ebb_and_flow.go | 1 - 1 file changed, 1 deletion(-) diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index caa1eed9..522b53f1 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -307,7 +307,6 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( options := run.DefaultStartWorkflowOptions() options.ID = fmt.Sprintf("%s-track-%d", e.id, iteration) options.WorkflowExecutionErrorWhenAlreadyStarted = false - // TypedSearchAttributes are already set by DefaultStartWorkflowOptions() workflowInput := &WorkflowInput{ InitialActions: []*ActionSet{ From 4568ff495a0c704d0d0e4963836f22cdaa8e554f Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 17 Nov 2025 17:32:43 -0800 Subject: [PATCH 53/66] Update ebb_and_flow.go Signed-off-by: Stephan Behnke --- scenarios/ebb_and_flow.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index 522b53f1..95cbb9d5 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -312,7 +312,7 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( InitialActions: []*ActionSet{ { Actions: actions, - Concurrent: false, + Concurrent: true, }, { Actions: []*Action{ From d6e3bc79b3cfaed966899a36b6a9e34612cf08cb Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 17 Nov 2025 17:34:33 -0800 Subject: [PATCH 54/66] Update ebb_and_flow.go Signed-off-by: Stephan Behnke --- scenarios/ebb_and_flow.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index 95cbb9d5..7f847d3f 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -11,6 +11,7 @@ import ( "github.com/temporalio/omes/loadgen" . 
"github.com/temporalio/omes/loadgen/kitchensink" + "go.temporal.io/api/common/v1" ) const ( @@ -318,7 +319,9 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( Actions: []*Action{ { Variant: &Action_ReturnResult{ - ReturnResult: &ReturnResultAction{}, + ReturnResult: &ReturnResultAction{ + ReturnThis: &common.Payload{}, + }, }, }, }, From abb7c5f5779bf419c3b651f23163a147443f0a93 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 17 Nov 2025 17:48:06 -0800 Subject: [PATCH 55/66] Update ebb_and_flow.go Signed-off-by: Stephan Behnke --- scenarios/ebb_and_flow.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index 7f847d3f..b2751370 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -330,7 +330,7 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( } // Start workflow using kitchensink. - _, err := e.Client.ExecuteWorkflow(ctx, options, "kitchenSink", workflowInput) + wf, err := e.Client.ExecuteWorkflow(ctx, options, "kitchenSink", workflowInput) if err != nil { return fmt.Errorf("failed to start kitchensink workflow for iteration %d: %w", iteration, err) } From a9f631724955eb854013c8bfe1e9a061cf39241b Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 17 Nov 2025 19:33:47 -0800 Subject: [PATCH 56/66] add ns Signed-off-by: Stephan Behnke --- loadgen/helpers.go | 3 ++- scenarios/workflow_completion_checker_test.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/loadgen/helpers.go b/loadgen/helpers.go index 25cfa575..860ed634 100644 --- a/loadgen/helpers.go +++ b/loadgen/helpers.go @@ -88,7 +88,8 @@ func GetNonCompletedWorkflows(ctx context.Context, info ScenarioInfo, searchAttr var workflowErrors []error for _, exec := range resp.Executions { workflowErrors = append(workflowErrors, fmt.Errorf( - "non-completed workflow: WorkflowID=%s, RunID=%s, Status=%s", + "non-completed workflow: Namespace=%s, WorkflowID=%s, RunID=%s, Status=%s", + info.Namespace, exec.Execution.WorkflowId, exec.Execution.RunId, exec.Status.String())) diff --git a/scenarios/workflow_completion_checker_test.go b/scenarios/workflow_completion_checker_test.go index 61703da4..3f2db673 100644 --- a/scenarios/workflow_completion_checker_test.go +++ b/scenarios/workflow_completion_checker_test.go @@ -38,7 +38,7 @@ func TestWorkflowCompletionChecker(t *testing.T) { _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) require.Error(t, err, "should fail due to stuck workflow and verification errors") require.Contains(t, err.Error(), "deadline exceeded", "should report timed out iteration") - require.Contains(t, err.Error(), "non-completed workflow: WorkflowID=w-stuck-", "should report stuck workflow from verifier") + require.Contains(t, err.Error(), "non-completed workflow: Namespace=default, WorkflowID=w-stuck-", "should report stuck workflow from verifier") // Verify the executor state shows 9 completed iterations (all except the stuck one) resumable, ok := executor.(loadgen.Resumable) From f6eacd60a4a2b95c85006482513b292c20de9bea Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Tue, 18 Nov 2025 13:11:13 -0800 Subject: [PATCH 57/66] strip Signed-off-by: Stephan Behnke --- loadgen/kitchen_sink_executor_test.go | 1 - scenarios/ebb_and_flow_test.go | 3 +-- scenarios/throughput_stress_test.go | 3 +-- scenarios/versioning_pinned_workflows_test.go | 3 +-- scenarios/workflow_completion_checker_test.go | 3 +-- 5 files changed, 4 insertions(+), 9 deletions(-) diff --git 
a/loadgen/kitchen_sink_executor_test.go b/loadgen/kitchen_sink_executor_test.go index ef222f14..5865df3a 100644 --- a/loadgen/kitchen_sink_executor_test.go +++ b/loadgen/kitchen_sink_executor_test.go @@ -912,7 +912,6 @@ func testForSDK( scenarioInfo := ScenarioInfo{ ScenarioName: "kitchenSinkTest", RunID: fmt.Sprintf("%s-%d", t.Name(), time.Now().Unix()), - ExecutionID: "test-exec-id", Configuration: RunConfiguration{ Iterations: 1, }, diff --git a/scenarios/ebb_and_flow_test.go b/scenarios/ebb_and_flow_test.go index 31b195b2..2c6aad6f 100644 --- a/scenarios/ebb_and_flow_test.go +++ b/scenarios/ebb_and_flow_test.go @@ -49,8 +49,7 @@ func TestEbbAndFlow(t *testing.T) { }` scenarioInfo := loadgen.ScenarioInfo{ - RunID: fmt.Sprintf("eaf-%d", time.Now().Unix()), - ExecutionID: "test-exec-id", + RunID: fmt.Sprintf("eaf-%d", time.Now().Unix()), Configuration: loadgen.RunConfiguration{ Duration: 10 * time.Second, }, diff --git a/scenarios/throughput_stress_test.go b/scenarios/throughput_stress_test.go index c5dac149..eb3b64cc 100644 --- a/scenarios/throughput_stress_test.go +++ b/scenarios/throughput_stress_test.go @@ -22,8 +22,7 @@ func TestThroughputStress(t *testing.T) { workers.WithNexusEndpoint(taskQueueName)) scenarioInfo := loadgen.ScenarioInfo{ - RunID: runID, - ExecutionID: "test-exec-id", + RunID: runID, Configuration: loadgen.RunConfiguration{ Iterations: 2, }, diff --git a/scenarios/versioning_pinned_workflows_test.go b/scenarios/versioning_pinned_workflows_test.go index d18f058e..02f5f711 100644 --- a/scenarios/versioning_pinned_workflows_test.go +++ b/scenarios/versioning_pinned_workflows_test.go @@ -23,8 +23,7 @@ func TestVersioningPinnedWorkflows(t *testing.T) { workers.WithExecutorTimeout(2*time.Minute)) scenarioInfo := loadgen.ScenarioInfo{ - RunID: runID, - ExecutionID: "test-exec-id", + RunID: runID, Configuration: loadgen.RunConfiguration{ Iterations: 12, // 0 (start) + 11 iterations, will bump versions at 5 and 10 }, diff --git a/scenarios/workflow_completion_checker_test.go b/scenarios/workflow_completion_checker_test.go index 3f2db673..82c37ef3 100644 --- a/scenarios/workflow_completion_checker_test.go +++ b/scenarios/workflow_completion_checker_test.go @@ -22,8 +22,7 @@ func TestWorkflowCompletionChecker(t *testing.T) { workers.WithExecutorTimeout(5*time.Second)) scenarioInfo := loadgen.ScenarioInfo{ - RunID: fmt.Sprintf("stuck-%d", time.Now().Unix()), - ExecutionID: "test-exec-id", + RunID: fmt.Sprintf("stuck-%d", time.Now().Unix()), Configuration: loadgen.RunConfiguration{ Iterations: 10, }, From ecddb3781d73cc74064fff4cbdf988f5639dd8e7 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Wed, 19 Nov 2025 14:14:45 -0800 Subject: [PATCH 58/66] workflow_loop Signed-off-by: Stephan Behnke --- scenarios/fixed_resource_consumption.go | 30 ++- scenarios/fuzzer.go | 79 +++++-- scenarios/fuzzer_example.go | 43 +++- scenarios/scheduler_stress.go | 5 + scenarios/state_transitions_steady.go | 12 +- scenarios/stuck_workflow_test.go | 105 +++++++++ scenarios/workflow_loop.go | 154 +++++++++++++ scenarios/workflow_loop_test.go | 214 ++++++++++++++++++ scenarios/workflow_on_many_task_queues.go | 64 ++++-- scenarios/workflow_with_many_actions.go | 30 ++- .../workflow_with_single_noop_activity.go | 40 +++- workers/java/.classpath | 12 + .../org.eclipse.buildship.core.prefs | 13 ++ .../java/.settings/org.eclipse.jdt.core.prefs | 4 + 14 files changed, 749 insertions(+), 56 deletions(-) create mode 100644 scenarios/stuck_workflow_test.go create mode 100644 scenarios/workflow_loop.go create 
mode 100644 scenarios/workflow_loop_test.go create mode 100644 workers/java/.classpath create mode 100644 workers/java/.settings/org.eclipse.buildship.core.prefs create mode 100644 workers/java/.settings/org.eclipse.jdt.core.prefs diff --git a/scenarios/fixed_resource_consumption.go b/scenarios/fixed_resource_consumption.go index d6379ddb..c9136056 100644 --- a/scenarios/fixed_resource_consumption.go +++ b/scenarios/fixed_resource_consumption.go @@ -1,6 +1,7 @@ package scenarios import ( + "context" "math" "math/rand" "time" @@ -14,6 +15,23 @@ import ( // This scenario is meant to be adjusted and run manually to evaluate the performance of different // slot provider implementations +type fixedResourceExecutor struct { + *loadgen.KitchenSinkExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *fixedResourceExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the kitchen sink executor + return e.KitchenSinkExecutor.Run(ctx, info) +} + func parallelResourcesActions( numConccurrent int, bytesToAlloc int, @@ -63,7 +81,8 @@ func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "Used for testing slot provider performance. Runs activities that consume certain amounts of resources.", ExecutorFn: func() loadgen.Executor { - return &loadgen.KitchenSinkExecutor{ + return &fixedResourceExecutor{ + KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ TestInput: &kitchensink.TestInput{ WorkflowInput: &kitchensink.WorkflowInput{ InitialActions: []*kitchensink.ActionSet{ @@ -123,7 +142,16 @@ func init() { }, }, }, + }, + } + }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*fixedResourceExecutor) + if e.completionVerifier == nil { + return nil } + state := e.KitchenSinkExecutor.GetState() + return e.completionVerifier.VerifyRun(ctx, info, state) }, }) } diff --git a/scenarios/fuzzer.go b/scenarios/fuzzer.go index af62408a..4f7314b4 100644 --- a/scenarios/fuzzer.go +++ b/scenarios/fuzzer.go @@ -2,43 +2,74 @@ package scenarios import ( "context" + "time" "github.com/temporalio/omes/loadgen" ) +type fuzzerExecutor struct { + fuzzExecutor loadgen.FuzzExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *fuzzerExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the fuzz executor + return e.fuzzExecutor.Run(ctx, info) +} + func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "This scenario uses the kitchen sink input generation tool to run fuzzy" + " workflows", ExecutorFn: func() loadgen.Executor { - return loadgen.FuzzExecutor{ - InitInputs: func(ctx context.Context, info loadgen.ScenarioInfo) loadgen.FileOrArgs { - fPath, ok := info.ScenarioOptions["input-file"] - if ok && fPath != "" { - return loadgen.FileOrArgs{ - FilePath: fPath, + return &fuzzerExecutor{ + fuzzExecutor: loadgen.FuzzExecutor{ + InitInputs: func(ctx context.Context, info loadgen.ScenarioInfo) loadgen.FileOrArgs { + fPath, ok := info.ScenarioOptions["input-file"] + if ok && fPath != "" { + return loadgen.FileOrArgs{ + FilePath: fPath, + } } - } - args := 
[]string{"generate"} - seed, ok := info.ScenarioOptions["seed"] - if ok && seed != "" { - args = append(args, "--explicit-seed", seed) - } - config, ok := info.ScenarioOptions["config"] - if ok && config != "" { - args = append(args, "--generator-config-override", config) - } - _, ok = info.ScenarioOptions["no-output-file"] - if !ok { - args = append(args, "--output-path", "last_fuzz_run.proto") - } - return loadgen.FileOrArgs{ - Args: args, - } + args := []string{"generate"} + seed, ok := info.ScenarioOptions["seed"] + if ok && seed != "" { + args = append(args, "--explicit-seed", seed) + } + config, ok := info.ScenarioOptions["config"] + if ok && config != "" { + args = append(args, "--generator-config-override", config) + } + _, ok = info.ScenarioOptions["no-output-file"] + if !ok { + args = append(args, "--output-path", "last_fuzz_run.proto") + } + return loadgen.FileOrArgs{ + Args: args, + } + }, + DefaultConfiguration: loadgen.RunConfiguration{}, }, - DefaultConfiguration: loadgen.RunConfiguration{}, } }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*fuzzerExecutor) + if e.completionVerifier == nil { + return nil + } + // Get state from the embedded generic executor (FuzzExecutor creates one internally) + state := loadgen.ExecutorState{ + CompletedIterations: info.Configuration.Iterations, + } + return e.completionVerifier.VerifyRun(ctx, info, state) + }, }) } diff --git a/scenarios/fuzzer_example.go b/scenarios/fuzzer_example.go index 26fb2dc3..0d6715c7 100644 --- a/scenarios/fuzzer_example.go +++ b/scenarios/fuzzer_example.go @@ -2,23 +2,54 @@ package scenarios import ( "context" + "time" "github.com/temporalio/omes/loadgen" ) +type fuzzerExampleExecutor struct { + fuzzExecutor loadgen.FuzzExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *fuzzerExampleExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the fuzz executor + return e.fuzzExecutor.Run(ctx, info) +} + func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "This scenario runs the kitchen sink input generation tool `example` " + "command to help with basic verification of KS implementations.", ExecutorFn: func() loadgen.Executor { - return loadgen.FuzzExecutor{ - InitInputs: func(ctx context.Context, info loadgen.ScenarioInfo) loadgen.FileOrArgs { - return loadgen.FileOrArgs{ - Args: []string{"example"}, - } + return &fuzzerExampleExecutor{ + fuzzExecutor: loadgen.FuzzExecutor{ + InitInputs: func(ctx context.Context, info loadgen.ScenarioInfo) loadgen.FileOrArgs { + return loadgen.FileOrArgs{ + Args: []string{"example"}, + } + }, + DefaultConfiguration: loadgen.RunConfiguration{}, }, - DefaultConfiguration: loadgen.RunConfiguration{}, } }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*fuzzerExampleExecutor) + if e.completionVerifier == nil { + return nil + } + // Get state from the embedded generic executor (FuzzExecutor creates one internally) + state := loadgen.ExecutorState{ + CompletedIterations: info.Configuration.Iterations, + } + return e.completionVerifier.VerifyRun(ctx, info, state) + }, }) } diff --git a/scenarios/scheduler_stress.go b/scenarios/scheduler_stress.go index d040d509..9fc28220 100644 --- 
a/scenarios/scheduler_stress.go +++ b/scenarios/scheduler_stress.go @@ -43,6 +43,11 @@ func init() { }, } }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + // Scheduler stress scenario manages its own lifecycle and cleanup + // No additional verification needed beyond what happens during execution + return nil + }, }) } diff --git a/scenarios/state_transitions_steady.go b/scenarios/state_transitions_steady.go index 167343da..ac0573fe 100644 --- a/scenarios/state_transitions_steady.go +++ b/scenarios/state_transitions_steady.go @@ -16,7 +16,8 @@ type steadyStateConfig struct { type stateTransitionsSteadyExecutor struct { loadgen.ScenarioInfo - config *steadyStateConfig + config *steadyStateConfig + completionVerifier *loadgen.WorkflowCompletionVerifier } func init() { @@ -28,6 +29,14 @@ func init() { ExecutorFn: func() loadgen.Executor { return &stateTransitionsSteadyExecutor{} }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*stateTransitionsSteadyExecutor) + if e.completionVerifier == nil { + return nil + } + // For state transitions steady, we just need to verify no running workflows + return []error{e.completionVerifier.VerifyNoRunningWorkflows(ctx)} + }, }) } @@ -80,6 +89,7 @@ func (s *stateTransitionsSteadyExecutor) run(ctx context.Context) error { if err != nil { return fmt.Errorf("failed to create workflow completion checker: %w", err) } + s.completionVerifier = completionChecker // Execute initial workflow and get the transition count workflowParams := &kitchensink.WorkflowInput{ diff --git a/scenarios/stuck_workflow_test.go b/scenarios/stuck_workflow_test.go new file mode 100644 index 00000000..7f43cc30 --- /dev/null +++ b/scenarios/stuck_workflow_test.go @@ -0,0 +1,105 @@ +package scenarios + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/temporalio/omes/cmd/clioptions" + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/workers" +) + +// TestStuckWorkflowScenario verifies that the stuck_workflow scenario correctly detects +// stuck workflows through its VerifyFn implementation. +func TestStuckWorkflowScenario(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(5*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("stuck-test-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 10, + }, + } + + // Get the stuck_workflow scenario + scenario := loadgen.GetScenario("stuck_workflow") + require.NotNil(t, scenario, "stuck_workflow scenario should be registered") + require.NotNil(t, scenario.VerifyFn, "stuck_workflow scenario should have a VerifyFn") + + executor := scenario.ExecutorFn() + + // RunExecutorTest will automatically run verification since the scenario has a VerifyFn + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.Error(t, err, "should fail due to stuck workflow detected by verification") + require.Contains(t, err.Error(), "deadline exceeded", "should report deadline exceeded for stuck iteration") +} + +// TestStuckWorkflowVerifyFnDetectsStuckWorkflow tests that the VerifyFn properly identifies +// stuck workflows after execution. 
+func TestStuckWorkflowVerifyFnDetectsStuckWorkflow(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(5*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("stuck-verify-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 5, + }, + } + + // Get the stuck_workflow scenario + scenario := loadgen.GetScenario("stuck_workflow") + require.NotNil(t, scenario, "stuck_workflow scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor and expect it to fail + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.Error(t, err, "should fail due to verification detecting stuck workflow") + + // The error should indicate a non-completed workflow was detected + require.Contains(t, err.Error(), "non-completed workflow", "verification should report stuck workflow") +} + +// TestStuckWorkflowScenarioIterationBehavior tests that the stuck_workflow scenario +// behaves correctly across multiple iterations: +// - Iteration 1: blocks forever (stuck workflow) +// - Even iterations: use Continue-As-New +// - Odd iterations (except 1): complete normally +func TestStuckWorkflowScenarioIterationBehavior(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(5*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("stuck-behavior-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 7, // Test iterations 1-7 + }, + } + + // Get the stuck_workflow scenario + scenario := loadgen.GetScenario("stuck_workflow") + require.NotNil(t, scenario, "stuck_workflow scenario should be registered") + + executor := scenario.ExecutorFn() + + // RunExecutorTest will fail because iteration 1 will be stuck + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.Error(t, err, "should fail due to stuck workflow on iteration 1") + + // Verify the executor state shows 6 completed iterations (all except iteration 1) + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 6, execState.CompletedIterations, + "should complete 6 iterations (iterations 2-7, skipping stuck iteration 1)") +} diff --git a/scenarios/workflow_loop.go b/scenarios/workflow_loop.go new file mode 100644 index 00000000..3010e408 --- /dev/null +++ b/scenarios/workflow_loop.go @@ -0,0 +1,154 @@ +package scenarios + +import ( + "context" + "fmt" + "time" + + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/loadgen/kitchensink" +) + +const ( + // ActivityCountFlag controls the number of activities to execute sequentially + ActivityCountFlag = "activity-count" + // UseUpdateFlag controls whether to use update instead of signal (default: false, use signal) + UseUpdateFlag = "use-update" +) + +type workflowLoopExecutor struct { + *loadgen.KitchenSinkExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *workflowLoopExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the kitchen sink executor + return e.KitchenSinkExecutor.Run(ctx, info) +} + +func init() { + 
loadgen.MustRegisterScenario(loadgen.Scenario{ + Description: fmt.Sprintf("Creates n activities sequentially, each activity sends one signal or update back to the workflow. "+ + "The workflow waits for each signal/update before proceeding. "+ + "Use --option %s= to set the activity count (default: 1). "+ + "Use --option %s=true to use updates instead of signals (default: false).", + ActivityCountFlag, UseUpdateFlag), + ExecutorFn: func() loadgen.Executor { + return &workflowLoopExecutor{ + KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ + TestInput: &kitchensink.TestInput{ + WorkflowInput: &kitchensink.WorkflowInput{}, + }, + PrepareTestInput: func(ctx context.Context, info loadgen.ScenarioInfo, params *kitchensink.TestInput) error { + activityCount := info.ScenarioOptionInt(ActivityCountFlag, 1) + if activityCount <= 0 { + return fmt.Errorf("%s must be positive, got %d", ActivityCountFlag, activityCount) + } + + useUpdate := info.ScenarioOptions[UseUpdateFlag] == "true" + mechanism := "signal" + if useUpdate { + mechanism = "update" + } + info.Logger.Infof("Preparing workflow loop with %d activities using %s", activityCount, mechanism) + + // Create actions for the workflow + var actions []*kitchensink.Action + + // Use a single state variable "loop-index" that tracks the current index + // This ensures signals/updates are processed consecutively in order + const stateKey = "loop-index" + + // For each iteration, create a sequential action that: + // 1. Executes an activity that sends a signal or update back to the workflow + // 2. Waits for the workflow state to be set to the current index by that signal/update + for i := 0; i < activityCount; i++ { + stateValue := fmt.Sprintf("%d", i) + + // Create the activity that sends signal/update to set the workflow state to the current index + var clientAction *kitchensink.ClientAction + if useUpdate { + // Use update + clientAction = &kitchensink.ClientAction{ + Variant: &kitchensink.ClientAction_DoUpdate{ + DoUpdate: &kitchensink.DoUpdate{ + Variant: &kitchensink.DoUpdate_DoActions{ + DoActions: &kitchensink.DoActionsUpdate{ + Variant: &kitchensink.DoActionsUpdate_DoActions{ + DoActions: &kitchensink.ActionSet{ + Actions: []*kitchensink.Action{ + kitchensink.NewSetWorkflowStateAction(stateKey, stateValue), + }, + }, + }, + }, + }, + }, + }, + } + } else { + // Use signal + clientAction = &kitchensink.ClientAction{ + Variant: &kitchensink.ClientAction_DoSignal{ + DoSignal: &kitchensink.DoSignal{ + Variant: &kitchensink.DoSignal_DoSignalActions_{ + DoSignalActions: &kitchensink.DoSignal_DoSignalActions{ + Variant: &kitchensink.DoSignal_DoSignalActions_DoActions{ + DoActions: &kitchensink.ActionSet{ + Actions: []*kitchensink.Action{ + kitchensink.NewSetWorkflowStateAction(stateKey, stateValue), + }, + }, + }, + }, + }, + }, + }, + } + } + + // Execute an activity that performs the client action (sends signal/update) + // This activity will use the Temporal client to send the signal/update back to its parent workflow + actions = append(actions, kitchensink.ClientActivity( + kitchensink.ClientActions(clientAction), + kitchensink.DefaultRemoteActivity, + )) + + // Wait for the workflow state to be set to the current index by the signal/update + // This ensures signals/updates are processed consecutively in order (0, 1, 2, ...) 
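+						// Net effect per index i: one activity sends the signal/update that sets
+						// loop-index to i, then the workflow awaits loop-index == i, so step i+1
+						// cannot begin until message i has been received and handled in order.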
+ actions = append(actions, kitchensink.NewAwaitWorkflowStateAction(stateKey, stateValue)) + } + + // Add final return action + actions = append(actions, kitchensink.NewEmptyReturnResultAction()) + + // Set the actions as sequential (not concurrent) + params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + { + Actions: actions, + Concurrent: false, + }, + } + + return nil + }, + }, + } + }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*workflowLoopExecutor) + if e.completionVerifier == nil { + return nil + } + state := e.KitchenSinkExecutor.GetState() + return e.completionVerifier.VerifyRun(ctx, info, state) + }, + }) +} diff --git a/scenarios/workflow_loop_test.go b/scenarios/workflow_loop_test.go new file mode 100644 index 00000000..3cf5767a --- /dev/null +++ b/scenarios/workflow_loop_test.go @@ -0,0 +1,214 @@ +package scenarios + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/temporalio/omes/cmd/clioptions" + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/workers" +) + +// TestWorkflowLoopScenario tests the workflow_loop scenario with default settings (1 activity). +func TestWorkflowLoopScenario(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(60*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 3, + }, + ScenarioOptions: map[string]string{ + ActivityCountFlag: "1", + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + require.NotNil(t, scenario.VerifyFn, "workflow_loop scenario should have a VerifyFn") + + executor := scenario.ExecutorFn() + + // Run the executor + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "executor should complete successfully") + + // Verify the executor state + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 3, execState.CompletedIterations, "should complete 3 iterations") +} + +// TestWorkflowLoopScenarioMultipleActivities tests the workflow_loop scenario with multiple activities. 
+func TestWorkflowLoopScenarioMultipleActivities(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(30*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-multi-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 2, + }, + ScenarioOptions: map[string]string{ + ActivityCountFlag: "5", + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "executor should complete successfully with 5 activities") + + // Verify the executor state + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 2, execState.CompletedIterations, "should complete 2 iterations") +} + +// TestWorkflowLoopScenarioVerification tests that the VerifyFn properly validates workflow completion. +func TestWorkflowLoopScenarioVerification(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(30*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-verify-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 5, + }, + ScenarioOptions: map[string]string{ + ActivityCountFlag: "3", + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor and verification + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "should complete successfully and pass verification") + + // Verify the executor state shows correct number of completed iterations + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 5, execState.CompletedIterations, "should complete all 5 iterations") +} + +// TestWorkflowLoopScenarioInvalidConfig tests that invalid configuration is rejected. +func TestWorkflowLoopScenarioInvalidConfig(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(10*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-invalid-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 1, + }, + ScenarioOptions: map[string]string{ + ActivityCountFlag: "0", // Invalid: must be positive + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor - should fail due to invalid configuration + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.Error(t, err, "should fail with invalid activity count") + require.Contains(t, err.Error(), "must be positive", "error should mention positive requirement") +} + +// TestWorkflowLoopScenarioWithUpdates tests the workflow_loop scenario using updates instead of signals. 
+func TestWorkflowLoopScenarioWithUpdates(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(30*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-update-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 2, + }, + ScenarioOptions: map[string]string{ + ActivityCountFlag: "3", + UseUpdateFlag: "true", // Use updates instead of signals + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "executor should complete successfully with updates") + + // Verify the executor state + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 2, execState.CompletedIterations, "should complete 2 iterations using updates") +} + +// TestWorkflowLoopScenarioWithUpdatesMultipleIterations tests updates with more iterations. +func TestWorkflowLoopScenarioWithUpdatesMultipleIterations(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(30*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-update-multi-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 4, + }, + ScenarioOptions: map[string]string{ + ActivityCountFlag: "2", + UseUpdateFlag: "true", + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "executor should complete successfully with multiple iterations using updates") + + // Verify the executor state + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 4, execState.CompletedIterations, "should complete 4 iterations") +} diff --git a/scenarios/workflow_on_many_task_queues.go b/scenarios/workflow_on_many_task_queues.go index a4b7a96a..dbd786f3 100644 --- a/scenarios/workflow_on_many_task_queues.go +++ b/scenarios/workflow_on_many_task_queues.go @@ -3,39 +3,67 @@ package scenarios import ( "context" "fmt" + "time" "github.com/temporalio/omes/loadgen" "github.com/temporalio/omes/loadgen/kitchensink" ) +type manyTaskQueuesExecutor struct { + *loadgen.KitchenSinkExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *manyTaskQueuesExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the kitchen sink executor + return e.KitchenSinkExecutor.Run(ctx, info) +} + func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "Each iteration executes a single workflow on one of the task queues. " + "Workers must be started with --task-queue-suffix-index-end as one less than task queue count here. 
" + "Additional options: task-queue-count (required).", ExecutorFn: func() loadgen.Executor { - return &loadgen.KitchenSinkExecutor{ - TestInput: &kitchensink.TestInput{ - WorkflowInput: &kitchensink.WorkflowInput{ - InitialActions: []*kitchensink.ActionSet{ - kitchensink.NoOpSingleActivityActionSet(), + return &manyTaskQueuesExecutor{ + KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ + TestInput: &kitchensink.TestInput{ + WorkflowInput: &kitchensink.WorkflowInput{ + InitialActions: []*kitchensink.ActionSet{ + kitchensink.NoOpSingleActivityActionSet(), + }, }, }, + PrepareTestInput: func(ctx context.Context, opts loadgen.ScenarioInfo, params *kitchensink.TestInput) error { + // Require task queue count + if opts.ScenarioOptionInt("task-queue-count", 0) == 0 { + return fmt.Errorf("task-queue-count option required") + } + return nil + }, + UpdateWorkflowOptions: func(ctx context.Context, run *loadgen.Run, options *loadgen.KitchenSinkWorkflowOptions) error { + // Add suffix to the task queue based on modulus of iteration + options.StartOptions.TaskQueue += + fmt.Sprintf("-%v", run.Iteration%run.ScenarioInfo.ScenarioOptionInt("task-queue-count", 0)) + return nil + }, }, - PrepareTestInput: func(ctx context.Context, opts loadgen.ScenarioInfo, params *kitchensink.TestInput) error { - // Require task queue count - if opts.ScenarioOptionInt("task-queue-count", 0) == 0 { - return fmt.Errorf("task-queue-count option required") - } - return nil - }, - UpdateWorkflowOptions: func(ctx context.Context, run *loadgen.Run, options *loadgen.KitchenSinkWorkflowOptions) error { - // Add suffix to the task queue based on modulus of iteration - options.StartOptions.TaskQueue += - fmt.Sprintf("-%v", run.Iteration%run.ScenarioInfo.ScenarioOptionInt("task-queue-count", 0)) - return nil - }, } }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*manyTaskQueuesExecutor) + if e.completionVerifier == nil { + return nil + } + state := e.KitchenSinkExecutor.GetState() + return e.completionVerifier.VerifyRun(ctx, info, state) + }, }) } diff --git a/scenarios/workflow_with_many_actions.go b/scenarios/workflow_with_many_actions.go index c7d2a362..d142f281 100644 --- a/scenarios/workflow_with_many_actions.go +++ b/scenarios/workflow_with_many_actions.go @@ -3,6 +3,7 @@ package scenarios import ( "context" "strconv" + "time" "go.temporal.io/api/common/v1" "go.temporal.io/sdk/converter" @@ -12,12 +13,30 @@ import ( "github.com/temporalio/omes/loadgen/kitchensink" ) +type manyActionsExecutor struct { + *loadgen.KitchenSinkExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *manyActionsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the kitchen sink executor + return e.KitchenSinkExecutor.Run(ctx, info) +} + func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "Each iteration executes a single workflow with a number of child workflows and/or activities. 
" + "Additional options: children-per-workflow (default 30), activities-per-workflow (default 30).", ExecutorFn: func() loadgen.Executor { - return &loadgen.KitchenSinkExecutor{ + return &manyActionsExecutor{ + KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ TestInput: &kitchensink.TestInput{ WorkflowInput: &kitchensink.WorkflowInput{ InitialActions: []*kitchensink.ActionSet{}, @@ -86,7 +105,16 @@ func init() { ) return nil }, + }, + } + }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*manyActionsExecutor) + if e.completionVerifier == nil { + return nil } + state := e.KitchenSinkExecutor.GetState() + return e.completionVerifier.VerifyRun(ctx, info, state) }, }) } diff --git a/scenarios/workflow_with_single_noop_activity.go b/scenarios/workflow_with_single_noop_activity.go index 2fb81c0a..fb6095d8 100644 --- a/scenarios/workflow_with_single_noop_activity.go +++ b/scenarios/workflow_with_single_noop_activity.go @@ -1,23 +1,53 @@ package scenarios import ( + "context" + "time" + "github.com/temporalio/omes/loadgen" "github.com/temporalio/omes/loadgen/kitchensink" ) +type noopActivityExecutor struct { + *loadgen.KitchenSinkExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *noopActivityExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the kitchen sink executor + return e.KitchenSinkExecutor.Run(ctx, info) +} + func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "Each iteration executes a single workflow with a noop activity.", ExecutorFn: func() loadgen.Executor { - return &loadgen.KitchenSinkExecutor{ - TestInput: &kitchensink.TestInput{ - WorkflowInput: &kitchensink.WorkflowInput{ - InitialActions: []*kitchensink.ActionSet{ - kitchensink.NoOpSingleActivityActionSet(), + return &noopActivityExecutor{ + KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ + TestInput: &kitchensink.TestInput{ + WorkflowInput: &kitchensink.WorkflowInput{ + InitialActions: []*kitchensink.ActionSet{ + kitchensink.NoOpSingleActivityActionSet(), + }, }, }, }, } }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*noopActivityExecutor) + if e.completionVerifier == nil { + return nil + } + state := e.KitchenSinkExecutor.GetState() + return e.completionVerifier.VerifyRun(ctx, info, state) + }, }) } diff --git a/workers/java/.classpath b/workers/java/.classpath new file mode 100644 index 00000000..9f95f67b --- /dev/null +++ b/workers/java/.classpath @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/workers/java/.settings/org.eclipse.buildship.core.prefs b/workers/java/.settings/org.eclipse.buildship.core.prefs new file mode 100644 index 00000000..b78a8101 --- /dev/null +++ b/workers/java/.settings/org.eclipse.buildship.core.prefs @@ -0,0 +1,13 @@ +arguments=--init-script /var/folders/4w/5qdjw8sd6417nldg5pvhs_rr0000gn/T/db3b08fc4a9ef609cb16b96b200fa13e563f396e9bb1ed0905fdab7bc3bc513b.gradle +auto.sync=false +build.scans.enabled=false +connection.gradle.distribution=GRADLE_DISTRIBUTION(WRAPPER) +connection.project.dir= +eclipse.preferences.version=1 +gradle.user.home= +java.home=/Users/stephan/.local/share/mise/installs/java/21.0.2 +jvm.arguments= +offline.mode=false +override.workspace.settings=true 
+show.console.view=true +show.executions.view=true diff --git a/workers/java/.settings/org.eclipse.jdt.core.prefs b/workers/java/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 00000000..ee4d5dd0 --- /dev/null +++ b/workers/java/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,4 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=10 +org.eclipse.jdt.core.compiler.compliance=10 +org.eclipse.jdt.core.compiler.source=10 From 114cf497a33af7a2e649c26e4074114d34240d46 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Wed, 19 Nov 2025 15:40:26 -0800 Subject: [PATCH 59/66] wip Signed-off-by: Stephan Behnke --- scenarios/workflow_loop.go | 39 ++++++++++++++++++++++----------- scenarios/workflow_loop_test.go | 39 +++++++++++++++++++++++++++++++-- 2 files changed, 63 insertions(+), 15 deletions(-) diff --git a/scenarios/workflow_loop.go b/scenarios/workflow_loop.go index 3010e408..bad33b7b 100644 --- a/scenarios/workflow_loop.go +++ b/scenarios/workflow_loop.go @@ -3,6 +3,7 @@ package scenarios import ( "context" "fmt" + "math/rand" "time" "github.com/temporalio/omes/loadgen" @@ -12,8 +13,8 @@ import ( const ( // ActivityCountFlag controls the number of activities to execute sequentially ActivityCountFlag = "activity-count" - // UseUpdateFlag controls whether to use update instead of signal (default: false, use signal) - UseUpdateFlag = "use-update" + // MessageViaFlag controls whether to use signal, update, or random (default: "signal") + MessageViaFlag = "message-via" ) type workflowLoopExecutor struct { @@ -35,11 +36,11 @@ func (e *workflowLoopExecutor) Run(ctx context.Context, info loadgen.ScenarioInf func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ - Description: fmt.Sprintf("Creates n activities sequentially, each activity sends one signal or update back to the workflow. "+ + Description: fmt.Sprintf("Creates n activities sequentially, each sends one signal or update back to the workflow. "+ "The workflow waits for each signal/update before proceeding. "+ - "Use --option %s= to set the activity count (default: 1). "+ - "Use --option %s=true to use updates instead of signals (default: false).", - ActivityCountFlag, UseUpdateFlag), + "Use --option %s= to set the count (default: 1). 
"+ + "Use --option %s= to choose mechanism (default: signal).", + ActivityCountFlag, MessageViaFlag), ExecutorFn: func() loadgen.Executor { return &workflowLoopExecutor{ KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ @@ -52,12 +53,15 @@ func init() { return fmt.Errorf("%s must be positive, got %d", ActivityCountFlag, activityCount) } - useUpdate := info.ScenarioOptions[UseUpdateFlag] == "true" - mechanism := "signal" - if useUpdate { - mechanism = "update" + messageVia := info.ScenarioOptions[MessageViaFlag] + if messageVia == "" { + messageVia = "signal" } - info.Logger.Infof("Preparing workflow loop with %d activities using %s", activityCount, mechanism) + if messageVia != "signal" && messageVia != "update" && messageVia != "random" { + return fmt.Errorf("%s must be 'signal', 'update', or 'random', got '%s'", MessageViaFlag, messageVia) + } + + info.Logger.Infof("Preparing workflow loop with %d iterations using message-via=%s", activityCount, messageVia) // Create actions for the workflow var actions []*kitchensink.Action @@ -72,7 +76,16 @@ func init() { for i := 0; i < activityCount; i++ { stateValue := fmt.Sprintf("%d", i) - // Create the activity that sends signal/update to set the workflow state to the current index + // Determine if we use update for this iteration + var useUpdate bool + if messageVia == "random" { + // Pick randomly between signal and update + useUpdate = rand.Intn(2) == 1 + } else { + useUpdate = messageVia == "update" + } + + // Create the client action that will be executed var clientAction *kitchensink.ClientAction if useUpdate { // Use update @@ -115,7 +128,7 @@ func init() { } // Execute an activity that performs the client action (sends signal/update) - // This activity will use the Temporal client to send the signal/update back to its parent workflow + // This activity will use the Temporal client to send the signal/update back to the workflow actions = append(actions, kitchensink.ClientActivity( kitchensink.ClientActions(clientAction), kitchensink.DefaultRemoteActivity, diff --git a/scenarios/workflow_loop_test.go b/scenarios/workflow_loop_test.go index 3cf5767a..cbd70375 100644 --- a/scenarios/workflow_loop_test.go +++ b/scenarios/workflow_loop_test.go @@ -157,7 +157,7 @@ func TestWorkflowLoopScenarioWithUpdates(t *testing.T) { }, ScenarioOptions: map[string]string{ ActivityCountFlag: "3", - UseUpdateFlag: "true", // Use updates instead of signals + MessageViaFlag: "update", // Use updates instead of signals }, } @@ -192,7 +192,7 @@ func TestWorkflowLoopScenarioWithUpdatesMultipleIterations(t *testing.T) { }, ScenarioOptions: map[string]string{ ActivityCountFlag: "2", - UseUpdateFlag: "true", + MessageViaFlag: "update", }, } @@ -212,3 +212,38 @@ func TestWorkflowLoopScenarioWithUpdatesMultipleIterations(t *testing.T) { execState := resumable.Snapshot().(loadgen.ExecutorState) require.Equal(t, 4, execState.CompletedIterations, "should complete 4 iterations") } + +// TestWorkflowLoopScenarioWithRandomSignalAndUpdate tests random selection between signals and updates. 
+func TestWorkflowLoopScenarioWithRandomSignalAndUpdate(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(30*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-random-via-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 2, + }, + ScenarioOptions: map[string]string{ + ActivityCountFlag: "5", + MessageViaFlag: "random", // Randomly pick between signal and update + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "executor should complete successfully with random signal/update") + + // Verify the executor state + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 2, execState.CompletedIterations, "should complete 2 iterations with random via") +} From 849fffbe038ae56ca839953847d8879d480f7aad Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Wed, 19 Nov 2025 15:41:42 -0800 Subject: [PATCH 60/66] rename Signed-off-by: Stephan Behnke --- scenarios/workflow_loop.go | 10 +++++----- scenarios/workflow_loop_test.go | 20 ++++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/scenarios/workflow_loop.go b/scenarios/workflow_loop.go index bad33b7b..0d2b3627 100644 --- a/scenarios/workflow_loop.go +++ b/scenarios/workflow_loop.go @@ -11,8 +11,8 @@ import ( ) const ( - // ActivityCountFlag controls the number of activities to execute sequentially - ActivityCountFlag = "activity-count" + // LoopsFlag controls the number of activities to execute sequentially + LoopsFlag = "loops" // MessageViaFlag controls whether to use signal, update, or random (default: "signal") MessageViaFlag = "message-via" ) @@ -40,7 +40,7 @@ func init() { "The workflow waits for each signal/update before proceeding. "+ "Use --option %s= to set the count (default: 1). 
"+ "Use --option %s= to choose mechanism (default: signal).", - ActivityCountFlag, MessageViaFlag), + LoopsFlag, MessageViaFlag), ExecutorFn: func() loadgen.Executor { return &workflowLoopExecutor{ KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ @@ -48,9 +48,9 @@ func init() { WorkflowInput: &kitchensink.WorkflowInput{}, }, PrepareTestInput: func(ctx context.Context, info loadgen.ScenarioInfo, params *kitchensink.TestInput) error { - activityCount := info.ScenarioOptionInt(ActivityCountFlag, 1) + activityCount := info.ScenarioOptionInt(LoopsFlag, 1) if activityCount <= 0 { - return fmt.Errorf("%s must be positive, got %d", ActivityCountFlag, activityCount) + return fmt.Errorf("%s must be positive, got %d", LoopsFlag, activityCount) } messageVia := info.ScenarioOptions[MessageViaFlag] diff --git a/scenarios/workflow_loop_test.go b/scenarios/workflow_loop_test.go index cbd70375..a6b1a201 100644 --- a/scenarios/workflow_loop_test.go +++ b/scenarios/workflow_loop_test.go @@ -24,7 +24,7 @@ func TestWorkflowLoopScenario(t *testing.T) { Iterations: 3, }, ScenarioOptions: map[string]string{ - ActivityCountFlag: "1", + LoopsFlag: "1", }, } @@ -59,7 +59,7 @@ func TestWorkflowLoopScenarioMultipleActivities(t *testing.T) { Iterations: 2, }, ScenarioOptions: map[string]string{ - ActivityCountFlag: "5", + LoopsFlag: "5", }, } @@ -93,7 +93,7 @@ func TestWorkflowLoopScenarioVerification(t *testing.T) { Iterations: 5, }, ScenarioOptions: map[string]string{ - ActivityCountFlag: "3", + LoopsFlag: "3", }, } @@ -127,7 +127,7 @@ func TestWorkflowLoopScenarioInvalidConfig(t *testing.T) { Iterations: 1, }, ScenarioOptions: map[string]string{ - ActivityCountFlag: "0", // Invalid: must be positive + LoopsFlag: "0", // Invalid: must be positive }, } @@ -156,8 +156,8 @@ func TestWorkflowLoopScenarioWithUpdates(t *testing.T) { Iterations: 2, }, ScenarioOptions: map[string]string{ - ActivityCountFlag: "3", - MessageViaFlag: "update", // Use updates instead of signals + LoopsFlag: "3", + MessageViaFlag: "update", // Use updates instead of signals }, } @@ -191,8 +191,8 @@ func TestWorkflowLoopScenarioWithUpdatesMultipleIterations(t *testing.T) { Iterations: 4, }, ScenarioOptions: map[string]string{ - ActivityCountFlag: "2", - MessageViaFlag: "update", + LoopsFlag: "2", + MessageViaFlag: "update", }, } @@ -226,8 +226,8 @@ func TestWorkflowLoopScenarioWithRandomSignalAndUpdate(t *testing.T) { Iterations: 2, }, ScenarioOptions: map[string]string{ - ActivityCountFlag: "5", - MessageViaFlag: "random", // Randomly pick between signal and update + LoopsFlag: "5", + MessageViaFlag: "random", // Randomly pick between signal and update }, } From 31cd545f4dacb2f928052421a77b73d315cdee08 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Wed, 19 Nov 2025 15:48:32 -0800 Subject: [PATCH 61/66] print qury Signed-off-by: Stephan Behnke --- loadgen/helpers.go | 3 +++ loadgen/workflow_completion_verifier.go | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/loadgen/helpers.go b/loadgen/helpers.go index 860ed634..04d7ab1d 100644 --- a/loadgen/helpers.go +++ b/loadgen/helpers.go @@ -71,6 +71,8 @@ func GetNonCompletedWorkflows(ctx context.Context, info ScenarioInfo, searchAttr runID, ) + info.Logger.Infof("Using visibility query for non-completed workflows: %q", nonCompletedQuery) + resp, err := info.Client.ListWorkflow(ctx, &workflowservice.ListWorkflowExecutionsRequest{ Namespace: info.Namespace, Query: nonCompletedQuery, @@ -108,6 +110,7 @@ func VerifyNoFailedWorkflows(ctx context.Context, info ScenarioInfo, searchAttri 
statusQuery := fmt.Sprintf( "%s='%s' and ExecutionStatus = '%s'", searchAttribute, runID, status) + info.Logger.Infof("Using visibility query for %s workflows: %q", status.String(), statusQuery) visibilityCount, err := info.Client.CountWorkflow(ctx, &workflowservice.CountWorkflowExecutionsRequest{ Namespace: info.Namespace, Query: statusQuery, diff --git a/loadgen/workflow_completion_verifier.go b/loadgen/workflow_completion_verifier.go index af44202e..3baa47c5 100644 --- a/loadgen/workflow_completion_verifier.go +++ b/loadgen/workflow_completion_verifier.go @@ -87,6 +87,8 @@ func (wct *WorkflowCompletionVerifier) Verify(ctx context.Context, state Executo wct.info.ExecutionID, ) + wct.info.Logger.Infof("Using visibility query for completed workflows: %q", query) + var lastErrors []error // Function to perform all checks @@ -154,6 +156,8 @@ func (wct *WorkflowCompletionVerifier) VerifyNoRunningWorkflows(ctx context.Cont query := fmt.Sprintf("TaskQueue = %q and ExecutionStatus = 'Running'", TaskQueueForRun(wct.info.RunID)) + wct.info.Logger.Infof("Using visibility query for running workflows: %q", query) + // Setup retry loop checkTicker := time.NewTicker(3 * time.Second) defer checkTicker.Stop() From 67d7be6e67c39bb4bec6e22b4d459cb6fceff2bb Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Wed, 19 Nov 2025 15:49:45 -0800 Subject: [PATCH 62/66] full query Signed-off-by: Stephan Behnke --- loadgen/helpers.go | 6 ++++-- loadgen/workflow_completion_verifier.go | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/loadgen/helpers.go b/loadgen/helpers.go index 04d7ab1d..49058442 100644 --- a/loadgen/helpers.go +++ b/loadgen/helpers.go @@ -71,7 +71,8 @@ func GetNonCompletedWorkflows(ctx context.Context, info ScenarioInfo, searchAttr runID, ) - info.Logger.Infof("Using visibility query for non-completed workflows: %q", nonCompletedQuery) + info.Logger.Infof("Visibility query for non-completed workflows - CLI command: temporal workflow list --namespace %s --query %q", + info.Namespace, nonCompletedQuery) resp, err := info.Client.ListWorkflow(ctx, &workflowservice.ListWorkflowExecutionsRequest{ Namespace: info.Namespace, @@ -110,7 +111,8 @@ func VerifyNoFailedWorkflows(ctx context.Context, info ScenarioInfo, searchAttri statusQuery := fmt.Sprintf( "%s='%s' and ExecutionStatus = '%s'", searchAttribute, runID, status) - info.Logger.Infof("Using visibility query for %s workflows: %q", status.String(), statusQuery) + info.Logger.Infof("Visibility query for %s workflows - CLI command: temporal workflow count --namespace %s --query %q", + status.String(), info.Namespace, statusQuery) visibilityCount, err := info.Client.CountWorkflow(ctx, &workflowservice.CountWorkflowExecutionsRequest{ Namespace: info.Namespace, Query: statusQuery, diff --git a/loadgen/workflow_completion_verifier.go b/loadgen/workflow_completion_verifier.go index 3baa47c5..2c66958a 100644 --- a/loadgen/workflow_completion_verifier.go +++ b/loadgen/workflow_completion_verifier.go @@ -87,7 +87,8 @@ func (wct *WorkflowCompletionVerifier) Verify(ctx context.Context, state Executo wct.info.ExecutionID, ) - wct.info.Logger.Infof("Using visibility query for completed workflows: %q", query) + wct.info.Logger.Infof("Visibility query for completed workflows - CLI command: temporal workflow count --namespace %s --query %q", + wct.info.Namespace, query) var lastErrors []error @@ -156,7 +157,8 @@ func (wct *WorkflowCompletionVerifier) VerifyNoRunningWorkflows(ctx context.Cont query := fmt.Sprintf("TaskQueue = %q and 
ExecutionStatus = 'Running'", TaskQueueForRun(wct.info.RunID)) - wct.info.Logger.Infof("Using visibility query for running workflows: %q", query) + wct.info.Logger.Infof("Visibility query for running workflows - CLI command: temporal workflow count --namespace %s --query %q", + wct.info.Namespace, query) // Setup retry loop checkTicker := time.NewTicker(3 * time.Second) From e30aa250c2363cdd9e766aa55085f69c8a1c0284 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Thu, 20 Nov 2025 10:18:09 -0800 Subject: [PATCH 63/66] retry InitSearchAttribute Signed-off-by: Stephan Behnke --- loadgen/workflow_completion_verifier.go | 37 +++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/loadgen/workflow_completion_verifier.go b/loadgen/workflow_completion_verifier.go index 2c66958a..49fc78b8 100644 --- a/loadgen/workflow_completion_verifier.go +++ b/loadgen/workflow_completion_verifier.go @@ -47,11 +47,44 @@ func (wct *WorkflowCompletionVerifier) init(ctx context.Context, info ScenarioIn return nil } + // Retry InitSearchAttribute until context deadline expires + retryTicker := time.NewTicker(2 * time.Second) + defer retryTicker.Stop() + + // Try immediately first + var lastErr error if err := InitSearchAttribute(ctx, info, OmesExecutionIDSearchAttribute); err != nil { - return fmt.Errorf("failed to register search attribute %s: %w", + lastErr = err + info.Logger.Warnf("failed to register search attribute %s, will retry: %v", OmesExecutionIDSearchAttribute, err) + } else { + return nil + } + + // Retry loop until context deadline + for { + select { + case <-ctx.Done(): + // Context ended (deadline or cancellation). Return last error. + return fmt.Errorf("failed to register search attribute %s after retries: %w", + OmesExecutionIDSearchAttribute, lastErr) + case <-retryTicker.C: + // Don't perform retry if context is already done + if ctx.Err() != nil { + return fmt.Errorf("failed to register search attribute %s after retries: %w", + OmesExecutionIDSearchAttribute, lastErr) + } + if err := InitSearchAttribute(ctx, info, OmesExecutionIDSearchAttribute); err != nil { + lastErr = err + info.Logger.Warnf("failed to register search attribute %s, will retry: %v", + OmesExecutionIDSearchAttribute, err) + } else { + info.Logger.Infof("successfully registered search attribute %s after retries", + OmesExecutionIDSearchAttribute) + return nil + } + } } - return nil } // VerifyRun implements the Verifier interface. 
From cef31f397bfeff0062e5065c78c662638b9b02f1 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Thu, 20 Nov 2025 22:38:52 -0800 Subject: [PATCH 64/66] rm Signed-off-by: Stephan Behnke --- scenarios/workflow_with_single_noop_activity.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scenarios/workflow_with_single_noop_activity.go b/scenarios/workflow_with_single_noop_activity.go index fb6095d8..9194bbd6 100644 --- a/scenarios/workflow_with_single_noop_activity.go +++ b/scenarios/workflow_with_single_noop_activity.go @@ -42,12 +42,13 @@ func init() { } }, VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { - e := executor.(*noopActivityExecutor) - if e.completionVerifier == nil { - return nil - } - state := e.KitchenSinkExecutor.GetState() - return e.completionVerifier.VerifyRun(ctx, info, state) + // e := executor.(*noopActivityExecutor) + // if e.completionVerifier == nil { + // return nil + // } + // state := e.KitchenSinkExecutor.GetState() + // return e.completionVerifier.VerifyRun(ctx, info, state) + return nil }, }) } From fb348411138bfd6b19c4603b738be2191d175adf Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Thu, 20 Nov 2025 22:43:19 -0800 Subject: [PATCH 65/66] fix Signed-off-by: Stephan Behnke --- scenarios/ebb_and_flow.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index b2751370..75b07e6d 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -291,11 +291,11 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( // Sample activities from the configuration rng := rand.New(rand.NewSource(time.Now().UnixNano())) - activities := config.Sample(rng) + activityActions := config.Sample(rng) // Build actions for the kitchensink workflow var actions []*Action - for _, activityAction := range activities { + for _, activityAction := range activityActions { actions = append(actions, &Action{ Variant: &Action_ExecActivity{ ExecActivity: activityAction, From 3e65387af23f1453392f22a462d3b2b7798bffcf Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Thu, 20 Nov 2025 22:43:56 -0800 Subject: [PATCH 66/66] rm Signed-off-by: Stephan Behnke --- scenarios/ebb_and_flow.go | 1 - 1 file changed, 1 deletion(-) diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index 75b07e6d..815db969 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -342,7 +342,6 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( e.Logger.Errorf("kitchensink workflow failed for iteration %d: %v", iteration, err) } e.completedActivities.Add(activities) - e.incrementTotalCompletedWorkflow() // Record completion in executor state for verification e.stateLock.Lock()
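
Note on [PATCH 65/66] above: the rename to activityActions appears to avoid a name collision — the surrounding function still uses an activities count later on, as the [PATCH 66/66] context shows with e.completedActivities.Add(activities), so the result of config.Sample(rng) could not also be called activities. Below is a minimal, self-contained sketch of the collision the rename sidesteps; the types, names, and sample() helper are illustrative assumptions, not code from ebb_and_flow.go.

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    var completedActivities atomic.Int64

    // spawn mirrors the shape of spawnWorkflowWithActivities: it receives the
    // requested activity count and separately samples the concrete actions.
    func spawn(activities int64) {
        // `activities := sample()` here would redeclare the int64 parameter in the
        // same block, which Go rejects; keeping a distinct name avoids the clash
        // and lets the count flow through to the completion counter untouched.
        activityActions := sample()

        fmt.Println("sampled", len(activityActions), "actions")
        completedActivities.Add(activities) // still the count, as intended
    }

    func sample() []string { return []string{"noop-1", "noop-2"} }

    func main() {
        spawn(2)
        fmt.Println("completed:", completedActivities.Load())
    }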