diff --git a/api-reference/datasets/datapoints.mdx b/api-reference/datasets/datapoints.mdx new file mode 100644 index 0000000..068c41b --- /dev/null +++ b/api-reference/datasets/datapoints.mdx @@ -0,0 +1,9 @@ +--- +title: "List dataset datapoints" +method: GET +path: /v1/datasets/datapoints +openapi: "GET /v1/datasets/datapoints" +description: List datapoints from a dataset. +--- + +List datapoints with filters: `datasetId` or `datasetName`, plus `limit` and `offset`. diff --git a/api-reference/datasets/list.mdx b/api-reference/datasets/list.mdx new file mode 100644 index 0000000..cd46202 --- /dev/null +++ b/api-reference/datasets/list.mdx @@ -0,0 +1,9 @@ +--- +title: "List datasets" +method: GET +path: /v1/datasets +openapi: "GET /v1/datasets" +description: List datasets by id or name. +--- + +List datasets with optional filters: `id` and/or `name`. diff --git a/api-reference/datasets/parquet.mdx b/api-reference/datasets/parquet.mdx new file mode 100644 index 0000000..f6ec4e9 --- /dev/null +++ b/api-reference/datasets/parquet.mdx @@ -0,0 +1,9 @@ +--- +title: "Download dataset parquet" +method: GET +path: /v1/datasets/{datasetId}/parquets/{idx} +openapi: "GET /v1/datasets/{datasetId}/parquets/{idx}" +description: Stream a parquet export for a dataset. +--- + +Streams a parquet file from the export bucket for the given dataset and index. diff --git a/api-reference/datasets/upsert.mdx b/api-reference/datasets/upsert.mdx new file mode 100644 index 0000000..ff40dbc --- /dev/null +++ b/api-reference/datasets/upsert.mdx @@ -0,0 +1,10 @@ +--- +title: "Create datapoints" +method: POST +path: /v1/datasets/datapoints +openapi: "POST /v1/datasets/datapoints" +description: Insert datapoints and optionally create a dataset. +--- + +Body: `{"dataset": {"datasetName"|"datasetId"}, "datapoints":[{id?,data,target?,metadata?}], "createDataset"?: bool}`. +Creates the dataset if requested; returns `201` if created. diff --git a/api-reference/evals/init_eval.mdx b/api-reference/evals/init_eval.mdx index 6c31996..bee01a5 100644 --- a/api-reference/evals/init_eval.mdx +++ b/api-reference/evals/init_eval.mdx @@ -1,8 +1,9 @@ --- -title: 'Initialize Evaluation' -openapi: 'POST /v1/evals' +title: "Create evaluation" +method: POST +path: /v1/evals +openapi: "POST /v1/evals" +description: Create an evaluation. --- -### Description - -Create a new evaluation with an optional name and group. If no name is provided, a random name will be generated automatically. \ No newline at end of file +Create an evaluation. If no name is provided, Laminar generates one automatically. diff --git a/api-reference/evals/save_eval_datapoints.mdx b/api-reference/evals/save_eval_datapoints.mdx index f2af70d..3490a74 100644 --- a/api-reference/evals/save_eval_datapoints.mdx +++ b/api-reference/evals/save_eval_datapoints.mdx @@ -1,12 +1,9 @@ --- -title: 'Save Evaluation Datapoints' -openapi: 'POST /v1/evals/{eval_id}/datapoints' +title: "Add evaluation datapoints" +method: POST +path: /v1/evals/{eval_id}/datapoints +openapi: "POST /v1/evals/{eval_id}/datapoints" +description: Add datapoints to an existing evaluation. --- -### Description - -Save multiple evaluation datapoints to an existing evaluation. Each datapoint can include input, output, expected output, executor output, scores, and metadata. - - -The actual datapoints are not saved until you call the [Update Evaluation Datapoint](/api-reference/evals/update_eval_datapoint) endpoint. - \ No newline at end of file +Add multiple evaluation datapoints to an existing evaluation. 
Each datapoint can include input, output, expected output, executor output, scores, and metadata. Datapoints are stored immediately; the update endpoint is only for changing a specific datapoint later. diff --git a/api-reference/evals/update_eval_datapoint.mdx b/api-reference/evals/update_eval_datapoint.mdx index d8952e9..0e52c36 100644 --- a/api-reference/evals/update_eval_datapoint.mdx +++ b/api-reference/evals/update_eval_datapoint.mdx @@ -1,8 +1,9 @@ --- -title: 'Update Evaluation Datapoint' -openapi: 'POST /v1/evals/{eval_id}/datapoints/{datapoint_id}' +title: "Update evaluation datapoint" +method: POST +path: /v1/evals/{eval_id}/datapoints/{datapoint_id} +openapi: "POST /v1/evals/{eval_id}/datapoints/{datapoint_id}" +description: Update a specific datapoint with new output or scores. --- -### Description - -Update a specific evaluation datapoint with new executor output and scores. \ No newline at end of file +Update a specific evaluation datapoint with new executor output, scores, or metadata. diff --git a/api-reference/evaluators/create-evaluator-score.mdx b/api-reference/evaluators/create-evaluator-score.mdx index de331b6..8c59cc8 100644 --- a/api-reference/evaluators/create-evaluator-score.mdx +++ b/api-reference/evaluators/create-evaluator-score.mdx @@ -5,4 +5,4 @@ openapi: 'POST /v1/evaluators/score' ### Description -Create a score for a span using either a trace ID or span ID. When using a trace ID, the score will be attached to the root span of that trace. \ No newline at end of file +Create a score for a span using either a trace ID or span ID. When using a trace ID, the score is attached to the root span of that trace. This endpoint records the provided score; it does not run evaluator logic server-side. diff --git a/api-reference/evaluators/score.mdx b/api-reference/evaluators/score.mdx new file mode 100644 index 0000000..2810d5c --- /dev/null +++ b/api-reference/evaluators/score.mdx @@ -0,0 +1,9 @@ +--- +title: "Score via evaluator" +method: POST +path: /v1/evaluators/score +openapi: "POST /v1/evaluators/score" +description: Create a manual evaluator score attached to a span. +--- + +Create a score for a span using either a trace ID or span ID. When a trace ID is provided, the score is attached to the root span. This endpoint writes the score you supply; it does not execute or look up evaluator definitions. diff --git a/api-reference/health/health.mdx b/api-reference/health/health.mdx new file mode 100644 index 0000000..69356b8 --- /dev/null +++ b/api-reference/health/health.mdx @@ -0,0 +1,9 @@ +--- +title: "Health probe" +method: GET +path: /health +openapi: "GET /health" +description: Health check (RabbitMQ connectivity). +--- + +Checks RabbitMQ connectivity. Consumer /health also validates worker counts. diff --git a/api-reference/health/ready.mdx b/api-reference/health/ready.mdx new file mode 100644 index 0000000..e899601 --- /dev/null +++ b/api-reference/health/ready.mdx @@ -0,0 +1,9 @@ +--- +title: "Readiness probe" +method: GET +path: /ready +openapi: "GET /ready" +description: Readiness check. +--- + +Readiness check; returns 500 if dependencies are unavailable. 
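For the `POST /v1/evaluators/score` endpoint documented above, a minimal request sketch follows, using the base URL and Bearer auth from the API overview and the field names listed there (`name`, `score`, `source`, optional `metadata`, plus `traceId` or `spanId`). The `source` value and the span UUID are placeholder assumptions, not values taken from the spec.

```bash
# Hypothetical request: attach a manually computed score to an existing span.
curl -X POST https://api.lmnr.ai/v1/evaluators/score \
  -H "Authorization: Bearer $LMNR_PROJECT_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "name": "accuracy",
    "score": 0.9,
    "source": "API",
    "spanId": "<span-uuid>"
  }'
```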
diff --git a/api-reference/ingestion/browser-sessions.mdx b/api-reference/ingestion/browser-sessions.mdx new file mode 100644 index 0000000..4d45979 --- /dev/null +++ b/api-reference/ingestion/browser-sessions.mdx @@ -0,0 +1,10 @@ +--- +title: "Ingest browser session events" +method: POST +path: /v1/browser-sessions/events +openapi: "POST /v1/browser-sessions/events" +description: Send browser session events (ingest or default key). +--- + +Body: `{"events":[{type,timestamp,data(base64|bytes)}], "sessionId", "traceId", "source"?, "sdkVersion"?}`. +`400` if `traceId` is nil UUID. `403` on data limit. Publishes to MQ. diff --git a/api-reference/ingestion/metrics.mdx b/api-reference/ingestion/metrics.mdx new file mode 100644 index 0000000..d0bb7ad --- /dev/null +++ b/api-reference/ingestion/metrics.mdx @@ -0,0 +1,9 @@ +--- +title: "Ingest metrics" +method: POST +path: /v1/metrics +openapi: "POST /v1/metrics" +description: Placeholder OTLP metrics ingestion. +--- + +Accepts OTLP metrics posts and returns `200 OK`. Placeholder endpoint. diff --git a/api-reference/ingestion/otlp-grpc.mdx b/api-reference/ingestion/otlp-grpc.mdx new file mode 100644 index 0000000..c6f9a28 --- /dev/null +++ b/api-reference/ingestion/otlp-grpc.mdx @@ -0,0 +1,8 @@ +--- +title: "OTLP gRPC ingest" +method: POST +path: TraceService/Export (gRPC) +description: Ingest traces over OTLP gRPC (ingest or default key). +--- + +This is a gRPC method, not an HTTP endpoint. Send OTLP TraceService/Export requests to the gRPC port (default 8001) with `Authorization: Bearer ` (ingest or default key). Enforces usage-limit checks. diff --git a/api-reference/ingestion/traces.mdx b/api-reference/ingestion/traces.mdx new file mode 100644 index 0000000..b917311 --- /dev/null +++ b/api-reference/ingestion/traces.mdx @@ -0,0 +1,9 @@ +--- +title: "Ingest traces" +method: POST +path: /v1/traces +openapi: "POST /v1/traces" +description: OTLP trace ingestion (ingest or default key). +--- + +Send OTLP `ExportTraceServiceRequest` bytes to enqueue spans. Returns `403` when workspace data limit is hit. Supports keep-alive. diff --git a/api-reference/introduction.mdx b/api-reference/introduction.mdx index 4de49f0..fba9aa8 100644 --- a/api-reference/introduction.mdx +++ b/api-reference/introduction.mdx @@ -1,15 +1,66 @@ --- -title: 'Overview' -description: 'General guidelines on using our API' +title: API reference +description: HTTP endpoints for ingesting traces, sessions, datasets, evals, and querying data. --- -## General +Use the REST API to ingest traces/browser sessions, manage datasets/evals, and query your Laminar data. Authentication depends on the endpoint (ingest or default project key). See the left sidebar for all routes. -Use the following base URL: `https://api.lmnr.ai/v1` +## Base URL -For example, `POST https://api.lmnr.ai/v1/sql/query` +``` +https://api.lmnr.ai/v1 +``` -For more detailed information about each endpoint or schema, check our OpenAPI specification. +## Auth notes + +- `/v1/traces`, `/v1/metrics`, `/v1/browser-sessions/events` allow ingest-only **or** default API keys. +- Other `/v1/*` routes require default project API keys. +- `/api/v1/projects/{project_id}/...` assume auth is handled upstream (frontend middleware). + +## Ingestion (/v1, ingest or default key) + +- `POST /v1/traces` — OTLP `ExportTraceServiceRequest` bytes; enqueues spans; `403` when workspace data limit hit; keep-alive supported. 
+- `POST /v1/browser-sessions/events` — JSON `{events:[{type,timestamp,data(base64|bytes)}], sessionId, traceId, source?, sdkVersion?}`; `400` if `traceId` is nil UUID, `403` on data limit; publishes to MQ. +- `POST /v1/metrics` — placeholder; returns `200 OK` for OTLP metrics posts. + +## Tracing / datasets / evals (/v1, default keys) + +- Tags: `POST /v1/tag` with `{"names":[...],"traceId":UUID}` or `{"names":[...],"spanId":UUID}`; resolves top span if only traceId given; returns `{id, spanId}` list. +- Datasets: + - `GET /v1/datasets?id?&name?` — list datasets + - `GET /v1/datasets/datapoints?...` — paginated datapoints + - `POST /v1/datasets/datapoints` — create dataset (optional) and insert datapoints + - `GET /v1/datasets/{datasetId}/parquets/{idx}` — stream parquet from export bucket +- Evals: + - `POST /v1/evals` — create evaluation (name?, groupName?, metadata?) + - `POST /v1/evals/{evalId}/datapoints` — bulk save datapoints + - `POST /v1/evals/{evalId}/datapoints/{datapointId}` — update one datapoint +- Evaluator scores: `POST /v1/evaluators/score` with `{"name","score","source","metadata?","traceId"|"spanId"}`; resolves span and writes score. +- SQL & payloads: + - `POST /v1/sql/query` — body `{query}`; returns `{data:[...]}` + - `GET /v1/payloads/{payloadId}?payloadType=image|raw` — stream stored payload + +## Project-scoped (`/api/v1/projects/{project_id}`) + +- Spans: `POST .../spans` to enqueue a span; `POST .../spans/search` to query spans (Quickwit). +- SQL helpers: `POST .../sql/query|validate|to-json|from-json` for query execution/translation. +- Evaluation analytics: `GET .../evaluation-score-stats` and `GET .../evaluation-score-distribution` for score summaries. +- Realtime SSE: `GET .../realtime?key=traces|trace_{traceId}` opens a project SSE stream. + +## Health and gRPC + +- `GET /health` and `GET /ready` check RabbitMQ (consumer health checks worker counts). +- OTLP gRPC trace ingestion: `TraceService/Export` on gRPC port (default 8001) with Bearer API key (ingest or default); enforces usage limits. + +## Reference endpoints + +- [Trigger or update an evaluation run](/api-reference/evals/init_eval) — `POST /v1/evals` +- [Add evaluation datapoints](/api-reference/evals/save_eval_datapoints) — `POST /v1/evals/{eval_id}/datapoints` +- [Update a specific datapoint](/api-reference/evals/update_eval_datapoint) — `POST /v1/evals/{eval_id}/datapoints/{datapoint_id}` +- [Score via evaluator](/api-reference/evaluators/score) — `POST /v1/evaluators/score` +- [Run a SQL query against Laminar data](/api-reference/sql/sql_query) — `POST /v1/sql/query` + +See the OpenAPI spec for full schemas. -Each endpoint's page in OpenAPI specification specifies the method, path and parameters to be used. Additionally, you can try sending the request from there. - ## Authentication -All API endpoints are authenticated using Project API key as Bearer token. - -To get the token, go to "settings" page and move to "Project API keys" section. Then get a token from there or generate a new one. +Send your `LMNR_PROJECT_API_KEY` as a Bearer token. -Note that each project has different Project API keys. For switching between projects, press "Laminar" icon at the top-left of the dashboard. 
+```bash +curl -X POST https://api.lmnr.ai/v1/sql/query \ + -H "Authorization: Bearer $LMNR_PROJECT_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"query":"SELECT * FROM spans LIMIT 1"}' +``` - -Project API key bearer tokens + + Project API key bearer tokens diff --git a/api-reference/openapi.json b/api-reference/openapi.json index a96593c..80fe2de 100644 --- a/api-reference/openapi.json +++ b/api-reference/openapi.json @@ -22,7 +22,7 @@ "/v1/queues/push": { "post": { "tags": [ - "api::queues" + "Queues" ], "operationId": "push_to_queue", "requestBody": { @@ -75,7 +75,7 @@ "/v1/evals": { "post": { "tags": [ - "api::evaluations" + "Evals" ], "operationId": "init_eval", "requestBody": { @@ -127,7 +127,7 @@ "/v1/evals/{eval_id}/datapoints": { "post": { "tags": [ - "api::evaluations" + "Evals" ], "operationId": "save_eval_datapoints", "parameters": [ @@ -204,7 +204,7 @@ "/v1/evals/{eval_id}/datapoints/{datapoint_id}": { "post": { "tags": [ - "api::evaluations" + "Evals" ], "operationId": "update_eval_datapoint", "parameters": [ @@ -291,7 +291,7 @@ "/v1/evaluators/score": { "post": { "tags": [ - "api::evaluators" + "Evaluators" ], "operationId": "create_evaluator_score", "requestBody": { @@ -339,7 +339,7 @@ "/v1/sql/query": { "post": { "tags": [ - "api::sql" + "SQL" ], "operationId": "sql_query", "requestBody": { diff --git a/api-reference/payloads/get_payload.mdx b/api-reference/payloads/get_payload.mdx new file mode 100644 index 0000000..e304054 --- /dev/null +++ b/api-reference/payloads/get_payload.mdx @@ -0,0 +1,9 @@ +--- +title: "Get payload" +method: GET +path: /v1/payloads/{payloadId} +openapi: "GET /v1/payloads/{payloadId}" +description: Stream a stored payload (image or raw). +--- + +Retrieve a stored payload. Use `payloadType=image|raw` query param to control content type. diff --git a/api-reference/projects/eval_score_distribution.mdx b/api-reference/projects/eval_score_distribution.mdx new file mode 100644 index 0000000..e462b21 --- /dev/null +++ b/api-reference/projects/eval_score_distribution.mdx @@ -0,0 +1,9 @@ +--- +title: "Evaluation score distribution" +method: GET +path: /api/v1/projects/{project_id}/evaluation-score-distribution +openapi: "GET /api/v1/projects/{project_id}/evaluation-score-distribution" +description: Score distribution across buckets. +--- + +Get score distribution for one or more evaluations. Query params: `evaluationIds` (comma-separated UUIDs), `scoreName`. diff --git a/api-reference/projects/eval_score_stats.mdx b/api-reference/projects/eval_score_stats.mdx new file mode 100644 index 0000000..85dc0a5 --- /dev/null +++ b/api-reference/projects/eval_score_stats.mdx @@ -0,0 +1,9 @@ +--- +title: "Evaluation score stats" +method: GET +path: /api/v1/projects/{project_id}/evaluation-score-stats +openapi: "GET /api/v1/projects/{project_id}/evaluation-score-stats" +description: Average score for an evaluation. +--- + +Get average score value for an evaluation. Query params: `evaluationId`, `scoreName`. diff --git a/api-reference/projects/realtime.mdx b/api-reference/projects/realtime.mdx new file mode 100644 index 0000000..eed4b4b --- /dev/null +++ b/api-reference/projects/realtime.mdx @@ -0,0 +1,9 @@ +--- +title: "Realtime SSE" +method: GET +path: /api/v1/projects/{project_id}/realtime +openapi: "GET /api/v1/projects/{project_id}/realtime" +description: Open a project SSE stream. +--- + +Open a server-sent events stream. Query param `key=traces|trace_{traceId}` selects the stream. 
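A sketch of subscribing to the realtime stream above with curl. The host is a placeholder, and these project-scoped routes assume authentication is handled by the fronting middleware, so add whatever headers your deployment requires.

```bash
# Keep the connection open for server-sent events (-N disables output buffering)
curl -N "https://<your-laminar-host>/api/v1/projects/<project_id>/realtime?key=traces"

# Follow a single trace instead of the whole project stream
curl -N "https://<your-laminar-host>/api/v1/projects/<project_id>/realtime?key=trace_<traceId>"
```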
diff --git a/api-reference/projects/spans.mdx b/api-reference/projects/spans.mdx new file mode 100644 index 0000000..0492c7c --- /dev/null +++ b/api-reference/projects/spans.mdx @@ -0,0 +1,10 @@ +--- +title: "Project spans" +method: POST +path: /api/v1/projects/{project_id}/spans +openapi: "POST /api/v1/projects/{project_id}/spans" +description: Enqueue a span for a project. +--- + +Body: `{name, spanType?, startTime, endTime, attributes?, traceId?, parentSpanId?}`. +Returns `{spanId, traceId}`. diff --git a/api-reference/projects/spans_search.mdx b/api-reference/projects/spans_search.mdx new file mode 100644 index 0000000..97e5f10 --- /dev/null +++ b/api-reference/projects/spans_search.mdx @@ -0,0 +1,10 @@ +--- +title: "Search spans" +method: POST +path: /api/v1/projects/{project_id}/spans/search +openapi: "POST /api/v1/projects/{project_id}/spans/search" +description: Search spans for a project. +--- + +Body: `{traceId?, searchQuery, startTime?, endTime?, searchIn?, limit, offset}`. +Returns hits `{trace_id, span_id}` from Quickwit. diff --git a/api-reference/projects/sql_from_json.mdx b/api-reference/projects/sql_from_json.mdx new file mode 100644 index 0000000..c12f047 --- /dev/null +++ b/api-reference/projects/sql_from_json.mdx @@ -0,0 +1,9 @@ +--- +title: "SQL from JSON" +method: POST +path: /api/v1/projects/{project_id}/sql/from-json +openapi: "POST /api/v1/projects/{project_id}/sql/from-json" +description: Convert a JSON query structure to SQL. +--- + +Body: `{queryStructure}`. Returns `{success, sql?, error?}`. diff --git a/api-reference/projects/sql_query.mdx b/api-reference/projects/sql_query.mdx new file mode 100644 index 0000000..d680a68 --- /dev/null +++ b/api-reference/projects/sql_query.mdx @@ -0,0 +1,9 @@ +--- +title: "Project SQL query" +method: POST +path: /api/v1/projects/{project_id}/sql/query +openapi: "POST /api/v1/projects/{project_id}/sql/query" +description: Execute a SQL query for a project. +--- + +Body: `{query, parameters}`. diff --git a/api-reference/projects/sql_to_json.mdx b/api-reference/projects/sql_to_json.mdx new file mode 100644 index 0000000..b9e757d --- /dev/null +++ b/api-reference/projects/sql_to_json.mdx @@ -0,0 +1,9 @@ +--- +title: "SQL to JSON" +method: POST +path: /api/v1/projects/{project_id}/sql/to-json +openapi: "POST /api/v1/projects/{project_id}/sql/to-json" +description: Convert SQL to JSON structure. +--- + +Body: `{sql}`. Returns `{success, queryStructure?, error?}`. diff --git a/api-reference/projects/sql_validate.mdx b/api-reference/projects/sql_validate.mdx new file mode 100644 index 0000000..f29a329 --- /dev/null +++ b/api-reference/projects/sql_validate.mdx @@ -0,0 +1,9 @@ +--- +title: "Validate SQL" +method: POST +path: /api/v1/projects/{project_id}/sql/validate +openapi: "POST /api/v1/projects/{project_id}/sql/validate" +description: Validate a SQL query for a project. +--- + +Body: `{query}`. Returns `{success, validatedQuery?, error?}`. diff --git a/api-reference/queues/queues_push.mdx b/api-reference/queues/queues_push.mdx index 105a403..cf628ba 100644 --- a/api-reference/queues/queues_push.mdx +++ b/api-reference/queues/queues_push.mdx @@ -5,6 +5,6 @@ openapi: 'POST /v1/queues/push' ### Description -You can push data to a labeling queue. +This endpoint is not available in the current server build. Labeling queues have not been released yet; data cannot be pushed or consumed at this route today. -It will then be accessible in the UI for further labeling. +When queues ship, this page will document the request/response shape. 
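As a concrete sketch of the span search body documented above (`searchQuery`, `limit`, `offset`, with optional trace ID and time bounds): the host is a placeholder and, as with the other project-scoped routes, auth is assumed to be handled upstream.

```bash
# Hypothetical search for spans containing "timeout"; the response lists {trace_id, span_id} hits.
curl -X POST "https://<your-laminar-host>/api/v1/projects/<project_id>/spans/search" \
  -H "Content-Type: application/json" \
  -d '{
    "searchQuery": "timeout",
    "limit": 20,
    "offset": 0
  }'
```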
diff --git a/api-reference/sql/sql_query.mdx b/api-reference/sql/sql_query.mdx index a7a5af7..f33ad85 100644 --- a/api-reference/sql/sql_query.mdx +++ b/api-reference/sql/sql_query.mdx @@ -6,7 +6,7 @@ openapi: /api-reference/openapi.json POST /v1/sql/query ## SQL Query -You can run SQL queries on your data stored in Laminar using the SQL query API. Learn more in the [SQL Editor](/sql-editor/introduction) reference. +You can run SQL queries on your data stored in Laminar using the SQL query API. Learn more in the [SQL Editor](/sql-editor/overview) reference. ### Example request diff --git a/api-reference/tag.mdx b/api-reference/tag.mdx new file mode 100644 index 0000000..0a2c1f7 --- /dev/null +++ b/api-reference/tag.mdx @@ -0,0 +1,9 @@ +--- +title: "Tag spans or traces" +method: POST +path: /v1/tag +openapi: "POST /v1/tag" +description: Add tags to a trace or span (default key). +--- + +Body: `{"names":[...], "traceId": UUID}` or `{"names":[...], "spanId": UUID}`. Resolves the top span if only `traceId` is provided. Returns list of `{id, spanId}`. diff --git a/changelog/index.mdx b/changelog/index.mdx new file mode 100644 index 0000000..e19e6f5 --- /dev/null +++ b/changelog/index.mdx @@ -0,0 +1,95 @@ +--- +title: "Changelog" +description: "Key releases and updates" +--- + +
+## Dec 3, 2025: Claude Agent SDK instrumentation
+
+Automatic tracing for Claude Agent SDK with a lightweight Rust proxy.
+
+- Captures agent actions, inputs/outputs, tokens, and cost
+- Works with existing Laminar tracing
+
+## Nov 2025: SDK updates (v0.7.x)
+
+Reliability fixes and better ESM support.
+
+- Python SDK v0.7.22
+- TypeScript SDK v0.7.6
+
+## Oct 2025: Index browser agent API
+
+Serverless API for running browser agents in production with full observability.
+
+- Supports Gemini, Claude, and OpenAI models
+- Structured outputs with Pydantic schemas
+- Interactive CLI for testing
+
+## Sep 2025: LangGraph visualization & Skyvern integration
+
+Visualize LangGraph execution flows and trace Skyvern automation with session recordings.
+
+## Aug 2025: Custom dashboards & SQL editor
+
+Query all Laminar data with SQL and build dashboards. Backed by ClickHouse for sub-second reads.
+
+## Jul 2025: Browser agent observability suite
+
+Session recordings synced with traces; real-time spans and live cost tracking.
+
+- Supports Playwright, Puppeteer, Stagehand, Browser Use
+- 30+ minute sessions with instant playback
+
+## Jun 2025: Vercel AI SDK integration
+
+Automatic tracing for generateText/streamText with Next.js support.
+
+## Jun 2025: Stagehand integration
+
+Trace Stagehand runs with session recordings and per-step cost.
+
+## May 2025: Integration expansion
+
+Support for Gemini, Mistral, Bedrock, Groq, Cohere, CrewAI, LiteLLM, plus improved OpenAI/Anthropic instrumentation.
+
+## Apr 2025: Agent manager service
+
+Self-hosted container to manage browser agent infrastructure.
+
+## Dec 2024: Launch Week #1
+
+Flow (dynamic task engine), Evaluations SDK, Semantic Search API, Labeling Queues, Online Evaluations.
+
+## Summer 2024: Core platform launch
+
+Automatic tracing (OpenAI, Anthropic, LangChain), @observe decorator, datasets, playground, self-hosting.
diff --git a/custom-dashboards/overview.mdx b/custom-dashboards/overview.mdx index 0531a0e..13cd18f 100644 --- a/custom-dashboards/overview.mdx +++ b/custom-dashboards/overview.mdx @@ -1,67 +1,55 @@ --- -title: "Creating dashboards to track metrics of AI agents" -sidebarTitle: "Overview" +title: Create dashboards to track your AI app +sidebarTitle: Overview --- - - Laminar Custom Dashboard Introduction - - -## What You Can Track - -Custom Dashboards are built on top of our powerful query engine, working across all your observability data - `traces`, `spans`, `events`, and `tags`. You can track any metric that matters to your application. - -For detailed information on available entities, fields, and how to select the right data for your charts, see the [SQL Editor Reference](/sql-editor/reference). -## How to Build Charts - -To create a chart, navigate to **dashboard** menu and click the **`+ Chart`** button in the upper right corner. - - - Chart Builder flow - +Build dashboards on top of your traces, spans, events, and eval scores without extra infrastructure. Track costs, latency, accuracy, or any custom metric in a few clicks. -**The process:** +## What you'll build -1. **Pick visualization**: - - **Line Chart**: For time series visualization. We automatically prefill and group data by time range, perfect for tracking trends over time. - - **Bar Chart**: Another alternative to visualize time series data, useful when you want to emphasize individual time periods. - - **Horizontal Bar Chart**: For visualizations that need to be ranked, similar to a sortable table. Use this to compare and rank items (e.g., top users, models by cost). +- A dashboard tile showing the metrics that matter (cost, tokens, latency, accuracy). +- Filters and group-bys to break down by model, route, user, team, or tag. +- Shareable dashboards for eng/support/research. -2. **Select data source**: Traces, Spans, Events, and Tags +## Copy/paste workflow -3. **Define metrics**: What to measure (count, sum, avg, min, max, p90, p95, p99) +1. Open **Dashboard** → click **`+ Chart`**. +2. Choose a visualization (line, bar, horizontal bar for rankings). +3. Select a source (`spans`, `traces`, `events`, `evaluation_scores`). +4. Define metrics (count/sum/avg/p90/p95/p99) and group by model/route/tag. +5. Save to a dashboard and resize/arrange tiles. -4. **Add context**: Filters to narrow scope, grouping to break down by dimensions, order by fields, limits for top N results + +Need a custom query? Use the SQL editor to craft it, then add the result as a chart. + -5. **Save and customize**: Add to dashboard, resize as needed +## Example recipes -## Examples +**Total tokens by model (identify spend hotspots)** +- Source: `spans` +- Metric: `sum(total_tokens)` +- Group by: `model` +- Filter: `span_type = 'LLM'` +- Visualization: Line chart -### Total Tokens by Model +**p90 cost by provider (compare pricing drift)** +- Source: `spans` +- Metric: `p90(cost)` +- Group by: `provider` +- Visualization: Line chart -See which models consume the most tokens to identify where your LLM costs are going. Use this to decide if you're using the right model for each use case. 
+**Top slow routes (find regressions)** +- Source: `traces` +- Metric: `p95(duration_ms)` +- Group by: `route` +- Visualization: Horizontal bar - Total tokens by model chart + Dashboard with cost and token charts -**How to build:** -- Chart type: Line Chart -- Table: `spans` -- Metric: `total_tokens` with `sum` aggregation -- Group by: `model` -- Filter: `span_type` = `LLM` (to include only LLM calls) - -### p90 Cost by Provider - -Track cost trends across different LLM providers over time. The p90 metric shows what most of your expensive requests cost, helping you compare provider pricing and spot cost increases. - - - p90 cost by provider chart - +## Build this next -**How to build:** -- Chart type: Line Chart -- Table: `spans` -- Metric: `cost` with `p90` aggregation -- Group by: `provider` +- Write custom queries → [SQL editor](/sql-editor/overview) +- Export query results to datasets → [Export & datasets](/sql-editor/overview) +- Pipe eval scores into dashboards → [Evaluations](/evaluations/quickstart) diff --git a/datasets/adding-data.mdx b/datasets/adding-data.mdx index 2af400f..1f0eb40 100644 --- a/datasets/adding-data.mdx +++ b/datasets/adding-data.mdx @@ -53,3 +53,7 @@ You can create new datapoints by editing existing ones or copying span data usin Humans can then edit the datapoints in the queue and save them to new datapoints either in the same dataset or in a new one. [Learn more about queues](/queues/quickstart). + + +Labeling queues are not available in the current server build; use dataset edits directly until the feature ships. + diff --git a/datasets/cli.mdx b/datasets/cli.mdx index f6d3b7e..2f365f2 100644 --- a/datasets/cli.mdx +++ b/datasets/cli.mdx @@ -79,7 +79,7 @@ lmnr datasets --project-api-key "" list Create a new dataset from the input file. This command will create a new dataset with the name `my-cli-dataset` and save the datapoints to the file `my-cli-dataset.json`. The datapoints are saved to a new file in order to: -- Store datasets in the Laminar format. In particular, datapoint id is crucial for versioning ([Learn more](/datasets/introduction#versioning)). +- Store datasets in the Laminar format. In particular, datapoint id is crucial for downstream joins. Laminar stores datapoints append-only, so keep ids stable when you want to re-use a row. - Not overwrite existing files. @@ -103,7 +103,7 @@ Make sure to not edit the `id` field of the datapoints. If you delete a datapoint, this will not affect the dataset in Laminar. -This is because the push operation only pushes new datapoint (versions) to the dataset. +Push is append-only; it adds new datapoints and does not delete existing ones in Laminar. @@ -231,7 +231,7 @@ Make sure to not edit the `id` field of the datapoints. If you delete a datapoint, this will not affect the dataset in Laminar. -This is because the push operation only pushes new datapoint (versions) to the dataset. +Push is append-only; it adds new datapoints and does not delete existing ones in Laminar. 
@@ -394,4 +394,4 @@ Options: --limit Limit number of datapoints to pull --offset Offset for pagination (default: 0) -h, --help display help for command -``` \ No newline at end of file +``` diff --git a/datasets/introduction.mdx b/datasets/introduction.mdx index 026a135..9d33eaa 100644 --- a/datasets/introduction.mdx +++ b/datasets/introduction.mdx @@ -1,112 +1,52 @@ --- sidebarTitle: Introduction -title: Introduction to Laminar datasets +title: Datasets for evals and training --- -## Concept +Datasets in Laminar hold the examples that power your evals, labeling, and training loops. Each datapoint is JSON you can version, filter, and connect back to traces. -Dataset is a collection of datapoints. It can be used for the following purposes: -1. Data storage for use in future fine-tuning or prompt-tuning. -1. Provide inputs and expected outputs for [Evaluations](/evaluations/introduction). + + Dataset datapoint view + -## Format +## What you'll do with datasets -Every datapoint has two fixed JSON objects: `data` and `target`, each with arbitrary keys. -`target` is only used in evaluations. +- Store inputs + expected outputs for evaluations. +- Turn traces into labeled examples, then iterate in labeling queues. +- Export query results (cost outliers, bad traces) straight into a dataset. -- `data` – the actual datapoint data, -- `target` – data additionally sent to the evaluator function. -- `metadata` – arbitrary key-value metadata about the datapoint. - -For every key inside `data` and `target`, the value can be any JSON value. - -### Example - -This is an example of a valid datapoint. +## Datapoint shape ```json { - "data": { - "color": "red", - "size": "large", - "messages": [ - { - "role": "user", - "content": "Hello, can you help me choose a T-shirt?" - }, - { - "role": "assistant", - "content": "I'm afraid, we don't sell T-shirts" - } - ] - }, - "target": { - "expected_output": "Of course! What size and color are you looking for?" - } + "data": { "question": "What is the capital of France?" }, + "target": { "answer": "Paris" }, + "metadata": { "category": "geography" } } ``` -## Use case: Evaluations - -Datasets can be used for evaluations to specify inputs and expected outputs. - -You will need to make sure the dataset keys match the input and output node names of the pipelines. -See more in the [Evaluations](/evaluations/introduction) page. - -## Editing - -Datasets are editable. You can edit the datapoints by clicking on the datapoint and -editing the data in JSON. Changes are saved as a new datapoint version. +- `data`: the input to your executor. +- `target`: optional reference passed to evaluators. +- `metadata`: tags for filtering and grouping. -### Versioning +## Storage model (append-only) -Each datapoint has a unique id and a `created_at` timestamp. Every time you -edit a datapoint, under the hood, -a new datapoint version is created with the same id, -but the `created_at` timestamp is updated. +- Each datapoint you add is stored as a row with the provided `id` (or a generated UUIDv7). +- Updates today are append-only; there is no built-in version history or delete/rollback. +- To change a datapoint, write a new row with the desired values; consumers should pick the record they need. -The version stack is push-only. That is, when you revert to a previous version, -a copy of that version is created and added as a current version. +## Common workflows -Example: +- **Feed evals**: wire a dataset into [evaluate](/evaluations/quickstart) to score prompts/agents. 
+- **Label from traces**: push spans into a queue, label targets, and write back to the dataset. +- **Export from SQL**: query outliers in the [SQL editor](/sql-editor/overview) and export to a dataset. -- Initial version (v1): -```json -{ - "id": "019a3122-ca78-7d75-91a7-a860526895b2", - "created_at": "2025-01-01T00:00:00.000Z", - "data": { "key": "initial value" } -} -``` -- Version 2 (v2): -```json -{ - "id": "019a3122-ca78-7d75-91a7-a860526895b2", - "created_at": "2025-01-05T00:00:05.000Z", - "data": { "key": "value at v2" } -} -``` -- Version 3 (v3): -```json -{ - "id": "019a3122-ca78-7d75-91a7-a860526895b2", - "created_at": "2025-01-10T00:00:10.000Z", - "data": { "key": "value at v3" } -} -``` - -After this, you want to update to version 1 (initial version). This will create a new version (v4) with the same id, but the `created_at` timestamp is updated. - -- Version 4 (v4): -```json -{ - "id": "019a3122-ca78-7d75-91a7-a860526895b2", - "created_at": "2025-01-15T00:00:15.000Z", - "data": { "key": "initial value" } -} -``` + + Export spans to dataset + -### Datapoint id +## Build this next -When you push a new datapoint to a dataset, a UUIDv7 is generated for it. -This allows to sort datapoints by their creation order and preserve the order of insertion. +- Create/load datasets programmatically → [Datasets CLI](/datasets/cli) +- Label quickly → [Labeling queues](/queues/quickstart) (coming soon in the current server build) +- Run evals on your dataset → [Evaluations quickstart](/evaluations/quickstart) diff --git a/docs.json b/docs.json index 0c35654..ada60dc 100644 --- a/docs.json +++ b/docs.json @@ -1,6 +1,6 @@ { "$schema": "https://mintlify.com/docs.json", - "theme": "mint", + "theme": "maple", "name": "Laminar documentation", "colors": { "primary": "#ED6E40", @@ -12,39 +12,45 @@ "tabs": [ { "tab": "Documentation", + "icon": "book-open", "groups": [ { - "group": "Overview", + "group": "Laminar", + "icon": "info", "pages": [ "overview", - "installation", - { - "group": "Self-hosting", - "pages": [ - "self-hosting/setup", - "self-hosting/access-control-setup" - ] - }, - "cursor" + "tracing/quickstart" ] }, { - "group": "Tracing", + "group": "Stage 1: Build locally", + "icon": "rocket", "pages": [ - "tracing/introduction", - "tracing/quickstart", + "installation", "tracing/automatic-instrumentation", + { + "group": "Trace structure", + "pages": [ + "tracing/structure/observe", + "tracing/structure/manual-span-creation", + "tracing/structure/session", + "tracing/structure/user-id", + "tracing/structure/metadata", + "tracing/structure/tags", + "tracing/structure/image" + ] + }, { "group": "Integrations", "pages": [ "tracing/integrations/openai", "tracing/integrations/anthropic", "tracing/integrations/gemini", - "tracing/integrations/langchain", "tracing/integrations/cohere", + "tracing/integrations/litellm", "tracing/integrations/vercel-ai-sdk", "tracing/integrations/nextjs", - "tracing/integrations/litellm", + "tracing/integrations/langchain", "tracing/integrations/kernel", "tracing/integrations/claude-agent-sdk", "tracing/integrations/browser-use", @@ -53,28 +59,37 @@ "tracing/integrations/playwright", "tracing/integrations/puppeteer" ] - }, + } + ] + }, + { + "group": "Stage 2: Ship to production", + "icon": "cloud-upload", + "pages": [ + "how-it-works", { - "group": "Tracing Structure", + "group": "Self-hosting", "pages": [ - "tracing/structure/overview", - "tracing/structure/observe", - "tracing/structure/manual-span-creation", - "tracing/structure/session", - 
"tracing/structure/user-id", - "tracing/structure/metadata", - "tracing/structure/tags", - "tracing/structure/image", - "tracing/structure/continuing-traces", - "tracing/structure/providers", - "tracing/structure/flushing" + "self-hosting/setup", + "self-hosting/access-control-setup" ] }, + "tracing/otel" + ] + }, + { + "group": "Stage 3: Monitor in production", + "icon": "bar-chart", + "pages": [ "tracing/realtime", - "tracing/browser-agent-observability", - "tracing/langgraph-visualization", "tracing/events", - "tracing/otel", + { + "group": "Browser agents", + "pages": [ + "tracing/browser-agent-observability", + "tracing/langgraph-visualization" + ] + }, { "group": "Troubleshooting", "pages": [ @@ -85,14 +100,28 @@ ] }, { - "group": "Evaluations", + "group": "Stage 4: Analyze & discover errors", + "icon": "search", + "pages": [ + "playground/introduction", + "playground/playground-from-span", + "playground/tools", + "playground/history", + "sql-editor/overview", + "sql-editor/reference", + "custom-dashboards/overview" + ] + }, + { + "group": "Stage 5: Create evaluation datasets", + "icon": "database", "pages": [ - "evaluations/introduction", "evaluations/quickstart", "evaluations/using-dataset", - "evaluations/human-evaluators", "evaluations/configuration", "evaluations/reference", + "evaluations/human-evaluators", + "evaluations/reference", "evaluations/manual-evaluation", "evaluations/cookbook", { @@ -102,78 +131,90 @@ "evaluations/online-evaluators/scoring-with-hosted-evaluators", "evaluations/online-evaluators/scoring-with-sdk" ] - } + }, + { + "group": "Datasets", + "pages": [ + "datasets/adding-data", + "datasets/cli" + ] + }, + "queues/quickstart" ] - }, + } + ] + }, + { + "tab": "Recipes", + "icon": "map", + "pages": [ + "guides/fastapi", + "guides/nextjs", + "guides/nextjs-aisdk", + "guides/evaluating-tool-calls" + ] + }, + { + "tab": "Changelog", + "icon": "megaphone", + "pages": [ + "changelog/index" + ] + }, + { + "tab": "API Reference", + "icon": "code", + "pages": [ + "api-reference/introduction", { - "group": "SQL Editor", + "group": "Ingestion", "pages": [ - "sql-editor/introduction", - "sql-editor/overview", - "sql-editor/reference" + "api-reference/ingestion/traces", + "api-reference/ingestion/browser-sessions", + "api-reference/ingestion/metrics", + "api-reference/ingestion/otlp-grpc" ] }, { - "group": "Custom Dashboards", + "group": "Tracing & datasets", "pages": [ - "custom-dashboards/overview" + "api-reference/tag", + "api-reference/datasets/list", + "api-reference/datasets/datapoints", + "api-reference/datasets/upsert", + "api-reference/datasets/parquet", + "api-reference/payloads/get_payload", + "api-reference/sql/sql_query" ] }, { - "group": "Datasets", + "group": "Evaluations", "pages": [ - "datasets/introduction", - "datasets/adding-data", - "datasets/cli" + "api-reference/evals/init_eval", + "api-reference/evals/save_eval_datapoints", + "api-reference/evals/update_eval_datapoint", + "api-reference/evaluators/score" ] }, { - "group": "Labeling Queues", + "group": "Project (scoped)", "pages": [ - "queues/quickstart" + "api-reference/projects/spans", + "api-reference/projects/spans_search", + "api-reference/projects/sql_query", + "api-reference/projects/sql_validate", + "api-reference/projects/sql_to_json", + "api-reference/projects/sql_from_json", + "api-reference/projects/eval_score_stats", + "api-reference/projects/eval_score_distribution", + "api-reference/projects/realtime" ] }, { - "group": "Playground", + "group": "Health", "pages": [ - 
"playground/introduction", - "playground/playground-from-span", - "playground/tools", - "playground/history" - ] - } - ] - }, - { - "tab": "Guides", - "pages": [ - "guides/fastapi", - "guides/nextjs-aisdk", - "guides/nextjs", - "guides/evaluating-tool-calls" - ] - }, - { - "tab": "API Reference", - "groups": [ - { - "group": "API Documentation", - "pages": [ - "api-reference/introduction", - { - "group": "Evaluations", - "pages": [ - "api-reference/evals/init_eval", - "api-reference/evals/save_eval_datapoints", - "api-reference/evals/update_eval_datapoint" - ] - }, - { - "group": "SQL", - "pages": [ - "api-reference/sql/sql_query" - ] - } + "api-reference/health/health", + "api-reference/health/ready" ] } ] @@ -194,6 +235,12 @@ } }, "logo": "/logo/logo.png", + "styling": { + "breadcrumbs": true, + "css": [ + "/style.css" + ] + }, "api": { "openapi": [ "api-reference/openapi.json" @@ -238,4 +285,4 @@ "apiHost": "https://p.lmnr.ai" } } -} \ No newline at end of file +} diff --git a/evaluations/configuration.mdx b/evaluations/configuration.mdx index 0778ab1..b992b7e 100644 --- a/evaluations/configuration.mdx +++ b/evaluations/configuration.mdx @@ -1,7 +1,7 @@ --- sidebarTitle: Configuration title: Configuring Laminar evaluations -description: This page describes how to configure evaluations in Laminar and showcases some common use cases. +description: Configure evaluations in Laminar and see common use cases. --- ## Configuring evaluations to report results to locally self-hosted Laminar diff --git a/evaluations/human-evaluators.mdx b/evaluations/human-evaluators.mdx index 170b3b2..dde4eb9 100644 --- a/evaluations/human-evaluators.mdx +++ b/evaluations/human-evaluators.mdx @@ -200,7 +200,7 @@ Let's explore how to collect human evaluator data into datasets and use it to va ### Collecting human evaluator data into datasets -The [SQL Editor](/sql-editor/introduction) is a powerful tool for analyzing your human evaluator results and creating datasets for training or validating LLM-as-a-judge evaluators. Here's how to leverage it: +The [SQL Editor](/sql-editor/overview) is a powerful tool for analyzing your human evaluator results and creating datasets for training or validating LLM-as-a-judge evaluators. Here's how to leverage it: ### Finding human evaluator spans @@ -223,4 +223,4 @@ After running this query, click **"Export to Dataset"** to: 2. Map the `input` to dataset `data` field 3. Map the `output` to dataset `target` field -Then you can use this dataset to run evaluation of your LLM-as-a-judge evaluator and use human score in `target` field as an expected score for the LLM-as-a-judge evaluator. \ No newline at end of file +Then you can use this dataset to run evaluation of your LLM-as-a-judge evaluator and use human score in `target` field as an expected score for the LLM-as-a-judge evaluator. diff --git a/evaluations/introduction.mdx b/evaluations/introduction.mdx deleted file mode 100644 index c76666f..0000000 --- a/evaluations/introduction.mdx +++ /dev/null @@ -1,48 +0,0 @@ ---- -sidebarTitle: Introduction -title: Introduction to Laminar evaluations ---- -import GetProjectApiKey from '/snippets/get-project-api-key.mdx'; - -Evaluation is the process of validating and testing the outputs that your AI applications are producing. Having strong evaluations ("evals") means a more stable, reliable application that is resilient to code and model changes. An eval is a task used to measure the quality of the output of an LLM or LLM system. - -
-Screenshot of a trace visualization -
- -### Why do we need evals? - -In short, evaluations bring rigor to AI development process. - -When you are building with foundation models, creating high-quality evals is one of the most impactful things you can do. Developing AI solutions involves an iterative design process. Without evals, it can be difficult and time-intensive to understand how different model versions and prompts affect your use case. - -With continuous model upgrades from providers, evals allow you to efficiently test model performance for your specific uses in a standardized way. Developing a suite of evals customized to your objectives will help you quickly understand how new models perform for your applications. You can also make evals part of your CI/CD pipeline to ensure desired accuracy before deployment. - -### Types of evals - -There are two main approaches to evaluating outputs: - -**1. Logic-based evaluation**: The simplest and most common type uses code to check outputs against expected answers. For example: -- String matching to check if the completion includes an expected phrase -- Parsing to validate proper JSON output -- Custom logic to verify domain-specific requirements - -**2. Model-based evaluation**: A two-stage process where: -- First, the model generates a response to the input -- Then, another model (ideally more powerful) evaluates the quality of that response - -Model-based evaluation works best with powerful models when the desired output has significant variation, such as open-ended questions or creative tasks. - -## How evals differ from traditional unit tests - -Unlike traditional unit tests that focus on binary pass/fail outcomes, evaluations for AI systems require continuous tracking and visualization of performance metrics over time. As models evolve and prompts are refined, being able to compare performance across different versions becomes critical. - -What makes evals unique is their ability to: -- Track nuanced quality metrics beyond simple correctness -- Visualize performance trends across model versions and prompt iterations -- Compare multiple implementations side-by-side -- Detect subtle regressions that might not be obvious in isolated tests - -Laminar provides the best developer experience and visualization capabilities for AI evaluations, making it easy to understand how your models are performing and where improvements can be made. With comprehensive dashboards and detailed tracing, you can get deep insights into every aspect of your AI system's behavior. - -Check out our [Quickstart Guide](/evaluations/quickstart) to run your first evaluation. 
\ No newline at end of file diff --git a/evaluations/manual-evaluation.mdx b/evaluations/manual-evaluation.mdx index eaf20b2..4434b3b 100644 --- a/evaluations/manual-evaluation.mdx +++ b/evaluations/manual-evaluation.mdx @@ -42,7 +42,7 @@ import { OpenAI } from "openai"; Laminar.initialize({ projectApiKey: 'your_project_api_key', instrumentModules: { - openAI: OpenAI // Automatically traces OpenAI calls + OpenAI // Automatically traces OpenAI calls } }); @@ -224,10 +224,10 @@ import { Laminar, LaminarClient, observe } from "@lmnr-ai/lmnr"; import { OpenAI } from "openai"; Laminar.initialize({ - projectApiKey: 'your_project_api_key', - instrumentModules: { - openAI: OpenAI - } + projectApiKey: 'your_project_api_key', + instrumentModules: { + OpenAI + } }); const client = new LaminarClient({ @@ -436,4 +436,4 @@ For detailed API specifications including request/response schemas, visit: Update datapoint with execution results and scores - \ No newline at end of file + diff --git a/evaluations/online-evaluations.mdx b/evaluations/online-evaluations.mdx index b2bfbc7..b853155 100644 --- a/evaluations/online-evaluations.mdx +++ b/evaluations/online-evaluations.mdx @@ -24,7 +24,7 @@ and run evaluations post-factum. - **Span path** – this is an identifier of your LLM function. It is constructed from the location of the call within your code, and must ideally be unique. For more -information about what a span is, see the [tracing documentation](/tracing/introduction). +information about what a span is, see the [tracing quickstart](/tracing/quickstart). - **Span label** – a label attached to a span. It can be a boolean or a categorical label. The label can be both set manually or by an evaluator. - **Evaluator** – the function evaluating the input and output of your LLM call. It diff --git a/evaluations/online-evaluators/scoring-with-sdk.mdx b/evaluations/online-evaluators/scoring-with-sdk.mdx index 137a8e1..0135e38 100644 --- a/evaluations/online-evaluators/scoring-with-sdk.mdx +++ b/evaluations/online-evaluators/scoring-with-sdk.mdx @@ -16,7 +16,7 @@ Create a score for a span using either a trace ID or span ID. When using a trace import { LaminarClient, Laminar, observe } from "@lmnr-ai/lmnr"; const laminarClient = new LaminarClient({ - apiKey: "your-project-api-key" + projectApiKey: "your-project-api-key" }); // First, capture your LLM calls @@ -71,7 +71,7 @@ await laminarClient.evaluators.score({ from lmnr import LaminarClient, Laminar import time -laminar_client = LaminarClient(api_key="your-project-api-key") +laminar_client = LaminarClient(project_api_key="your-project-api-key") # First, capture your LLM calls with Laminar.start_as_current_span( diff --git a/evaluations/quickstart.mdx b/evaluations/quickstart.mdx index c6c096f..c749e33 100644 --- a/evaluations/quickstart.mdx +++ b/evaluations/quickstart.mdx @@ -1,329 +1,129 @@ --- -title: Get started with Laminar evaluations +title: Evaluations sidebarTitle: Quickstart --- -import GetProjectApiKey from '/snippets/get-project-api-key.mdx'; -This guide will walk you through running your first evaluation using Laminar's evaluation system. +Score your AI automatically. Catch regressions before production. 
-Laminar provides a structured approach to create, run, and track your AI system's performance through these key components: - -- **Executors** - Functions that process inputs and produce outputs, such as prompt templates, LLM calls, or production logic -- **Evaluators** - Functions that assess outputs against targets or quality criteria, producing numeric scores -- **Datasets** - Collections of datapoints (test cases) with 3 key elements: - - `data` - Required JSON input sent to the executor - - `target` - Optional reference data sent to the evaluator, typically containing expected outputs - - `metadata` - Optional metadata. This can be used to filter evaluation results in the UI after the evaluation is run. -- **Visualization** - Tools to track performance trends and detect regressions over time -- **Tracing** - Automatic recording of execution flow and model invocations - -Example datapoint: -```json5 -{ - "data": { - "question": "What is the capital of France? Respond in one word.", - }, - "target": { - "answer": "Paris" - }, - "metadata": { - "category": "geography" - } -} -``` - -**Evaluation Groups** group related evaluations to assess one feature or component, with results aggregated for comparison. - -## Evaluation Lifecycle - -For each datapoint in a dataset: - -1. The executor receives the `data` as input -2. The executor runs and its output is stored -3. Both the executor output and `target` are passed to the evaluator -4. The evaluator produces either a numeric score or a JSON object with multiple numeric scores -5. Results are stored and can be visualized to track performance over time - -This approach helps you continuously measure your AI system's performance as you make changes, showing the impact of model updates, prompt revisions, and code changes. - -### Evaluation function types - -Each executor takes in the `data` as it is defined in the datapoints. -Evaluator accepts the output of the executor as its first argument, -and `target` as it's defined in the datapoints as the second argument. - -This means that the type of the `data` fields in your datapoints must -match the type of the first parameter of the executor function. Similarly, -the type of the `target` fields in your datapoints must match the type of -the second parameter of the evaluator function(s). - -Python is a bit more permissive. If you see type errors in TypeScript, -make sure the data types and the parameter types match. - -For a more precise description, here's the partial TypeScript type signature of the `evaluate` function: - -```javascript -evaluate( - data: { - data: D, - target?: T, - }, - executor: (data: D, ...args: any[]) => O | Promise; - evaluators: { - [key: string]: (output: O, target?: T, ...args: any[]) => - number | { [key: string]: number } - }, - // ... other parameters -) -``` - -See full reference [here](/evaluations/reference#typescript-evaluation-types). - -## Create your first evaluation - -### Prerequisites - - + + Evaluation dashboard with scores and traces + -### Create an evaluation file +## Why evaluations? 
- - +- Catch regressions when you change models, prompts, or tools +- Compare variants side by side with scores +- Debug failures — every datapoint links to its trace -Create a file named `my-first-evaluation.ts` and add the following code: +## Run your first eval -```javascript my-first-evaluation.ts + +```typescript TypeScript +// my-first-eval.ts import { evaluate } from '@lmnr-ai/lmnr'; -import { OpenAI } from 'openai'; +import { OpenAI } from 'openai'; const client = new OpenAI(); -const capitalOfCountry = async (data: {country: string}) => { - // replace this with your LLM call or custom logic +const capitalOf = async (data: { country: string }) => { const response = await client.chat.completions.create({ - model: 'gpt-4.1-nano', - messages: [ - { - role: 'user', - content: `What is the capital of ${data['country']}? ` + - 'Answer only with the capital, no other text.' - } - ] + model: 'gpt-4o-mini', + messages: [{ role: 'user', content: `Capital of ${data.country}? One word.` }], }); return response.choices[0].message.content || ''; -} +}; evaluate({ data: [ - { - data: { country: 'France' }, - target: 'Paris', - }, - { - data: { country: 'Germany' }, - target: 'Berlin', - }, + { data: { country: 'France' }, target: 'Paris' }, + { data: { country: 'Germany' }, target: 'Berlin' }, ], - executor: capitalOfCountry, + executor: capitalOf, evaluators: { - accuracy: (output: string, target: string | undefined): number => { - if (!target) return 0; - return output.includes(target) ? 1 : 0; - } + accuracy: (output, target) => (output.includes(target) ? 1 : 0), + }, + config: { + instrumentModules: { OpenAI }, }, - config: { - instrumentModules: { - openAI: OpenAI - } - } -}) +}); ``` - -It is important to pass the `config` object with `instrumentModules` to `evaluate` to ensure that the OpenAI client and any other instrumented modules are instrumented. - - - - -Create a file named `my-first-evaluation.py` and add the following code: - -```python my-first-evaluation.py +```python Python +# my_first_eval.py from lmnr import evaluate from openai import OpenAI client = OpenAI() -def capital_of_country(data: dict) -> str: - # replace this with your LLM call or custom logic - return client.chat.completions.create( - model="gpt-4.1-nano", - messages=[ - { - "role": "user", - "content": f"Generate a poem about {data['country']}. " + - "Answer only with the poem, no other text." - } - ] - ).choices[0].message.content +def capital_of(data): + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": f"Capital of {data['country']}? One word."}] + ) + return response.choices[0].message.content -def accuracy(output: str, target: str) -> int: +def accuracy(output, target): return 1 if target in output else 0 evaluate( data=[ - { - "data": {"country": "France"}, - "target": "Paris" - }, - { - "data": {"country": "Germany"}, - "target": "Berlin" - }, + {"data": {"country": "France"}, "target": "Paris"}, + {"data": {"country": "Germany"}, "target": "Berlin"}, ], - executor=capital_of_country, - evaluators={"accuracy": accuracy} + executor=capital_of, + evaluators={"accuracy": accuracy}, ) ``` - - - -### Run the evaluation - -You can run evaluations in two ways: using the `lmnr eval` CLI or directly executing the evaluation file. + -#### Using the CLI +Run it: -The Laminar CLI automatically detects top-level `evaluate` function calls in your files - you don't need to wrap them in a `main` function or any special structure. 
- - - -```sh -export LMNR_PROJECT_API_KEY= -npx lmnr eval my-first-evaluation.ts + +```bash TypeScript +LMNR_PROJECT_API_KEY=your_key npx lmnr eval my-first-eval.ts ``` -To run multiple evaluations, place them in an `evals` directory with the naming pattern `*.eval.{ts,js}`: - -``` -├─ src/ -├─ evals/ -│ ├── my-first-evaluation.eval.ts -│ ├── my-second-evaluation.eval.ts -│ ├── ... +```bash Python +LMNR_PROJECT_API_KEY=your_key lmnr eval my_first_eval.py ``` + -Then run all evaluations with a single command: -```sh -npx lmnr eval -``` - - -```sh -# 1. Make sure `lmnr` is installed in a virtual environment -# lmnr --help -# 2. Run the evaluation -export LMNR_PROJECT_API_KEY= -lmnr eval my-first-evaluation.py -``` +## What you'll see -To run multiple evaluations, place them in an `evals` directory with the naming pattern `eval_*.py` or `*_eval.py`: +The CLI prints a link to your evaluation run: ``` -├─ src/ -├─ evals/ -│ ├── eval_first.py -│ ├── second_eval.py -│ ├── ... -``` - -Then run all evaluations with a single command: -```sh -lmnr eval +✓ Evaluation complete + View results: https://lmnr.ai/project/xxx/evals/yyy ``` - - - -#### Running as a standalone script - -You can also import and call `evaluate` directly from your application code: - - - -```bash -ts-node my-first-evaluation.ts -# or -npx tsx my-first-evaluation.ts -``` - - -```bash -python my-first-evaluation.py -``` - - - -The `evaluate` function is flexible and can be used both in standalone scripts processed by the CLI and integrated directly into your application code. - - -Evaluator functions must return either a single numeric score or a JSON object where each key is a score name and the value is a numeric score. - - - -No need to initialize Laminar - `evaluate` automatically initializes Laminar behind the scenes. All instrumented function calls and model invocations are traced without any additional setup. - - -### View evaluation results - -When you run an evaluation from the CLI, Laminar will output the link to the dashboard where you can view the evaluation results. - -Laminar stores every evaluation result. A run for every datapoint is represented as a trace. You can view the results and corresponding traces in the evaluations page. - Example evaluation + Eval results with trace links -## Tracking evaluation progress +Click any row to see the full trace for that datapoint. -To track the score progression over time or compare evaluations side-by-side, you need to group them together. This can be achieved by passing the `groupName` parameter to the `evaluate` function. +## How it works - - -```javascript {7} -import { evaluate, LaminarDataset } from '@lmnr-ai/lmnr'; +1. **Dataset** — list of inputs (`data`) and expected outputs (`target`) +2. **Executor** — your function that produces output (LLM call, agent, etc.) +3. **Evaluators** — functions that score the output (return 0-1) -evaluate({ - data: new LaminarDataset("name_of_your_dataset"), - executor: yourExecutorFunction, - evaluators: {evaluatorName: yourEvaluator}, - groupName: "evals_group_1", -}); -``` - - -```python {9} -from lmnr import evaluate, LaminarDataset -import os +Laminar runs your executor on each datapoint, scores the output, and records a trace for every run. -evaluate( - data=LaminarDataset("name_of_your_dataset"), - executor=your_executor_function, - evaluators={"evaluator_name": your_evaluator}, - project_api_key=os.environ["LMNR_PROJECT_API_KEY"], - group_name="evals_group_1", - # ... 
other optional parameters -) -``` - - +## Next steps - - Example evaluation progression - \ No newline at end of file + + + LLM-as-judge without writing code + + + Load datasets from your project + + + Block deployments on eval failures + + + Add human labels to the loop + + diff --git a/guides/evaluating-tool-calls.mdx b/guides/evaluating-tool-calls.mdx index f602361..03636fc 100644 --- a/guides/evaluating-tool-calls.mdx +++ b/guides/evaluating-tool-calls.mdx @@ -1,12 +1,12 @@ --- title: Evaluating LLM Tool Calls with Laminar sidebarTitle: Evaluating Tool Calls -description: A comprehensive guide to evaluating AI agent tool calls using a Data Analysis Assistant example - from production tracing to systematic evaluation +description: A comprehensive guide to evaluating agent tool calls using a Data Analysis Assistant example - from production tracing to systematic evaluation --- ## Overview -In this guide, we'll follow the complete journey of building and improving a **Data Analysis Assistant** - an AI agent that helps users analyze their data, create visualizations, and generate insights. This example showcases how Laminar's end-to-end platform helps you build reliable tool-calling agents. +In this guide, we'll follow the complete journey of building and improving a **Data Analysis Assistant** - an agent that helps users analyze their data, create visualizations, and generate insights. This example showcases how Laminar's end-to-end platform helps you build reliable tool-calling agents. ### **Why This Guide Matters** Tool-calling agents are powerful but complex - they need to select the right tools, use correct parameters, and handle multi-step workflows. Unlike simple text generation, evaluating these agents requires understanding their decision-making process and systematic improvement based on real user interactions. @@ -658,11 +658,11 @@ Use these insights to: - **Add New Tools**: Identify missing capabilities from user feedback - **Update Training Data**: Create more diverse evaluation cases -This approach ensures the Data Analysis Assistant continuously improves based on real user interactions and systematic evaluation, leading to more reliable and useful AI agents. +This approach ensures the Data Analysis Assistant continuously improves based on real user interactions and systematic evaluation, leading to more reliable and useful agents. ## Learn More To dive deeper into the concepts covered in this guide: -- **[Tracing Documentation](/tracing/introduction)**: Learn more about automatic instrumentation, manual span creation, and advanced tracing patterns -- **[Evaluations Documentation](/evaluations/introduction)**: Explore advanced evaluation patterns, custom evaluators, and evaluation best practices \ No newline at end of file +- **[Tracing quickstart](/tracing/quickstart)**: Learn more about automatic instrumentation, manual span creation, and advanced tracing patterns +- **[Evaluations Documentation](/evaluations/quickstart)**: Explore advanced evaluation patterns, custom evaluators, and evaluation best practices diff --git a/guides/nextjs-aisdk.mdx b/guides/nextjs-aisdk.mdx index 6a32fa5..0f31779 100644 --- a/guides/nextjs-aisdk.mdx +++ b/guides/nextjs-aisdk.mdx @@ -45,7 +45,7 @@ cp .env.local.example .env.local ``` And then fill in the `.env.local` file. -Get [Laminar project API key](https://docs.lmnr.ai/tracing/introduction#2-initialize-laminar-in-your-application). +Get [Laminar project API key](https://docs.lmnr.ai/tracing/quickstart#three-commands-to-first-trace). 
Get [OpenAI API key](https://platform.openai.com/api-keys) diff --git a/guides/nextjs.mdx b/guides/nextjs.mdx index b107558..e99c06c 100644 --- a/guides/nextjs.mdx +++ b/guides/nextjs.mdx @@ -44,7 +44,7 @@ cp .env.local.example .env.local ``` And then fill in the `.env.local` file. -Get [Laminar project API key](https://docs.lmnr.ai/tracing/introduction#2-initialize-laminar-in-your-application). +Get [Laminar project API key](https://docs.lmnr.ai/tracing/quickstart#three-commands-to-first-trace). Get [OpenAI API key](https://platform.openai.com/api-keys). Get [Anthropic API key](https://console.anthropic.com/settings/keys). diff --git a/how-it-works.mdx b/how-it-works.mdx new file mode 100644 index 0000000..c4ab10f --- /dev/null +++ b/how-it-works.mdx @@ -0,0 +1,60 @@ +--- +title: How Laminar Works +description: Architecture overview — what gets captured and where it goes. +--- + +Laminar captures telemetry from your AI app and streams it to a dashboard where you debug, evaluate, and analyze. + +## Data flow + +``` +Your App Laminar Cloud / Self-hosted +┌─────────────────┐ ┌─────────────────────────┐ +│ Laminar. │ │ │ +│ initialize(...) │ ──────► │ Trace storage (ClickHouse) +│ │ gRPC │ Dashboard (UI) │ +│ OpenAI calls │ │ Evaluations (scoring) │ +│ Agent steps │ ◄────── │ │ +│ Browser events │ HTTP │ │ +└─────────────────┘ └─────────────────────────┘ +``` + +## What gets captured + +`Laminar.initialize(...)` patches supported SDKs at runtime. For each LLM call Laminar records: + +- Inputs: full prompt/messages +- Outputs: complete model response +- Metadata: model name, temperature, tokens +- Cost: calculated from token counts and model pricing +- Timing: start/end/latency + +Use `observe()` to wrap functions and build parent/child span trees. + +## Spans and traces + +A **span** is one operation (LLM call or function). A **trace** is a tree of spans for one request. + +``` +trace: handle_support_ticket +├── span: classify_intent (LLM) +├── span: fetch_context (function) +└── span: generate_response (LLM) +``` + + + Trace tree with timings and costs + + +## Where data lives + +- **Laminar Cloud**: managed storage and dashboard. +- **Self-hosted**: data stays in your ClickHouse; switch by changing the endpoint. + +## Security + +- Project-scoped API keys +- TLS in transit; encryption at rest +- Self-hosting keeps data in your network + +Next: get a trace in minutes → [Quickstart](/tracing/quickstart) diff --git a/installation.mdx b/installation.mdx index c47d8fd..8d307da 100644 --- a/installation.mdx +++ b/installation.mdx @@ -1,112 +1,67 @@ --- title: Installation -description: Laminar installation guide +description: Copy/paste installs to get your first Laminar trace fast. --- -## Install the package +Laminar is designed to get you to a live trace in minutes. Pick your language, run the three commands, and your LLM calls will show up with inputs, outputs, tokens, and costs. - - +## What you'll do -Install the package from [npm](https://www.npmjs.com/package/@lmnr-ai/lmnr). +- Install Laminar alongside your model SDK. +- Initialize Laminar once at startup. +- Run your existing code and open the trace. 
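+
+The install steps below create a small bootstrap module and then run your app's entry point. For reference, a minimal entry point might look like the following sketch — the `app.ts` filename and the `./bootstrap` import are assumptions, not required names; the only requirement is that Laminar is initialized before an SDK client is constructed:
+
+```typescript
+// app.ts — load the bootstrap first so Laminar instruments the OpenAI SDK
+// before any client is created.
+import './bootstrap';
+import { OpenAI } from 'openai';
+
+async function main() {
+  const client = new OpenAI();
+  const response = await client.chat.completions.create({
+    model: 'gpt-4o-mini',
+    messages: [{ role: 'user', content: 'Hello from Laminar!' }],
+  });
+  console.log(response.choices[0].message.content);
+}
+
+main();
+```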
-```sh -npm add @lmnr-ai/lmnr -``` +## JavaScript/TypeScript - -yarn +```bash +# 1) Install Laminar + OpenAI client +npm install @lmnr-ai/lmnr openai -```sh -yarn add @lmnr-ai/lmnr -``` +# 2) Initialize Laminar before creating SDK clients +cat <<'EOF' > bootstrap.ts +import { Laminar } from '@lmnr-ai/lmnr'; +import { OpenAI } from 'openai'; -pnpm +Laminar.initialize({ + projectApiKey: process.env.LMNR_PROJECT_API_KEY, + instrumentModules: { OpenAI }, +}); +EOF -```sh -pnpm add @lmnr-ai/lmnr +# 3) Run with your Laminar key +LMNR_PROJECT_API_KEY=your_key node app.js ``` - - - -Install the package from [PyPI](https://pypi.org/project/lmnr/). + +Yarn: `yarn add @lmnr-ai/lmnr openai` +pnpm: `pnpm add @lmnr-ai/lmnr openai` + -```sh -pip install --upgrade 'lmnr[all]' -``` +## Python -This will install the package and enable all the available automatic instrumentations. +```bash +# 1) Install Laminar + OpenAI client +pip install --upgrade "lmnr[openai]" -However, this installs a lot of dependencies, so you can specify the extras to enable -specific automatic instrumentations of client SDKs/libraries. +# 2) Initialize Laminar before creating SDK clients +cat <<'EOF' > bootstrap.py +import os +from lmnr import Laminar +from openai import OpenAI -For example, to enable automatic instrumentations of Anthropic and OpenAI, run: +Laminar.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"]) +EOF -```sh -pip install --upgrade 'lmnr[anthropic,openai]' +# 3) Run with your Laminar key +LMNR_PROJECT_API_KEY=your_key python app.py ``` - -If you do not specify any extras, no automatic instrumentation will be enabled. - - - - -poetry - -```sh -poetry add 'lmnr[anthropic,openai]' -``` + +Add extras to auto-trace specific SDKs. Examples: -uv - -```sh -uv add lmnr --extra anthropic --extra openai -``` - -uv pip - -```sh -uv pip install 'lmnr[anthropic,openai]' -``` +- Anthropic + OpenAI: `pip install --upgrade "lmnr[anthropic,openai]"` +- LangChain + LlamaIndex: `pip install --upgrade "lmnr[langchain,llamaindex]"` +- Vector DBs (Pinecone, Weaviate, Qdrant): `pip install --upgrade "lmnr[pinecone,weaviate,qdrant]"` +Full extras list: alephalpha, anthropic, bedrock, cohere, google-generativeai, groq, haystack, lancedb, langchain, llamaindex, marqo, milvus, mistralai, ollama, openai, pinecone, qdrant, replicate, sagemaker, together, transformers, vertexai, watsonx, weaviate. - - -Full list of available extras: - -- `alephalpha` -- `anthropic` -- `bedrock` -- `cohere` -- `google-generativeai` -- `groq` -- `haystack` -- `lancedb` -- `langchain` -- `llamaindex` -- `marqo` -- `milvus` -- `mistralai` -- `ollama` -- `openai` -- `pinecone` -- `qdrant` -- `replicate` -- `sagemaker` -- `together` -- `transformers` -- `vertexai` -- `watsonx` -- `weaviate` - - - - The extras configuration is only available since version `0.4.39`. - Before that, default option would install all the available instruments. - - - - - \ No newline at end of file diff --git a/overview.mdx b/overview.mdx index 36df142..1473d3a 100644 --- a/overview.mdx +++ b/overview.mdx @@ -1,42 +1,82 @@ --- title: Laminar -sidebarTitle: Laminar +sidebarTitle: Introduction --- -Laminar is a comprehensive **open-source platform** for observability and evaluations of AI agents. +See exactly what your AI is doing. -- **Open-source** - Fully open-source and easy to self-host. 
Give us a ⭐ [here](https://github.com/lmnr-ai/lmnr) -- **Cloud** - Managed cloud service available at [laminar.sh](https://laminar.sh) +Initialize once to get full visibility into every LLM call, browser action, and agent step. Open-source. Self-hostable. Production-ready. -## Get Started + + + First trace in 60 seconds + + + Star us on GitHub + + - - - Instrument your entire AI application and automatically trace popular AI libraries and SDKs — **OpenAI, Anthropic, Gemini, Vercel AI SDK, LangChain, Browser Use**, and more. - + + Laminar trace view + - - Measure, track, and improve your AI application performance with powerful evaluation tools. - +## The 30-second version - - Query all your data stored in Laminar using SQL for advanced analytics, custom dashboards, and dataset creation. - + +```typescript TypeScript +import { Laminar } from '@lmnr-ai/lmnr'; +import { OpenAI } from 'openai'; - - Create and manage custom dashboards from trace and evaluation data. - +Laminar.initialize({ + projectApiKey: process.env.LMNR_PROJECT_API_KEY, + instrumentModules: { OpenAI }, +}); - - Experiment with prompts, test different models, and iterate on your AI application in an interactive environment. - +// Your existing code works unchanged +const client = new OpenAI(); +const response = await client.chat.completions.create({ + model: 'gpt-4o-mini', + messages: [{ role: 'user', content: 'Hello!' }], +}); +``` - - Use streamlined UI to quickly label and build datasets for evaluations from trace data and other datasets. - +```python Python +import os +from lmnr import Laminar +from openai import OpenAI - - Create and manage datasets for evaluations and other use cases. - +Laminar.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"]) +client = OpenAI() - +response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Hello!"}] +) +``` + + +Open your dashboard. Every call appears with inputs, outputs, tokens, and cost. + +## What you can do + + + + Automatic instrumentation for OpenAI, Anthropic, Gemini, LangChain, and more. + + + Session recordings synchronized with traces. See exactly what your agent saw. + + + Score prompts and agents. Every datapoint links to its trace. + + + Query traces, evals, and costs. Build dashboards or export datasets. + + + +## Start here + +1. [Quickstart](/tracing/quickstart) — first trace in 60 seconds +2. [Installation](/installation) — add Laminar to your stack +3. [How Laminar works](/how-it-works) — architecture overview for production +4. [Evaluations](/evaluations/quickstart) — score your AI once you have traces and datasets diff --git a/playground/introduction.mdx b/playground/introduction.mdx index b8b6e5e..d63a6a9 100644 --- a/playground/introduction.mdx +++ b/playground/introduction.mdx @@ -1,24 +1,37 @@ --- sidebarTitle: Introduction -title: Introduction to Laminar Playground +title: Iterate faster with Laminar Playground --- -Playground is an interactive environment that allows you to experiment with AI models, test prompts, and analyze responses. +Playground lets you tweak prompts, tools, and models using real trace context without extra wiring. Open any span, reproduce it, and iterate until it is right. -## What is the Playground? + + Laminar Playground interface + -The Playground serves as a sandbox environment where you can: +## What you'll do here -- **Prompt experimentation**: Test different prompt variations and input configurations with instant results. 
-- [**Playground from span**](/playground/playground-from-span): Reproduce and experiment with exact configurations from any span by opening it directly in playground. -- [**Tool integration**](/playground/tools): Configure and test custom tools that models can call during conversations. -- [**Session history**](/playground/history): Access complete traces of all previous runs with full context and configurations. +- Reproduce any trace span (LLM/tool call) in one click. +- Edit prompts, models, and tool configs with instant feedback. +- Save variants and compare outputs; keep session history for later. -
-  [image: Laminar Playground Interface]
+## Fastest way to try it -To access the playground: -1. Navigate to your Laminar dashboard -2. Create playground by clicking on "New playground" -3. Choose your preferred model and start experimenting \ No newline at end of file +1. Run the [Tracing quickstart](/tracing/quickstart) to generate a trace. +2. In the Laminar UI, open that trace and click **“Open in Playground.”** +3. Edit the prompt/model/tool settings; run and compare outputs. +4. Save the best variant or push it back to your app. + +## Key features + +- **Playground from span**: jump in directly from any LLM/tool span with the original inputs and settings. +- **Tools**: configure custom tools and test tool-calling models end-to-end. +- **History**: every run is saved with inputs/outputs, tokens, and cost. +- **Shareable**: copy a link to share the exact playground state with teammates. + +## Build this next + +- Open a trace and reproduce a span → [Playground from span](/playground/playground-from-span) +- Add and test tools → [Tools](/playground/tools) +- Review previous runs → [History](/playground/history) +- Capture the trace that feeds your playground → [Tracing quickstart](/tracing/quickstart) diff --git a/projects/introduction.mdx b/projects/introduction.mdx index c8d086c..4325756 100644 --- a/projects/introduction.mdx +++ b/projects/introduction.mdx @@ -5,7 +5,7 @@ title: Introduction ## What is it A Laminar project is a collection of [datasets](/datasets/introduction), -[evaluations](/evaluations/introduction), and [traces](/tracing/introduction). +[evaluations](/evaluations/quickstart), and [traces](/tracing/quickstart). Access [API keys](/api-reference/introduction#authentication) and environment variables are configured at the project level as well. Think about it as a scope of a certain work. A project always belong to a workspace. ## Creating a project diff --git a/queues/quickstart.mdx b/queues/quickstart.mdx index 785afc5..2ede0d2 100644 --- a/queues/quickstart.mdx +++ b/queues/quickstart.mdx @@ -1,76 +1,43 @@ --- title: Labeling Queues sidebarTitle: Quickstart -description: Labeling queues are a way to quickly label and build datasets for evaluations from span data and other datasets. +description: Turn traces and datasets into labeled examples fast. --- -## What is a Labeling Queue? +Labeling queues let you turn raw outputs into labeled targets with a fast, focused UI. Push spans or dataset rows into a queue, label them, and write back to datasets for evals or training. -- A labeling queue is a collection of items that need to be labeled. -- Labeling queue is an actual queue with FIFO (first in, first out) order. -- Items in the queue have exactly the same shape as datapoints in a dataset. -- Labeling operation in this context means writing a data to the target field of a datapoint. + + Labeling queue interface + -
-  [image: Screenshot of a trace visualization]
+ +The current server build does not expose labeling queue APIs or UI. Keep this page bookmarked for when the feature ships; for now, use datasets directly for labeling workflows. + -## How to Use the Labeling Interface +## What you'll do -When you open a labeling queue, you'll see a split-screen interface designed for efficient labeling: - -### Payload view -The left panel shows you the full JSON payload of the current item you're labeling. -Payload is a JSON object with the same shape as a datapoint in a dataset. It has `data` and `target` fields. - - -### Target Editor -This is where you do the actual labeling work: -- **Edit the JSON in the target editor** to correct, improve, or write new data to the target field. -- **Use proper JSON formatting** - the editor will help you with syntax highlighting - - -As you type in the target editor on the right, watch how the `"target"` section in the left payload updates in real-time. This helps you see exactly what will be saved to your dataset. - - - -### Save Preferences -- **Select your target dataset** from the dropdown to choose where completed items should go -- **Click "Complete"** to save the current item to the dataset and move to the next item in the queue. - -### Navigation -- **Check the item counter** ("Item 5 of 11") to see how many items you've completed and how many remain -- **Use the navigation buttons** to move through your queue: - - Click **"Skip"** if you want to pass on the current item without making changes - - Use **"Prev"** and **"Next"** to move between items (helpful for comparing similar cases) - - Click **"Complete"** when you're satisfied with your labeling - -## Push items to the queue - -### From Span View -You can push individual spans directly to a labeling queue for labeling. -This is particularly useful when you want to label specific model outputs for evaluation. -Span input will be added to the `data` field of the datapoint, and span output will be added to the `target` field. - -
-  [image: Screenshot of a trace visualization]
- -### From Dataset View -You can also push existing datapoints from datasets into a labeling queue. -You can either push individual datapoint or select a subset of datapoints in the dataset view. - -
-  [image: Screenshot of a trace visualization]
+- Queue items that share the dataset shape (`data`, `target`, `metadata`). +- Edit targets quickly with a side-by-side payload + editor. +- Save to a dataset in a single click; keep FIFO flow to stay organized. +## Fast path +1. From any trace span, click **“Send to labeling queue.”** Inputs become `data`; outputs become `target`. +2. Open the queue; left pane shows payload, right pane is the target editor. +3. Choose the destination dataset, click **Complete**, move to the next item. +4. Use **Skip/Prev/Next** to navigate; item counter shows progress. +## Other ways to populate queues +- From a dataset: select rows and push them into a queue for review. +- From SQL: export query results into a dataset, then queue them for labeling. -When pushing items to a queue, they maintain the same JSON structure as datapoints in datasets, ensuring consistency between your labeling workflow and final datasets. +Queues preserve the dataset JSON shape, so labeled items drop straight back into evals without extra mapping. +## Build this next - +- Run evals on your newly labeled data → [Evaluations quickstart](/evaluations/quickstart) +- Export tricky cases via SQL → [SQL editor](/sql-editor/overview) +- Capture the traces that feed your queues → [Tracing quickstart](/tracing/quickstart) diff --git a/snippets/Trace.mdx b/snippets/Trace.mdx new file mode 100644 index 0000000..861c86d --- /dev/null +++ b/snippets/Trace.mdx @@ -0,0 +1,58 @@ +import React, { useMemo } from "react"; + +export default function Trace({ id, traceId, spanId, host, height = 760, previewOnly = false }) { + const baseStyles = { + container: { + border: "1px solid rgba(0,0,0,0.08)", + background: "linear-gradient(145deg, rgba(0,0,0,0.02), rgba(0,0,0,0.01))", + borderRadius: 12, + color: "inherit", + fontFamily: "Inter, system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif", + boxShadow: "0 10px 40px rgba(0,0,0,0.1)", + overflow: "hidden", + }, + notice: { + padding: 14, + fontSize: 14, + color: "#4b5563", + }, + frame: { + border: "none", + width: "100%", + display: "block", + }, + }; + + const traceIdentifier = traceId || id; + const resolvedHost = useMemo(() => { + const fallback = "https://laminar.sh"; + const detected = typeof window !== "undefined" ? window.location.origin : ""; + const base = host || detected || fallback; + return base.endsWith("/") ? base.slice(0, -1) : base; + }, [host]); + + const src = traceIdentifier + ? `${resolvedHost}/shared/traces/${traceIdentifier}${spanId ? `?spanId=${spanId}` : ""}` + : ""; + + if (!traceIdentifier || previewOnly) { + return ( +
+      <div style={baseStyles.container}>
+        <div style={baseStyles.notice}>
+          Preview your trace embed here. Provide a `traceId` (and optional `spanId`) to render the shared trace viewer.
+        </div>
+      </div>
+    );
+  }
+
+  return (
+    <div style={baseStyles.container}>