+ );
+}
diff --git a/packages/programs-react/src/components/index.ts b/packages/programs-react/src/components/index.ts
new file mode 100644
index 000000000..886c22d62
--- /dev/null
+++ b/packages/programs-react/src/components/index.ts
@@ -0,0 +1,39 @@
+/**
+ * Component exports.
+ */
+
+export {
+ ProgramExampleContextProvider,
+ useProgramExampleContext,
+ type ProgramExampleState,
+ type ProgramExampleProps,
+} from "./ProgramExampleContext.js";
+
+export { Opcodes } from "./Opcodes.js";
+
+export { SourceContents } from "./SourceContents.js";
+
+export { HighlightedInstruction } from "./HighlightedInstruction.js";
+
+// Trace components
+export {
+ TraceProvider,
+ useTraceContext,
+ type TraceState,
+ type TraceProviderProps,
+ type ResolvedVariable,
+} from "./TraceContext.js";
+
+export {
+ TraceControls,
+ TraceProgress,
+ type TraceControlsProps,
+ type TraceProgressProps,
+} from "./TraceControls.js";
+
+export {
+ VariableInspector,
+ StackInspector,
+ type VariableInspectorProps,
+ type StackInspectorProps,
+} from "./VariableInspector.js";
diff --git a/packages/programs-react/src/index.ts b/packages/programs-react/src/index.ts
new file mode 100644
index 000000000..aa32365c2
--- /dev/null
+++ b/packages/programs-react/src/index.ts
@@ -0,0 +1,68 @@
+/**
+ * @ethdebug/programs-react
+ *
+ * React components for visualizing ethdebug program annotations.
+ */
+
+// Components
+export {
+ ProgramExampleContextProvider,
+ useProgramExampleContext,
+ type ProgramExampleState,
+ type ProgramExampleProps,
+} from "#components/ProgramExampleContext";
+
+export { Opcodes } from "#components/Opcodes";
+
+export { SourceContents } from "#components/SourceContents";
+
+export { HighlightedInstruction } from "#components/HighlightedInstruction";
+
+// Trace components
+export {
+ TraceProvider,
+ useTraceContext,
+ TraceControls,
+ TraceProgress,
+ VariableInspector,
+ StackInspector,
+ type TraceState,
+ type TraceProviderProps,
+ type ResolvedVariable,
+ type TraceControlsProps,
+ type TraceProgressProps,
+ type VariableInspectorProps,
+ type StackInspectorProps,
+} from "#components/index";
+
+// Shiki utilities
+export {
+ useHighlighter,
+ ShikiCodeBlock,
+ type Highlighter,
+ type HighlightOptions,
+ type ShikiCodeBlockProps,
+} from "#shiki/index";
+
+// Utility functions
+export {
+ computeOffsets,
+ resolveDynamicInstruction,
+ createMockTrace,
+ findInstructionAtPc,
+ extractVariablesFromInstruction,
+ buildPcToInstructionMap,
+ type DynamicInstruction,
+ type DynamicContext,
+ type ContextThunk,
+ type FindSourceRangeOptions,
+ type ResolverOptions,
+ type TraceStep,
+ type MockTraceSpec,
+} from "#utils/index";
+
+// CSS - consumers should import these stylesheets
+// import "@ethdebug/programs-react/components/Opcodes.css";
+// import "@ethdebug/programs-react/components/SourceContents.css";
+// import "@ethdebug/programs-react/components/TraceControls.css";
+// import "@ethdebug/programs-react/components/VariableInspector.css";
diff --git a/packages/programs-react/src/shiki/ShikiCodeBlock.tsx b/packages/programs-react/src/shiki/ShikiCodeBlock.tsx
new file mode 100644
index 000000000..ebee07643
--- /dev/null
+++ b/packages/programs-react/src/shiki/ShikiCodeBlock.tsx
@@ -0,0 +1,38 @@
+/**
+ * Simple code block component using Shiki syntax highlighting.
+ */
+
+import React from "react";
+import { type HighlightOptions, useHighlighter } from "./useHighlighter.js";
+
+/**
+ * Props for ShikiCodeBlock component.
+ */
+export interface ShikiCodeBlockProps extends HighlightOptions {
+ code: string;
+ className?: string;
+}
+
+/**
+ * Renders a code block with syntax highlighting using Shiki.
+ *
+ * @param props - Code and highlight options
+ * @returns Highlighted code block element
+ */
+export function ShikiCodeBlock({
+ code,
+ className,
+ ...highlightOptions
+}: ShikiCodeBlockProps): JSX.Element {
+ const highlighter = useHighlighter();
+
+ if (!highlighter) {
+    return <>Loading...</>;
+ }
+
+ const html = highlighter.highlight(code, highlightOptions);
+
+  return (
+    <div className={className} dangerouslySetInnerHTML={{ __html: html }} />
+  );
+}
diff --git a/packages/programs-react/src/shiki/index.ts b/packages/programs-react/src/shiki/index.ts
new file mode 100644
index 000000000..b4ed16f3b
--- /dev/null
+++ b/packages/programs-react/src/shiki/index.ts
@@ -0,0 +1,9 @@
+/**
+ * Shiki syntax highlighting exports.
+ */
+
+export { useHighlighter } from "./useHighlighter.js";
+export type { Highlighter, HighlightOptions } from "./useHighlighter.js";
+
+export { ShikiCodeBlock } from "./ShikiCodeBlock.js";
+export type { ShikiCodeBlockProps } from "./ShikiCodeBlock.js";
diff --git a/packages/web/src/theme/ShikiCodeBlock/useHighlighter.ts b/packages/programs-react/src/shiki/useHighlighter.ts
similarity index 72%
rename from packages/web/src/theme/ShikiCodeBlock/useHighlighter.ts
rename to packages/programs-react/src/shiki/useHighlighter.ts
index 96f091e61..82a2112ef 100644
--- a/packages/web/src/theme/ShikiCodeBlock/useHighlighter.ts
+++ b/packages/programs-react/src/shiki/useHighlighter.ts
@@ -1,18 +1,36 @@
+/**
+ * React hook for Shiki syntax highlighter.
+ */
+
import { useEffect, useState } from "react";
import * as Shiki from "shiki/core";
import { createOnigurumaEngine } from "shiki/engine/oniguruma";
+/**
+ * Highlighter interface for syntax highlighting.
+ */
export interface Highlighter {
highlight(text: string, options: HighlightOptions): string;
}
+/**
+ * Options for highlighting code.
+ */
export interface HighlightOptions {
language?: string;
decorations?: Shiki.DecorationItem[];
+ className?: string;
}
-export function useHighlighter() {
+/**
+ * React hook that provides a Shiki highlighter instance.
+ *
+ * The highlighter is created asynchronously on mount.
+ *
+ * @returns Highlighter instance or undefined while loading
+ */
+export function useHighlighter(): Highlighter | undefined {
  const [highlighter, setHighlighter] = useState<Highlighter | undefined>();
useEffect(() => {
diff --git a/packages/programs-react/src/utils/dynamic.test.ts b/packages/programs-react/src/utils/dynamic.test.ts
new file mode 100644
index 000000000..1347f8329
--- /dev/null
+++ b/packages/programs-react/src/utils/dynamic.test.ts
@@ -0,0 +1,163 @@
+/**
+ * Tests for dynamic instruction resolution.
+ */
+
+import { describe, it, expect } from "vitest";
+import {
+ resolveDynamicInstruction,
+ type DynamicInstruction,
+ type ContextThunk,
+} from "./dynamic.js";
+
+describe("resolveDynamicInstruction", () => {
+ const source = {
+ id: "test-source",
+ path: "/test/source.js",
+ language: "javascript",
+ contents: "let x = 1;\nlet y = 2;\nlet z = 3;",
+ };
+
+ it("passes through static context unchanged", () => {
+ const instruction: DynamicInstruction = {
+ offset: 0,
+ operation: { mnemonic: "PUSH1", arguments: ["0x01"] },
+ context: { remark: "static context" },
+ };
+
+ const result = resolveDynamicInstruction(instruction, {
+ sources: [source],
+ });
+
+ expect(result.context).toEqual({ remark: "static context" });
+ expect(result.offset).toBe(0);
+ expect(result.operation).toEqual({
+ mnemonic: "PUSH1",
+ arguments: ["0x01"],
+ });
+ });
+
+ it("resolves dynamic context thunk", () => {
+ const context: ContextThunk = ({ findSourceRange }) => {
+ const range = findSourceRange("let x");
+ return {
+ code: {
+ source: { id: source.id },
+ range: range?.range,
+ },
+ };
+ };
+ const instruction: DynamicInstruction = {
+ offset: 0,
+ operation: { mnemonic: "PUSH1", arguments: ["0x01"] },
+ context,
+ };
+
+ const result = resolveDynamicInstruction(instruction, {
+ sources: [source],
+ });
+
+ expect(result.context).toEqual({
+ code: {
+ source: { id: "test-source" },
+ range: {
+ offset: 0,
+ length: 5,
+ },
+ },
+ });
+ });
+
+ it("findSourceRange locates string in source", () => {
+ const context: ContextThunk = ({ findSourceRange }) => {
+ const range = findSourceRange("let y");
+ return { remark: `found at ${range?.range?.offset}` };
+ };
+ const instruction: DynamicInstruction = {
+ offset: 0,
+ operation: { mnemonic: "PUSH1", arguments: ["0x01"] },
+ context,
+ };
+
+ const result = resolveDynamicInstruction(instruction, {
+ sources: [source],
+ });
+
+ // "let y" starts at position 11 (after "let x = 1;\n")
+ expect(result.context).toEqual({ remark: "found at 11" });
+ });
+
+ it("findSourceRange with after option skips to position after query", () => {
+ const sourceWithRepeats = {
+ id: "repeats",
+ path: "/test/repeats.js",
+ language: "javascript",
+ contents: "let a = 1; let a = 2; let a = 3;",
+ };
+
+ const context: ContextThunk = ({ findSourceRange }) => {
+ // Find second occurrence of "let a" by searching after the first "= 1"
+ const range = findSourceRange("let a", { after: "= 1" });
+ return { remark: `found at ${range?.range?.offset}` };
+ };
+ const instruction: DynamicInstruction = {
+ offset: 0,
+ operation: { mnemonic: "PUSH1", arguments: ["0x01"] },
+ context,
+ };
+
+ const result = resolveDynamicInstruction(instruction, {
+ sources: [sourceWithRepeats],
+ });
+
+ // Second "let a" starts at position 11
+ expect(result.context).toEqual({ remark: "found at 11" });
+ });
+
+ it("throws when after query not found", () => {
+ const context: ContextThunk = ({ findSourceRange }) => {
+ findSourceRange("let x", { after: "nonexistent" });
+ return { remark: "should not reach here" };
+ };
+ const instruction: DynamicInstruction = {
+ offset: 0,
+ operation: { mnemonic: "PUSH1", arguments: ["0x01"] },
+ context,
+ };
+
+ expect(() =>
+ resolveDynamicInstruction(instruction, { sources: [source] }),
+ ).toThrow(/could not find string nonexistent/);
+ });
+
+ it("throws when query not found", () => {
+ const context: ContextThunk = ({ findSourceRange }) => {
+ findSourceRange("nonexistent");
+ return { remark: "should not reach here" };
+ };
+ const instruction: DynamicInstruction = {
+ offset: 0,
+ operation: { mnemonic: "PUSH1", arguments: ["0x01"] },
+ context,
+ };
+
+ expect(() =>
+ resolveDynamicInstruction(instruction, { sources: [source] }),
+ ).toThrow(/could not find string nonexistent/);
+ });
+
+ it("returns undefined range when no sources", () => {
+ const context: ContextThunk = ({ findSourceRange }) => {
+ const range = findSourceRange("let x");
+ return { remark: range ? "found" : "not found" };
+ };
+ const instruction: DynamicInstruction = {
+ offset: 0,
+ operation: { mnemonic: "PUSH1", arguments: ["0x01"] },
+ context,
+ };
+
+ const result = resolveDynamicInstruction(instruction, { sources: [] });
+
+ expect(result.context).toEqual({ remark: "not found" });
+ });
+});
diff --git a/packages/web/src/theme/ProgramExample/dynamic.ts b/packages/programs-react/src/utils/dynamic.ts
similarity index 74%
rename from packages/web/src/theme/ProgramExample/dynamic.ts
rename to packages/programs-react/src/utils/dynamic.ts
index e80dada73..285b7bb8e 100644
--- a/packages/web/src/theme/ProgramExample/dynamic.ts
+++ b/packages/programs-react/src/utils/dynamic.ts
@@ -1,12 +1,28 @@
+/**
+ * Dynamic instruction resolution.
+ *
+ * Allows defining instructions with context thunks that are resolved
+ * against source materials.
+ */
+
import { Program, Materials } from "@ethdebug/format";
+/**
+ * Instruction with dynamic context that can be resolved.
+ */
export type DynamicInstruction = Omit<
Program.Instruction,
"context" | "operation"
> & { operation: Program.Instruction.Operation } & { context: DynamicContext };
+/**
+ * Context that can be either static or a thunk that resolves against sources.
+ */
export type DynamicContext = Program.Context | ContextThunk;
+/**
+ * Function that resolves a dynamic context using source information.
+ */
export type ContextThunk = (props: {
findSourceRange(
query: string,
@@ -14,15 +30,28 @@ export type ContextThunk = (props: {
): Materials.SourceRange | undefined;
}) => Program.Context;
+/**
+ * Options for finding a source range.
+ */
export interface FindSourceRangeOptions {
source?: Materials.Reference;
after?: string;
}
+/**
+ * Options for resolving dynamic instructions.
+ */
export interface ResolverOptions {
sources: Materials.Source[];
}
+/**
+ * Resolve a dynamic instruction to a static instruction.
+ *
+ * @param dynamicInstruction - Instruction with potentially dynamic context
+ * @param options - Resolver options including source materials
+ * @returns Resolved static instruction
+ */
export function resolveDynamicInstruction(
dynamicInstruction: DynamicInstruction,
options: ResolverOptions,
diff --git a/packages/programs-react/src/utils/index.ts b/packages/programs-react/src/utils/index.ts
new file mode 100644
index 000000000..9e24ab67b
--- /dev/null
+++ b/packages/programs-react/src/utils/index.ts
@@ -0,0 +1,23 @@
+/**
+ * Utility exports.
+ */
+
+export { computeOffsets } from "./offsets.js";
+
+export {
+ resolveDynamicInstruction,
+ type DynamicInstruction,
+ type DynamicContext,
+ type ContextThunk,
+ type FindSourceRangeOptions,
+ type ResolverOptions,
+} from "./dynamic.js";
+
+export {
+ createMockTrace,
+ findInstructionAtPc,
+ extractVariablesFromInstruction,
+ buildPcToInstructionMap,
+ type TraceStep,
+ type MockTraceSpec,
+} from "./mockTrace.js";
diff --git a/packages/programs-react/src/utils/mockTrace.ts b/packages/programs-react/src/utils/mockTrace.ts
new file mode 100644
index 000000000..6adef9e05
--- /dev/null
+++ b/packages/programs-react/src/utils/mockTrace.ts
@@ -0,0 +1,119 @@
+/**
+ * Utilities for creating mock execution traces.
+ */
+
+import type { Program } from "@ethdebug/format";
+
+/**
+ * A single step in an execution trace.
+ */
+export interface TraceStep {
+ /** Program counter (byte offset in bytecode) */
+ pc: number;
+ /** Opcode mnemonic (e.g., "PUSH1", "SLOAD") */
+ opcode: string;
+ /** Stack entries (from top to bottom) as hex strings or bigints */
+  stack?: Array<string | bigint>;
+ /** Memory contents as hex string */
+ memory?: string;
+ /** Storage state: slot (hex) → value (hex) */
+  storage?: Record<string, string>;
+ /** Gas remaining */
+ gas?: bigint;
+ /** Return data from last call */
+ returndata?: string;
+}
+
+/**
+ * Specification for creating a mock trace.
+ */
+export interface MockTraceSpec {
+ /** Sequence of execution steps */
+ steps: TraceStep[];
+ /** Program definition with instructions */
+ program: Program;
+}
+
+/**
+ * Create a mock trace from a specification.
+ *
+ * This allows creating traces for demonstration without running real EVM.
+ */
+export function createMockTrace(spec: MockTraceSpec): TraceStep[] {
+ return spec.steps.map((step) => ({
+ ...step,
+ // Ensure stack has default value
+ stack: step.stack || [],
+ // Ensure storage has default value
+ storage: step.storage || {},
+ }));
+}
+
+/**
+ * Find the instruction at a given program counter.
+ */
+export function findInstructionAtPc(
+ program: Program,
+ pc: number,
+): Program.Instruction | undefined {
+ return program.instructions?.find((instr) => instr.offset === pc);
+}
+
+/**
+ * Extract variables that are in scope at a given instruction.
+ *
+ * This walks the context and extracts variables from Variables contexts.
+ */
+export function extractVariablesFromInstruction(
+ instruction: Program.Instruction,
+): Array<{ identifier?: string; type?: unknown; pointer?: unknown }> {
+ if (!instruction.context) {
+ return [];
+ }
+
+ return extractVariablesFromContext(instruction.context);
+}
+
+function extractVariablesFromContext(
+ context: Program.Context,
+): Array<{ identifier?: string; type?: unknown; pointer?: unknown }> {
+ // Variables context
+ if ("variables" in context && Array.isArray(context.variables)) {
+ return context.variables;
+ }
+
+ // Gather context (combines multiple contexts)
+ if ("gather" in context && Array.isArray(context.gather)) {
+ return context.gather.flatMap(extractVariablesFromContext);
+ }
+
+ // Pick context (picks from multiple contexts - take first with variables)
+ if ("pick" in context && Array.isArray(context.pick)) {
+ for (const subContext of context.pick) {
+ const vars = extractVariablesFromContext(subContext);
+ if (vars.length > 0) {
+ return vars;
+ }
+ }
+ }
+
+ return [];
+}
+
+/**
+ * Build a map of PC to instruction for quick lookup.
+ */
+export function buildPcToInstructionMap(
+ program: Program,
+): Map<number, Program.Instruction> {
+  const map = new Map<number, Program.Instruction>();
+ for (const instr of program.instructions || []) {
+ // offset can be number or hex string (Data.Value)
+ const offset =
+ typeof instr.offset === "string"
+ ? parseInt(instr.offset, 16)
+ : instr.offset;
+ map.set(offset, instr);
+ }
+ return map;
+}
diff --git a/packages/programs-react/src/utils/offsets.test.ts b/packages/programs-react/src/utils/offsets.test.ts
new file mode 100644
index 000000000..0f8377785
--- /dev/null
+++ b/packages/programs-react/src/utils/offsets.test.ts
@@ -0,0 +1,93 @@
+/**
+ * Tests for offset computation utility.
+ */
+
+import { describe, it, expect } from "vitest";
+import { computeOffsets } from "./offsets.js";
+
+describe("computeOffsets", () => {
+ it("computes offset 0 for first instruction", () => {
+ const instructions = [
+ { operation: { mnemonic: "PUSH1" as const, arguments: ["0x80"] } },
+ ];
+
+ const result = computeOffsets(instructions);
+
+ expect(result).toHaveLength(1);
+ expect(result[0].offset).toBe(0);
+ });
+
+ it("computes sequential offsets based on operation size", () => {
+ const instructions = [
+ // PUSH1 0x80 = 2 bytes (1 opcode + 1 byte argument)
+ { operation: { mnemonic: "PUSH1" as const, arguments: ["0x80"] } },
+ // PUSH1 0x40 = 2 bytes
+ { operation: { mnemonic: "PUSH1" as const, arguments: ["0x40"] } },
+ // MSTORE = 1 byte (no arguments)
+ { operation: { mnemonic: "MSTORE" as const } },
+ ];
+
+ const result = computeOffsets(instructions);
+
+ expect(result).toHaveLength(3);
+ expect(result[0].offset).toBe(0); // First instruction at 0
+ expect(result[1].offset).toBe(2); // After PUSH1 0x80 (2 bytes)
+ expect(result[2].offset).toBe(4); // After PUSH1 0x40 (2 bytes)
+ });
+
+ it("handles larger push operations correctly", () => {
+ const instructions = [
+ // PUSH32 = 33 bytes (1 opcode + 32 byte argument)
+ {
+ operation: {
+ mnemonic: "PUSH32" as const,
+ arguments: ["0x" + "ff".repeat(32)],
+ },
+ },
+ // STOP = 1 byte
+ { operation: { mnemonic: "STOP" as const } },
+ ];
+
+ const result = computeOffsets(instructions);
+
+ expect(result).toHaveLength(2);
+ expect(result[0].offset).toBe(0);
+ expect(result[1].offset).toBe(33); // 1 + 32
+ });
+
+ it("handles numeric arguments", () => {
+ const instructions = [
+ // Using numeric argument
+ { operation: { mnemonic: "PUSH1" as const, arguments: [128] } },
+ { operation: { mnemonic: "STOP" as const } },
+ ];
+
+ const result = computeOffsets(instructions);
+
+ expect(result).toHaveLength(2);
+ expect(result[0].offset).toBe(0);
+ expect(result[1].offset).toBe(2); // 1 opcode + 1 byte for 0x80
+ });
+
+ it("preserves original instruction properties", () => {
+ const instructions = [
+ {
+ operation: { mnemonic: "PUSH1" as const, arguments: ["0x80"] },
+ context: { remark: "test" },
+ },
+ ];
+
+ const result = computeOffsets(instructions);
+
+ expect(result[0]).toMatchObject({
+ offset: 0,
+ operation: { mnemonic: "PUSH1", arguments: ["0x80"] },
+ context: { remark: "test" },
+ });
+ });
+
+ it("returns empty array for empty input", () => {
+ const result = computeOffsets([]);
+ expect(result).toEqual([]);
+ });
+});
diff --git a/packages/web/src/theme/ProgramExample/offsets.ts b/packages/programs-react/src/utils/offsets.ts
similarity index 75%
rename from packages/web/src/theme/ProgramExample/offsets.ts
rename to packages/programs-react/src/utils/offsets.ts
index 9e60320f4..2b6c911ed 100644
--- a/packages/web/src/theme/ProgramExample/offsets.ts
+++ b/packages/programs-react/src/utils/offsets.ts
@@ -1,15 +1,30 @@
+/**
+ * Compute instruction offsets from operation sizes.
+ */
+
import { Data, Program } from "@ethdebug/format";
-// define base generic instruction since other parts of this module
-// allow dynamic contexts and such
+/**
+ * Base instruction type that can have offsets computed.
+ */
interface OffsetComputableInstruction {
operation: Program.Instruction.Operation;
}
+/**
+ * Instruction with computed offset.
+ */
type OffsetComputedInstruction<I extends OffsetComputableInstruction> = I & {
offset: Data.Value;
};
+/**
+ * Compute bytecode offsets for a sequence of instructions based on their
+ * operation sizes.
+ *
+ * @param instructions - Instructions without offsets
+ * @returns Instructions with computed offsets
+ */
export function computeOffsets<I extends OffsetComputableInstruction>(
  instructions: I[],
): OffsetComputedInstruction<I>[] {
diff --git a/packages/programs-react/tsconfig.json b/packages/programs-react/tsconfig.json
new file mode 100644
index 000000000..cdfb74de8
--- /dev/null
+++ b/packages/programs-react/tsconfig.json
@@ -0,0 +1,18 @@
+{
+ "extends": "../../tsconfig.base.json",
+ "compilerOptions": {
+ "rootDir": "./",
+ "outDir": "./dist/",
+ "baseUrl": "./",
+ "jsx": "react-jsx",
+ "lib": ["ES2020", "DOM", "DOM.Iterable"],
+ "paths": {
+ "#components/*": ["./src/components/*"],
+ "#shiki/*": ["./src/shiki/*"],
+ "#utils/*": ["./src/utils/*"]
+ }
+ },
+ "include": ["src/**/*"],
+ "exclude": ["node_modules", "dist"],
+ "references": [{ "path": "../format" }]
+}
diff --git a/packages/programs-react/vitest.config.ts b/packages/programs-react/vitest.config.ts
new file mode 100644
index 000000000..ab56fbfcb
--- /dev/null
+++ b/packages/programs-react/vitest.config.ts
@@ -0,0 +1,13 @@
+import { defineConfig } from "vitest/config";
+
+export default defineConfig({
+ test: {
+ globals: true,
+ environment: "jsdom",
+ coverage: {
+ provider: "v8",
+ reporter: ["text", "json", "html"],
+ exclude: ["node_modules/", "dist/", "**/*.test.ts", "**/*.test.tsx"],
+ },
+ },
+});
diff --git a/packages/web/docs/concepts/_category_.json b/packages/web/docs/concepts/_category_.json
new file mode 100644
index 000000000..1d3167d49
--- /dev/null
+++ b/packages/web/docs/concepts/_category_.json
@@ -0,0 +1,4 @@
+{
+ "label": "Concepts",
+ "position": 3
+}
diff --git a/packages/web/docs/concepts/index.mdx b/packages/web/docs/concepts/index.mdx
new file mode 100644
index 000000000..b2f727bb4
--- /dev/null
+++ b/packages/web/docs/concepts/index.mdx
@@ -0,0 +1,117 @@
+---
+sidebar_position: 1
+sidebar_label: Overview
+---
+
+# Concepts
+
+This section introduces the core concepts behind **ethdebug/format**.
+Understanding these mental models will help you work with the format
+effectively, whether you're consuming it in a debugger or producing it from a
+compiler.
+
+## The main components
+
+**ethdebug/format** consists of four main kinds of information:
+
+### Types
+
+**Types** describe the structure of data — its shape, not its location. They
+specify what kind of value you're looking at and how its parts relate.
+
+For example, a type might describe:
+
+- A `uint256` (256-bit unsigned integer)
+- An `address` (20-byte account identifier)
+- A `struct` with named fields in a specific order
+- A `mapping` from keys to values
+
+Types don't say where data lives or how to find it. That's what pointers do.
+Together, a type and pointer give debuggers everything needed to locate bytes
+and interpret them as meaningful values.
+
+**[Learn more about types →](/docs/core-schemas/types)**
+
+### Pointers
+
+**Pointers** describe where data lives. They're recipes for finding bytes in
+EVM state.
+
+Simple pointers specify static locations:
+
+- "Storage slot 0"
+- "Memory offset 0x80"
+- "Stack position 2"
+
+Complex pointers describe dynamic locations:
+
+- "Storage slot `keccak256(key, baseSlot)`" for mapping values
+- "Memory at the offset stored in stack position 1" for dynamic references
+
+Pointers can include expressions that compute locations based on runtime state.
+
+**[Learn more about pointers →](/docs/core-schemas/pointers)**
+
+### Programs
+
+**Programs** describe runtime context. They tell a debugger what's happening
+at each point in execution.
+
+Programs answer questions like:
+
+- What source code corresponds to this bytecode instruction?
+- What variables are in scope right now?
+- What function are we in?
+
+This information enables source-level debugging of optimized bytecode.
+
+**[Learn more about programs →](/docs/core-schemas/programs)**
+
+### Info
+
+The **info schema** bundles all the pieces together. It's the top-level
+container that compilers actually emit, containing references to programs,
+types, and shared resources.
+
+An info object provides:
+
+- The compiled programs for each bytecode artifact
+- Shared type definitions referenced across programs
+- Source file content or references
+- Compilation metadata (compiler version, settings)
+
+**[Learn more about the info schema →](/docs/core-schemas/info)**
+
+## How they work together
+
+These components combine to enable rich debugging:
+
+1. A debugger reads **program** information to know which variables are in
+ scope at the current instruction
+2. Each variable has a **type** that describes its structure
+3. Each variable has a **pointer** that describes where to find its value
+4. The debugger resolves the pointer against current EVM state to get raw bytes
+5. The debugger decodes the bytes using the type definition
+6. The user sees meaningful variable values
+
+## Next steps
+
+Dive deeper into each component:
+
+- **[Types concepts](/docs/concepts/types)** — Mental model for type
+ representations
+- **[Pointers concepts](/docs/concepts/pointers)** — Mental model for pointer
+ representations (includes EVM data locations)
+- **[Programs concepts](/docs/concepts/programs)** — Mental model for program
+ representations
+- **[Info schema concepts](/docs/concepts/info)** — Mental model for bundling
+ debug data
+
+Or jump to the reference documentation:
+
+- **[Types reference](/docs/core-schemas/types)** — Full documentation on type definitions
+- **[Pointers reference](/docs/core-schemas/pointers)** — Full documentation on pointer
+ definitions
+- **[Programs reference](/docs/core-schemas/programs)** — Full documentation on program
+ annotations
+- **[Info schema reference](/docs/core-schemas/info)** — Full documentation on info objects
diff --git a/packages/web/docs/concepts/info.mdx b/packages/web/docs/concepts/info.mdx
new file mode 100644
index 000000000..bad5ae814
--- /dev/null
+++ b/packages/web/docs/concepts/info.mdx
@@ -0,0 +1,116 @@
+---
+sidebar_position: 6
+---
+
+import SpecLink from "@site/src/components/SpecLink";
+
+# Info
+
+
+
+This page explains the mental model behind **ethdebug/format** info objects.
+For reference documentation, see the [Info reference](/docs/core-schemas/info).
+
+## The info schema is the top-level container
+
+While types, pointers, and programs describe individual aspects of debug data,
+the **info schema** bundles everything together. It's the top-level container
+that represents a complete unit of debug information for a compilation.
+
+When a compiler produces debug data, it typically emits an info object (or the
+closely related info/resources object) containing all the debug information
+for that compilation.
+
+## What an info object contains
+
+An **ethdebug/format/info** object includes:
+
+- **Programs** — one for each bytecode artifact (create and call bytecode)
+- **Shared types** — type definitions referenced across programs
+- **Sources** — original source files (content or references)
+- **Compilation metadata** — compiler version, settings, and other details
+
+This bundling avoids duplication — when multiple programs reference the same
+type definition or source file, the info object contains it once with
+references pointing to it.
+
+## Two schema variants
+
+The format defines two related schemas:
+
+### ethdebug/format/info
+
+A complete, standalone representation containing all debugging data plus
+compilation metadata. This is what tools consume when they need full debug
+information.
+
+### ethdebug/format/info/resources
+
+A more minimal representation containing compilation-related debug information
+**without** metadata that may already exist elsewhere in compiler output.
+
+The distinction exists because compilers typically already produce structured
+JSON output with compilation metadata. Rather than duplicate that information,
+compilers can emit info/resources objects and let language-specific tooling
+combine them with existing compiler output to produce full info objects.
+
+## Expected workflow
+
+The format anticipates this workflow:
+
+1. **Compilers emit info/resources** — alongside their existing JSON output
+2. **Language tooling combines** — merges info/resources with other compiler
+ output
+3. **Tools consume info objects** — use the complete standalone representation
+
+For example, the Solidity compiler might emit info/resources objects as part
+of its standard JSON output. A separate tool (perhaps `solc-dbg` or similar)
+would then combine these with the rest of solc's output to produce full
+**ethdebug/format/info** objects.
+
+The format recommends that compilers ensure tooling exists to create full info
+objects, whether through custom compilation modes or companion tools.
+
+## Relationship to other schemas
+
+The info schema connects everything:
+
+```
+info
+├── programs[]
+│ ├── instructions[]
+│ │ └── context
+│ │ ├── variables[]
+│ │ │ ├── type (or reference)
+│ │ │ └── pointer
+│ │ └── code (source range)
+│ └── compilation reference
+├── sources[]
+├── compilations[]
+└── types[] (shared definitions)
+```
+
+- **Info objects** reference **programs** — one per bytecode
+- **Programs** reference **types** and **pointers** for variables
+- **Programs** reference **sources** via source ranges in contexts
+- **Types** can reference other **types** by ID (for shared definitions)
+
+## When to use which schema
+
+**Use the info schema** when:
+
+- Building a debugger that consumes debug data
+- Creating standalone debug artifacts
+- Distributing debug information separately from compiler output
+
+**Use the info/resources schema** when:
+
+- Implementing a compiler that already produces structured output
+- Embedding debug data within existing compiler JSON formats
+- Building tooling that will combine with other compiler output
+
+## Next steps
+
+- **[Info schema reference](/docs/core-schemas/info)** — Schema documentation
+- **[Resources](/docs/core-schemas/info/resources)** — Compilation-level shared data
+- **[Info specification](/spec/info)** — Formal schema definitions
diff --git a/packages/web/docs/concepts/pointers.mdx b/packages/web/docs/concepts/pointers.mdx
new file mode 100644
index 000000000..64945b5ce
--- /dev/null
+++ b/packages/web/docs/concepts/pointers.mdx
@@ -0,0 +1,297 @@
+---
+sidebar_position: 4
+---
+
+import SpecLink from "@site/src/components/SpecLink";
+import SchemaExample from "@site/src/components/SchemaExample";
+
+# Pointers
+
+
+
+This page explains the mental model behind **ethdebug/format** pointer
+representations. For reference documentation on regions, expressions, and
+collections, see the [Pointers reference](/docs/core-schemas/pointers).
+
+## Pointers are recipes for finding bytes
+
+A pointer describes **where** data lives, not what it means. It's a recipe that
+a debugger can follow to locate bytes in EVM state.
+
+Simple pointers specify static locations:
+
+
+ {`{
+ "location": "storage",
+ "slot": "0x0"
+}`}
+
+
+This says "the data is in storage slot 0." The debugger can resolve this
+directly against the EVM state.
+
+Complex pointers describe dynamic locations that depend on runtime values:
+
+
+ {`{
+ "location": "storage",
+ "slot": {
+ "$keccak256": [{ "$read": "key-value" }, { "$wordsized": 3 }]
+ }
+}`}
+
+
+This says "compute the slot by hashing a runtime value with the number 3." The
+debugger must evaluate this expression against the current machine state.
+
+Pointers are self-contained — they include everything needed to resolve the
+location.
+
+## EVM data locations
+
+The EVM stores data in several distinct locations. Understanding these is
+essential for working with pointers.
+
+### Storage
+
+**Storage** is persistent data associated with a contract. It survives
+transaction boundaries and is where contracts store their state.
+
+- **Persistent** — values remain until explicitly changed
+- **Slot-based** — organized into 32-byte slots numbered from 0
+- **Contract-specific** — each contract has its own storage space
+
+Storage is where you find contract state variables, mapping contents, and
+dynamic array contents.
+
+### Memory
+
+**Memory** is temporary data that exists only during execution. It's cleared
+between calls.
+
+- **Temporary** — cleared after each external call returns
+- **Byte-addressable** — accessed by byte offset, not slots
+- **Linear** — grows as needed from offset 0
+
+Memory holds function arguments, return data being prepared, and temporary
+values.
+
+### Stack
+
+**The stack** is where the EVM performs computations. It holds operands and
+intermediate results.
+
+- **256-bit words** — each stack item is 32 bytes
+- **Limited depth** — maximum 1024 items
+- **LIFO** — last in, first out access pattern
+
+The stack contains function arguments (for internal calls), local variables,
+and intermediate computation results.
+
+### Calldata
+
+**Calldata** is the read-only input data sent to a contract when called.
+
+- **Read-only** — cannot be modified during execution
+- **Byte-addressable** — accessed by byte offset
+- **Cheap to read** — reading calldata costs less gas than reading storage
+
+Calldata contains the function selector (first 4 bytes) and ABI-encoded
+function arguments.
+
+### Returndata
+
+**Returndata** is the output from the most recent external call.
+
+- **Read-only** — set by called contract, read by caller
+- **Replaced on each call** — each external call overwrites previous returndata
+- **Byte-addressable** — accessed by byte offset
+
+### Code
+
+**Code** refers to the contract's bytecode itself. Sometimes data is embedded
+in bytecode.
+
+- **Immutable** — cannot change after deployment
+- **Byte-addressable** — accessed by byte offset
+
+Code is where you find immutable variables and embedded constants.
+
+### Transient storage
+
+**Transient storage** (EIP-1153) is storage that persists within a transaction
+but is cleared afterward.
+
+- **Transaction-scoped** — persists across calls within a transaction
+- **Cleared after transaction** — does not persist to the next transaction
+- **Slot-based** — like storage, organized into 32-byte slots
+
+### Summary
+
+| Location | Persistence | Addressing | Primary use |
+| ---------- | ------------------ | -------------- | ------------------- |
+| Storage | Permanent | 32-byte slots | Contract state |
+| Memory | Single call | Byte offset | Temporary data |
+| Stack | Instruction-level | Position index | Computation |
+| Calldata | Single call | Byte offset | Input parameters |
+| Returndata | Until next call | Byte offset | Call results |
+| Code | Permanent | Byte offset | Bytecode/immutables |
+| Transient | Single transaction | 32-byte slots | Tx-scoped state |
+
+## A pointer is a region or a collection
+
+The **ethdebug/format/pointer** schema is recursive: a pointer is either a
+**region** (a single continuous byte range) or a **collection** (an
+aggregation of other pointers).
+
+### Regions
+
+A **region** represents a single continuous range of bytes at a specific
+location. Different locations use different region schemas:
+
+**Slice-based regions** (memory, calldata, returndata, code) specify an offset
+and length:
+
+
+ {`{
+ "location": "memory",
+ "offset": "0x80",
+ "length": 32
+}`}
+
+
+**Segment-based regions** (storage, stack, transient) specify a slot, with
+optional offset and length for packed values:
+
+
+ {`{
+ "location": "storage",
+ "slot": 5,
+ "offset": 0,
+ "length": 16
+}`}
+
+
+Regions can be **named** for reference elsewhere in the pointer:
+
+
+ {`{
+ "name": "array-length",
+ "location": "storage",
+ "slot": 0
+}`}
+
+
+### Collections
+
+A **collection** aggregates multiple pointers. Six collection types exist
+for different purposes:
+
+- **group** — combine pointers statically (e.g., struct members)
+- **list** — generate a sequence of pointers (e.g., array elements)
+- **conditional** — choose between pointers based on a runtime condition
+- **scope** — define variables for use in nested pointers
+- **reference** — refer to a previously defined template
+- **templates** — define reusable pointer patterns with expected variables
+
+## Expressions enable dynamic computation
+
+Static offsets and slots aren't enough for real-world data. Array elements,
+mapping values, and many other locations depend on runtime values.
+
+**Expressions** let pointers compute addresses dynamically:
+
+
+ {`{
+ "location": "storage",
+ "slot": {
+ "$sum": [{ "$keccak256": [{ "$wordsized": 5 }] }, "element-index"]
+ }
+}`}
+
+
+Expressions support:
+
+- **Arithmetic**: `$sum`, `$difference`, `$product`, `$quotient`, `$remainder`
+- **Reading values**: `$read` retrieves bytes from a named region
+- **Region properties**: `.offset`, `.length`, `.slot` reference region fields
+- **Hashing**: `$keccak256` computes storage slots for dynamic data
+- **Data manipulation**: `$concat`, `$sized`, `$wordsized`
+
+Variables in expressions come from list iteration (`each`) or scope definitions
+(`define`).
+
+## Addressing schemes
+
+Regions use one of two addressing schemes based on their location:
+
+**Slice-based addressing** (memory, calldata, returndata, code):
+
+- `offset` — byte position from the start
+- `length` — number of bytes
+
+**Segment-based addressing** (storage, stack, transient):
+
+- `slot` — 256-bit slot number
+- `offset` (optional) — byte offset within the slot for packed values
+- `length` (optional) — number of bytes for packed values
+
+## Named regions enable composition
+
+Giving a region a `name` lets you reference it elsewhere:
+
+
+ {`{
+ "group": [
+ {
+ "name": "base-pointer",
+ "location": "stack",
+ "slot": 0
+ },
+ {
+ "location": "memory",
+ "offset": { "$read": "base-pointer" },
+ "length": 32
+ }
+ ]
+}`}
+
+
+The second region's offset comes from reading the value in the first region.
+This pattern is essential for describing data whose location is stored in
+another location (like memory pointers on the stack).
+
+## Next steps
+
+- **[Regions](/docs/core-schemas/pointers/regions)** — Reference for all region types
+- **[Expressions](/docs/core-schemas/pointers/expressions)** — Reference for expression
+ syntax
+- **[Collections](/docs/core-schemas/pointers/collections)** — Reference for collection
+ types
+- **[Pointer specification](/spec/pointer)** — Formal schema definitions
diff --git a/packages/web/docs/concepts/programs.mdx b/packages/web/docs/concepts/programs.mdx
new file mode 100644
index 000000000..334deb1e1
--- /dev/null
+++ b/packages/web/docs/concepts/programs.mdx
@@ -0,0 +1,180 @@
+---
+sidebar_position: 5
+---
+
+import SpecLink from "@site/src/components/SpecLink";
+import SchemaExample from "@site/src/components/SchemaExample";
+
+# Programs
+
+
+
+This page explains the mental model behind **ethdebug/format** program
+representations. For reference documentation on instructions, variables, and
+tracing, see the [Programs reference](/docs/core-schemas/programs).
+
+## Programs map bytecode to high-level context
+
+When a compiler transforms source code into bytecode, it creates a gap between
+what developers wrote and what actually executes. A **program** bridges this
+gap by providing high-level context for each bytecode instruction.
+
+Programs enable debuggers to answer:
+
+- What source code corresponds to this instruction?
+- What variables are in scope right now?
+- What function are we in?
+- Should this instruction be treated as "stepping into" a call?
+
+## Programs correspond to bytecode
+
+Each program is associated with a specific piece of bytecode:
+
+- **Call bytecode** — executed when a contract receives a message
+- **Create bytecode** — executed during contract deployment
+
+The same contract typically has both: create bytecode runs once during
+deployment, call bytecode runs whenever the contract is invoked afterward.
+
+Think of it as: "you have this bytecode → here's its program." A program
+references the compilation that produced the bytecode, linking back to
+source files and compiler metadata through the
+[info/resources](/docs/core-schemas/info/resources) schema.
+
+## Instruction listings
+
+Programs contain a sequential list of **instructions**, one for each machine
+instruction in the bytecode. Each instruction specifies:
+
+- **offset** — the byte position in the bytecode (equal to the program counter
+ on non-EOF EVMs)
+- **context** — high-level information about this point in execution
+
+Instructions are ordered to match the bytecode, enabling fast lookup by offset.
+Not every byte offset has an entry — only positions where opcodes begin.
+
+
+ {`{
+ "instructions": [
+ { "offset": 0, "context": { /* ... */ } },
+ { "offset": 1, "context": { /* ... */ } },
+ { "offset": 4, "context": { /* ... */ } }
+ ]
+}`}
+
+
+## Context information
+
+Each instruction's **context** describes what's true at that point in
+execution. Context information may include:
+
+### Source ranges
+
+Which source code relates to this instruction:
+
+
+ {`{
+ "code": {
+ "source": { "id": "source-1" },
+ "range": { "offset": 150, "length": 25 }
+ }
+}`}
+
+
+### Variables
+
+What variables are in scope and where to find their values:
+
+
+ {`{
+ "variables": [
+ {
+ "identifier": { "name": "balance" },
+ "type": { "kind": "uint", "bits": 256 },
+ "pointer": { "location": "storage", "slot": 0 }
+ }
+ ]
+}`}
+
+
+Each variable has an identifier, a type, and a pointer. The pointer tells the
+debugger where to find the variable's current value.
+
+### Control flow hints
+
+Information about stepping behavior:
+
+
+ {`{
+ "frame": {
+ "step-in": true
+ }
+}`}
+
+
+This tells debuggers whether an instruction represents entering or leaving a
+function call.
+
+## Context is valid after instruction execution
+
+An instruction's context describes the state that exists **after** that
+instruction completes. This timing is important:
+
+- Before the instruction runs, the previous context applies
+- After the instruction runs, this context applies
+
+For example, if an instruction stores a value in a variable, the variable's
+pointer in that instruction's context points to where the value now lives.
+
+## Contexts as state transitions
+
+A debugger maintains a model of the high-level program state as it steps
+through execution. Each context encountered serves as a state transition:
+
+1. Debugger observes the program counter
+2. Looks up the instruction at that offset
+3. Reads the context to learn what changed
+4. Updates its high-level state model
+5. Continues to the next instruction
+
+Contexts can be composed using:
+
+- **gather** — combine multiple context pieces together
+- **pick** — choose a context based on a runtime condition
+- **remark** — add metadata without changing scope
+
+This composition enables describing complex scenarios like conditional variable
+assignments or function inlining.
+
+## What tracing enables
+
+By following contexts through execution, debuggers can provide:
+
+1. **Source mapping** — show the current line in source code
+2. **Variable inspection** — display current values of in-scope variables
+3. **Call stacks** — reconstruct function call history
+4. **Data structure visualization** — present arrays and mappings meaningfully
+5. **Control flow insight** — indicate loop iterations, function boundaries
+
+The program schema provides the compile-time guarantees that make runtime
+debugging possible.
+
+## Next steps
+
+- **[Instructions](/docs/core-schemas/programs/instructions)** — Reference for instruction
+ structure
+- **[Variables](/docs/core-schemas/programs/variables)** — Reference for variable
+ definitions
+- **[Tracing](/docs/core-schemas/programs/tracing)** — Guide to using programs during
+ execution
+- **[Program specification](/spec/program)** — Formal schema definitions
diff --git a/packages/web/docs/concepts/types.mdx b/packages/web/docs/concepts/types.mdx
new file mode 100644
index 000000000..f51c8bb09
--- /dev/null
+++ b/packages/web/docs/concepts/types.mdx
@@ -0,0 +1,212 @@
+---
+sidebar_position: 3
+---
+
+import SpecLink from "@site/src/components/SpecLink";
+import SchemaExample from "@site/src/components/SchemaExample";
+
+# Types
+
+
+
+This page explains the mental model behind **ethdebug/format** type
+representations. For reference documentation on specific type kinds, see the
+[Types reference](/docs/core-schemas/types).
+
+## Types describe structure, not location
+
+A type definition tells you what shape data takes — not where that data lives.
+For example, a type might say "this is an array of uint256 values" without
+specifying whether those values are in storage, memory, or calldata.
+
+This separation is intentional:
+
+- **Types** describe the logical structure (what the data means)
+- **Pointers** describe the physical location (where to find the bytes)
+
+Together, they enable a debugger to find bytes (via the pointer) and interpret
+them correctly (via the type). The same type definition can be used regardless
+of where the data happens to be stored.
+
+## All types have a `kind` field
+
+Every type representation is a JSON object with a `kind` field that identifies
+what kind of type it is:
+
+
+ {`{
+ "kind": "uint",
+ "bits": 256
+}`}
+
+
+
+ {`{
+ "kind": "bool"
+}`}
+
+
+The `kind` field serves as a discriminator, telling parsers which schema to use
+for validation and how to interpret the rest of the object.
+
+## Known vs. unknown kinds
+
+**ethdebug/format** defines specific schemas for known type kinds. These
+correspond to reserved string values for `kind`:
+
+- Elementary types: `uint`, `int`, `bool`, `address`, `bytes`, `string`,
+ `fixed`, `ufixed`, `enum`, `contract`
+- Complex types: `array`, `struct`, `mapping`, `tuple`, `alias`, `function`
+
+Type representations **should** use the specific schema when representing a
+known type. They **must not** reuse reserved `kind` values for other purposes.
+
+For custom types or types not covered by the format, you **may** use other
+`kind` values with associated external schemas. This extensibility allows the
+format to support new languages and type systems.
+
+## The base type schema
+
+All type representations — both known and unknown kinds — must conform to the
+[base type schema](/spec/type/base). This ensures a minimum level of structure
+even for custom types.
+
+Known types have specific subschemas that extend the base with additional
+required fields. Unknown types must still satisfy the base constraints, plus
+any additional requirements the format defines for unrecognized kinds.
+
+## Elementary vs. complex types
+
+Types fall into one of two classes:
+
+**Elementary types** don't contain other types. They represent atomic values:
+
+- `uint256` — an unsigned integer
+- `address` — a 20-byte account identifier
+- `bool` — true or false
+
+**Complex types** compose one or more other types:
+
+- `uint256[]` — an array containing uint256 elements
+- A struct with multiple member types
+- A mapping from one type to another
+
+This distinction is expressed through the presence or absence of a `contains`
+field. Complex types always have `contains`; elementary types never do.
+
+## The `contains` field for complex types
+
+Complex types use `contains` to specify what types they compose. This field is
+polymorphic — it takes one of three forms depending on the type kind:
+
+### Single type (e.g., arrays)
+
+Arrays compose exactly one element type:
+
+
+ {`{
+ "kind": "array",
+ "contains": {
+ "type": {
+ "kind": "uint",
+ "bits": 256
+ }
+ }
+}`}
+
+
+### Ordered list (e.g., structs)
+
+Structs compose an ordered list of named members:
+
+
+ {`{
+ "kind": "struct",
+ "contains": [
+ {
+ "name": "balance",
+ "type": { "kind": "uint", "bits": 256 }
+ },
+ {
+ "name": "owner",
+ "type": { "kind": "address" }
+ }
+ ]
+}`}
+
+
+Member order matters — it typically matches declaration order and affects
+storage layout.
+
+### Object mapping (e.g., mappings)
+
+Mappings compose a key type and a value type:
+
+
+ {`{
+ "kind": "mapping",
+ "contains": {
+ "key": { "type": { "kind": "address" } },
+ "value": { "type": { "kind": "uint", "bits": 256 } }
+ }
+}`}
+
+
+## Type wrappers and references
+
+Notice how types in `contains` are wrapped in `{ "type": ... }` objects. These
+**type wrappers** serve two purposes:
+
+1. They allow additional properties alongside the type (like `"name"` for
+ struct members)
+2. They enable type references
+
+Instead of duplicating a type definition, you can reference it by ID:
+
+
+ {`{
+ "type": {
+ "id": "some-opaque-id"
+ }
+}`}
+
+
+IDs can be strings or numbers. This enables:
+
+- Avoiding duplication when the same type appears multiple times
+- Representing recursive types (a type that contains itself)
+- Sharing types across multiple pointers or programs
+
+## Types with definitions
+
+Some types originate from source code definitions — structs, enums, and type
+aliases are explicitly defined by developers. These types may include a
+`definition` field specifying the type's name and source location:
+
+
+ {`{
+ "kind": "enum",
+ "definition": {
+ "name": "Status",
+ "source": {
+ "id": "source-id",
+ "range": { "offset": 100, "length": 45 }
+ }
+ },
+ "values": ["Pending", "Active", "Completed"]
+}`}
+
+
+The `definition` field is optional even for these types, but when present it
+enables debuggers to display the type's declared name and navigate to its
+definition in source code.
+
+## Next steps
+
+- **[Elementary types](/docs/core-schemas/types/elementary)** — Reference for atomic types
+- **[Composite types](/docs/core-schemas/types/composite)** — Reference for complex types
+- **[Type specification](/spec/type)** — Formal schema definitions
diff --git a/packages/web/docs/core-schemas/_category_.json b/packages/web/docs/core-schemas/_category_.json
new file mode 100644
index 000000000..0fbf76d9e
--- /dev/null
+++ b/packages/web/docs/core-schemas/_category_.json
@@ -0,0 +1,4 @@
+{
+ "label": "Core schemas",
+ "position": 4
+}
diff --git a/packages/web/docs/core-schemas/info/_category_.json b/packages/web/docs/core-schemas/info/_category_.json
new file mode 100644
index 000000000..4bb5cd2af
--- /dev/null
+++ b/packages/web/docs/core-schemas/info/_category_.json
@@ -0,0 +1,4 @@
+{
+ "label": "Info",
+ "position": 4
+}
diff --git a/packages/web/docs/core-schemas/info/index.mdx b/packages/web/docs/core-schemas/info/index.mdx
new file mode 100644
index 000000000..9e74a629d
--- /dev/null
+++ b/packages/web/docs/core-schemas/info/index.mdx
@@ -0,0 +1,103 @@
+---
+sidebar_position: 1
+---
+
+import SpecLink from "@site/src/components/SpecLink";
+import SchemaExample from "@site/src/components/SchemaExample";
+
+# Info
+
+
+
+The **info schema** is the top-level container for **ethdebug/format** debug
+data. It bundles programs, types, sources, and compilation metadata into a
+single representation.
+
+For the mental model behind info objects, see
+[Concepts: Info](/docs/concepts/info).
+
+## What the info schema provides
+
+The **ethdebug/format/info** schema represents a complete unit of debug
+information for a compilation:
+
+- **Programs** for each bytecode artifact (create and call)
+- **Shared type definitions** referenced across programs
+- **Source files** (content or references)
+- **Compilation metadata** (compiler version, settings)
+
+## Schema overview
+
+An info object at minimum includes:
+
+
+ {`{
+ "$schema": "https://ethdebug.github.io/format/schema/info.json",
+ "programs": [
+ { /* program for create bytecode */ },
+ { /* program for call bytecode */ }
+ ],
+ "sources": [
+ { "id": "source-1", "path": "contracts/Token.sol" }
+ ]
+}`}
+
+
+## When to use the info schema
+
+Use **ethdebug/format/info** when you need:
+
+- A complete, standalone debug artifact
+- Debug data separate from compiler output
+- Input for debuggers and analysis tools
+
+For embedding debug data within compiler output, see
+[info/resources](/docs/core-schemas/info/resources).
+
+## Navigation
+
+
+
+
+
+
Resources
+
+
+ Compilation-level shared data: sources, compilations, and shared types.
+
+
+## Related concepts
+
+- **[Programs](/docs/core-schemas/programs)** — Runtime context for each bytecode
+- **[Types](/docs/core-schemas/types)** — Data structure descriptions
+- **[Pointers](/docs/core-schemas/pointers)** — Data location descriptions
diff --git a/packages/web/docs/core-schemas/info/resources.mdx b/packages/web/docs/core-schemas/info/resources.mdx
new file mode 100644
index 000000000..07ef04a01
--- /dev/null
+++ b/packages/web/docs/core-schemas/info/resources.mdx
@@ -0,0 +1,165 @@
+---
+sidebar_position: 2
+pagination_next: examples/index
+---
+
+import SpecLink from "@site/src/components/SpecLink";
+import SchemaExample from "@site/src/components/SchemaExample";
+
+# Resources
+
+
+
+The **ethdebug/format/info/resources** schema represents compilation-level
+shared data — information that multiple programs within a compilation might
+reference.
+
+## What resources contains
+
+Resources provide a way to avoid duplication by centralizing:
+
+- **Sources** — original source file content or references
+- **Compilations** — compiler invocation metadata
+- **Shared types** — type definitions used by multiple programs
+
+## Sources
+
+Sources represent the original source files that were compiled. Each source has
+an identifier and either contains the full content or references an external
+location:
+
+
+ {`{
+ "sources": [
+ {
+ "id": "source-1",
+ "path": "contracts/Token.sol",
+ "contents": "// SPDX-License-Identifier: MIT\\npragma solidity ^0.8.0;\\n..."
+ },
+ {
+ "id": "source-2",
+ "path": "contracts/Utils.sol"
+ }
+ ]
+}`}
+
+
+Sources can include:
+
+- **path** — file path (relative or absolute)
+- **contents** — the actual source code text
+- **contentHash** — hash of the contents for verification
+
+When `contents` is omitted, tools must retrieve the source from the file system
+or another location.
+
+## Source ranges
+
+Throughout **ethdebug/format**, source locations are specified as ranges within
+a source file:
+
+
+ {`{
+ "source": { "id": "source-1" },
+ "range": {
+ "offset": 150,
+ "length": 25
+ }
+}`}
+
+
+This identifies bytes 150-174 in the source with id "source-1". Ranges use byte
+offsets, not line/column positions.
+
+## Compilations
+
+Compilation metadata captures information about how the code was compiled:
+
+
+ {`{
+ "compilations": [
+ {
+ "id": "compilation-1",
+ "compiler": {
+ "name": "solc",
+ "version": "0.8.20"
+ },
+ "sources": ["source-1", "source-2"],
+ "settings": { /* compiler settings */ }
+ }
+ ]
+}`}
+
+
+Programs reference their compilation, allowing debuggers to understand the
+build context.
+
+## Shared types
+
+Type definitions that appear in multiple programs can be defined once in
+resources and referenced by ID:
+
+
+ {`{
+ "types": [
+ {
+ "id": "type-balance",
+ "kind": "uint",
+ "bits": 256
+ },
+ {
+ "id": "type-user",
+ "kind": "struct",
+ "definition": { "name": "User" },
+ "contains": [
+ { "name": "balance", "type": { "id": "type-balance" } },
+ { "name": "active", "type": { "kind": "bool" } }
+ ]
+ }
+ ]
+}`}
+
+
+Programs then reference these types by ID rather than duplicating the full
+definition.
+
+## Info vs. resources
+
+The key distinction:
+
+- **info** — complete standalone representation with full metadata
+- **info/resources** — debug data without duplicating existing compiler output
+
+Since compilers typically already produce structured JSON with compilation
+metadata, they can emit resources objects and let tooling combine them into
+full info objects.
+
+## Related schemas
+
+Resources builds on schemas from the **ethdebug/format/materials** namespace:
+
+- **[Source](/spec/materials/source)** — individual source file representation
+- **[Source range](/spec/materials/source-range)** — location within a source
+- **[Compilation](/spec/materials/compilation)** — compiler invocation details
+- **[Identifiers](/spec/materials/id)** — ID and reference patterns
+
+## Specification
+
+For the formal schema definition, see the
+[info/resources specification](/spec/info/resources).
diff --git a/packages/web/docs/core-schemas/pointers/_category_.json b/packages/web/docs/core-schemas/pointers/_category_.json
new file mode 100644
index 000000000..330c8d566
--- /dev/null
+++ b/packages/web/docs/core-schemas/pointers/_category_.json
@@ -0,0 +1,4 @@
+{
+ "label": "Pointers",
+ "position": 2
+}
diff --git a/packages/web/docs/core-schemas/pointers/collections.mdx b/packages/web/docs/core-schemas/pointers/collections.mdx
new file mode 100644
index 000000000..dc46151d1
--- /dev/null
+++ b/packages/web/docs/core-schemas/pointers/collections.mdx
@@ -0,0 +1,363 @@
+---
+sidebar_position: 4
+---
+
+import SpecLink from "@site/src/components/SpecLink";
+import { PointerPlayground, PointerExample } from "@theme/PointersExample";
+
+# Collections
+
+
+
+
+
+While [regions](./regions) describe single contiguous byte ranges, **collections**
+aggregate multiple pointers together. Collections handle cases where data
+structures span multiple locations or have dynamic configurations.
+
+## Why collections?
+
+Consider a Solidity struct with multiple fields, or a dynamic array whose
+length isn't known at compile time. These require more than pointing to a
+single location—they need to describe relationships between multiple
+pointers or generate pointers based on runtime values.
+
+Collections provide six patterns for composing pointers:
+
+| Collection | Purpose |
+| ------------- | -------------------------------------------- |
+| `group` | Combine multiple pointers into one |
+| `list` | Generate pointers for indexed sequences |
+| `conditional` | Choose between pointers based on a condition |
+| `scope` | Define variables for use in nested pointers |
+| `reference` | Invoke reusable pointer templates |
+| `templates` | Define inline templates for local reuse |
+
+Click **"▶ Try it"** on any example to load it into the Pointer Playground
+drawer at the bottom of the screen.
+
+## Group
+
+A **group** combines multiple pointers into a single composite pointer. Each
+pointer in the group can have a name for identification.
+
+
+
+Groups are useful for structs and other compound data types where multiple
+fields need to be accessed together.
+
+## List
+
+A **list** generates a sequence of pointers based on a count expression. This
+handles dynamic arrays and other indexed collections.
+
+:::tip[Terminology note]
+The `each` property introduces a **pointer variable** — a named binding for use
+in expressions. This is different from **program variables** (source-level
+identifiers with types and pointers). See the
+[glossary](/docs/reference/glossary) for complete definitions.
+:::
+
+
+
+The list evaluates `count` to determine how many pointers to generate, then
+for each index (bound to the variable named by `each`), it evaluates the `is`
+pointer template.
+
+### List with dynamic count
+
+When the count comes from storage:
+
+
+
+### List properties
+
+| Property | Description |
+| -------- | --------------------------------------------------- |
+| `count` | Expression evaluating to the number of items |
+| `each` | Variable name for the current index (starting at 0) |
+| `is` | Pointer template evaluated for each index |
+
+## Conditional
+
+A **conditional** selects between pointers based on whether an expression
+evaluates to a non-zero value.
+
+
+
+### Conditional properties
+
+| Property | Description |
+| -------- | ---------------------------------------- |
+| `if` | Expression to evaluate (non-zero = true) |
+| `then` | Pointer to use when condition is true |
+| `else` | Optional pointer when condition is false |
+
+## Scope
+
+A **scope** defines variables that can be used in a nested pointer. Variables
+are evaluated in order, so later variables can reference earlier ones.
+
+
+
+Scopes help break complex pointer definitions into readable steps. For examples
+combining scopes with keccak256 for storage slot computation, see the
+[expressions documentation](./expressions#computing-storage-slots-with-keccak256).
+
+### Scope properties
+
+| Property | Description |
+| -------- | --------------------------------------------- |
+| `define` | Object mapping variable names to expressions |
+| `in` | Pointer where defined variables are available |
+
+## Reference and templates
+
+A **reference** invokes a named pointer template, while **templates** defines
+them inline. These work together for reusable pointer patterns.
+
+
+
+### Template definition
+
+Each template in the `templates` object has:
+
+| Property | Description |
+| -------- | -------------------------------- |
+| `expect` | Array of required variable names |
+| `for` | The pointer template body |
+
+### Reference properties
+
+| Property | Description |
+| ---------- | ---------------------------------------------------------- |
+| `template` | Name of the template to invoke |
+| `yields` | Optional object mapping template region names to new names |
+
+## Nesting collections
+
+Collections can be nested to build complex pointer structures. A group might
+contain lists, conditionals might wrap groups, and scopes can define variables
+used throughout nested collections.
+
+
+
+## Named regions in collections
+
+Pointers within collections can include a `name` property. Named regions are
+tracked during resolution and can be referenced using the `.slot`, `.offset`,
+and `.length` syntax in expressions.
+
+
+
+## Learn more
+
+- [Regions](./regions) for simple pointer definitions
+- [Expressions](./expressions) for dynamic value computation
+- [Pointer specification](/spec/pointer/collection) for formal definitions
+
+
diff --git a/packages/web/docs/core-schemas/pointers/expressions.mdx b/packages/web/docs/core-schemas/pointers/expressions.mdx
new file mode 100644
index 000000000..ddca75b23
--- /dev/null
+++ b/packages/web/docs/core-schemas/pointers/expressions.mdx
@@ -0,0 +1,473 @@
+---
+sidebar_position: 3
+---
+
+import SpecLink from "@site/src/components/SpecLink";
+import SchemaExample from "@site/src/components/SchemaExample";
+import { PointerPlayground, PointerExample } from "@theme/PointersExample";
+
+# Expressions
+
+
+
+
+
+Static offsets work for simple variables, but most interesting data has
+locations that depend on runtime values. Expressions let pointers compute
+addresses dynamically.
+
+## Why expressions are needed
+
+Consider reading element `i` from a memory array. The element's location
+depends on:
+
+- Where the array starts (might come from the free memory pointer)
+- Which element we want (the index `i`)
+- How big each element is (32 bytes for `uint256`)
+
+A static pointer can't capture this. Expressions can:
+
+
+
+## Arithmetic expressions
+
+Basic math operations for computing addresses:
+
+### `$sum` — Addition
+
+Adds all values in an array:
+
+
+
+### `$difference` — Subtraction
+
+Subtracts the second value from the first (saturates at zero):
+
+
+
+### `$product` — Multiplication
+
+Multiplies all values in an array:
+
+
+
+### `$quotient` — Division
+
+Integer division of first value by second:
+
+
+
+### `$remainder` — Modulo
+
+Remainder after division:
+
+
+
+## Reading values
+
+### `$read` — Read from a named region
+
+Reads the bytes from a previously defined region:
+
+
+
+The `$read` expression retrieves the actual runtime value stored in the
+`array-length-slot` region—the array's length.
+
+## Region property lookups
+
+Reference properties of named regions with `.property` syntax:
+
+### `.offset` — Region's offset
+
+
+ {`{ ".offset": "previous-element" }`}
+
+
+Returns the offset of the named region.
+
+### `.length` — Region's length
+
+
+ {`{ ".length": "previous-element" }`}
+
+
+Returns the length of the named region.
+
+### `.slot` — Region's slot
+
+
+ {`{ ".slot": "base-slot" }`}
+
+
+Returns the slot number for storage/stack/transient regions.
+
+### Chaining lookups
+
+Compute the next element's position from the previous one:
+
+
+
+## Computing storage slots with `$keccak256`
+
+Solidity uses keccak256 hashing to compute storage locations for dynamic data.
+
+### Array element slots
+
+For a dynamic array at slot `n`, elements start at `keccak256(n)`:
+
+
+
+### Mapping value slots
+
+For a mapping at slot `n`, the value for key `k` is at `keccak256(k, n)`:
+
+
+
+### Nested mappings
+
+For `mapping(address => mapping(uint => uint))` at slot 2:
+
+
+
+This computes: `keccak256(inner_key, keccak256(outer_key, 2))`
+
+## Data manipulation
+
+### `$concat` — Concatenate bytes
+
+Joins byte sequences without padding:
+
+
+
+Useful for building hash inputs from multiple values.
+
+### `$sized` — Resize to N bytes
+
+Truncates or pads to exactly N bytes:
+
+
+
+Pads with zeros on the left; truncates from the left if too long.
+
+### `$wordsized` — Resize to word size
+
+Equivalent to `$sized32` on the EVM:
+
+
+
+## Variables in expressions
+
+Expressions can reference variables by name. These come from list pointer
+contexts:
+
+
+
+The variable `"i"` takes values from 0 to count-1, computing each element's
+slot.
+
+## Complete example: dynamic array element
+
+Reading element `i` from `uint256[] storage arr` at slot 5:
+
+
+
+The pointer:
+
+1. Defines the array's base slot
+2. Computes the element's slot: `keccak256(5) + element_index`
+3. Returns that storage location
+
+## Learn more
+
+- [Regions documentation](./regions) for region structure
+- [Expression specification](/spec/pointer/expression) for the complete
+ expression language
+- [Implementation guide](/docs/implementation-guides/pointers/evaluating-expressions)
+ for building an expression evaluator
+
+
diff --git a/packages/web/docs/core-schemas/pointers/index.mdx b/packages/web/docs/core-schemas/pointers/index.mdx
new file mode 100644
index 000000000..db33d46b7
--- /dev/null
+++ b/packages/web/docs/core-schemas/pointers/index.mdx
@@ -0,0 +1,114 @@
+---
+sidebar_position: 1
+---
+
+import SpecLink from "@site/src/components/SpecLink";
+
+# Pointers
+
+
+
+Pointers describe where data lives in the EVM. They're recipes that tell
+debuggers how to find bytes at runtime — recipes that can depend on the
+current machine state.
+
+For the mental model behind pointers (including EVM data locations), see
+[Concepts: Pointers](/docs/concepts/pointers).
+
+## Reference documentation
+
+
+
+
+
+
Regions
+
+
+
How to specify byte ranges in different data locations.
diff --git a/packages/web/docs/core-schemas/pointers/regions.mdx b/packages/web/docs/core-schemas/pointers/regions.mdx
new file mode 100644
index 000000000..96cede088
--- /dev/null
+++ b/packages/web/docs/core-schemas/pointers/regions.mdx
@@ -0,0 +1,234 @@
+---
+sidebar_position: 2
+---
+
+import SpecLink from "@site/src/components/SpecLink";
+import { PointerPlayground, PointerExample } from "@theme/PointersExample";
+
+# Regions
+
+
+
+
+
+A region represents a contiguous block of bytes in a specific EVM data
+location. Regions are the leaves of the pointer tree—the actual byte ranges
+that hold data.
+
+## Addressing schemes
+
+The EVM uses two different models for organizing bytes, and regions reflect
+this:
+
+### Slice-based locations
+
+**Memory**, **calldata**, **returndata**, and **code** are byte-addressable.
+Regions in these locations use `offset` and `length`:
+
+
+
+- `offset`: byte position from the start (required)
+- `length`: number of bytes (optional; may be computed or implied by type)
+
+### Slot-based locations
+
+**Storage**, **transient storage**, and **stack** are organized in 32-byte
+slots. Regions use `slot`:
+
+
+
+For storage and transient storage, values that don't fill a full slot can
+specify sub-slot positioning:
+
+
+
+This addresses 20 bytes starting at byte 12 within slot 0—useful for packed
+storage.
+
+## Location-specific details
+
+### Memory
+
+Memory is a simple byte array that grows as needed:
+
+
+
+Memory addresses often come from the free memory pointer (stored at `0x40`).
+
+### Storage
+
+Storage persists between transactions. Slots are 32-byte words addressed by
+256-bit keys:
+
+
+
+Slot addresses can be literal numbers, hex strings, or computed expressions.
+
+### Stack
+
+The EVM stack holds up to 1024 words. Slot 0 is the top:
+
+
+
+Stack regions are typically read-only from a debugging perspective—you observe
+values but don't address sub-ranges.
+
+### Calldata
+
+Function arguments arrive in calldata, read-only and byte-addressable:
+
+
+
+The first 4 bytes are typically the function selector; arguments follow.
+
+### Returndata
+
+After a call, the returned data is accessible:
+
+
+
+### Code
+
+Contract bytecode can be read as data:
+
+
+
+This is used for immutable variables and other data embedded in bytecode.
+
+### Transient storage
+
+Transient storage (EIP-1153) persists only within a transaction:
+
+
+
+Uses the same slot-based addressing as regular storage.
+
+## Naming regions
+
+Any region can have a `name` that other parts of the pointer reference:
+
+
+
+Names enable:
+
+- Reading the region's value with `{ "$read": "token-balance" }`
+- Referencing properties with `{ ".slot": "token-balance" }`
+- Building self-documenting pointer structures
+
+## Dynamic addresses
+
+Region fields like `offset`, `slot`, and `length` can use expressions to
+compute values at runtime. This enables pointers for dynamic data like arrays
+and mappings.
+
+For the full expression language including arithmetic, `$keccak256`, and value
+reading, see [expressions](./expressions).
+
+## Learn more
+
+For complete schemas for each location type, see the
+[pointer region specification](/spec/pointer/region).
+
+
diff --git a/packages/web/docs/core-schemas/programs/_category_.json b/packages/web/docs/core-schemas/programs/_category_.json
new file mode 100644
index 000000000..c75b81e20
--- /dev/null
+++ b/packages/web/docs/core-schemas/programs/_category_.json
@@ -0,0 +1,4 @@
+{
+ "label": "Programs",
+ "position": 3
+}
diff --git a/packages/web/docs/core-schemas/programs/index.mdx b/packages/web/docs/core-schemas/programs/index.mdx
new file mode 100644
index 000000000..5fd80bd62
--- /dev/null
+++ b/packages/web/docs/core-schemas/programs/index.mdx
@@ -0,0 +1,93 @@
+---
+sidebar_position: 1
+---
+
+import SpecLink from "@site/src/components/SpecLink";
+
+# Programs
+
+
+
+Programs describe the high-level context at each point in EVM bytecode
+execution. They're the bridge between raw machine instructions and the
+source code developers wrote.
+
+For the mental model behind programs, see
+[Concepts: Programs](/docs/concepts/programs).
+
+## Reference documentation
+
+
+
+
+
+
Instructions
+
+
+
How instruction records map bytecode to source and context.
+
+### Explore by topic
+
+- **[Types](/docs/core-schemas/types)** — Full documentation on type definitions
+- **[Pointers](/docs/core-schemas/pointers)** — Full documentation on pointer definitions
+- **[Programs](/docs/core-schemas/programs)** — Full documentation on program annotations
+- **[Info schema](/docs/core-schemas/info)** — Full documentation on bundling debug data
+- **[BUG Playground](/docs/examples/bug-playground)** — See a working
+ compiler that emits **ethdebug/format**
diff --git a/packages/web/docs/getting-started/for-debugger-authors.mdx b/packages/web/docs/getting-started/for-debugger-authors.mdx
new file mode 100644
index 000000000..59646ff5c
--- /dev/null
+++ b/packages/web/docs/getting-started/for-debugger-authors.mdx
@@ -0,0 +1,141 @@
+---
+sidebar_position: 2
+---
+
+import SchemaExample from "@site/src/components/SchemaExample";
+
+# For debugger authors
+
+You're building a debugger, transaction tracer, or analysis tool. Here's how
+**ethdebug/format** helps you understand smart contract execution.
+
+## What the format gives you
+
+With **ethdebug/format** data, your debugger can:
+
+- **Locate variable values at runtime** — Resolve pointers to find data in
+ storage, memory, or the stack, even when locations are computed dynamically
+- **Interpret bytes as meaningful values** — Use type definitions to
+ understand what shape the data takes (struct fields, array elements, etc.)
+- **Map bytecode to source** — Show users which line of source code
+ corresponds to the current instruction
+- **Display variables in scope** — Know which variables exist at each point
+ in execution
+
+## Quick example: reading a type
+
+Type information describes the structure of data. Here's what a simple
+`uint256` type looks like in **ethdebug/format**:
+
+
+ {`{
+ "kind": "uint",
+ "bits": 256
+}`}
+
+
+And a more complex struct:
+
+
+ {`{
+ "kind": "struct",
+ "definition": {
+ "name": "Position"
+ },
+ "contains": [
+ { "name": "x", "type": { "kind": "int", "bits": 256 } },
+ { "name": "y", "type": { "kind": "int", "bits": 256 } }
+ ]
+}`}
+
+
+Your debugger reads these definitions to understand the structure of data.
+Combined with a pointer that locates the bytes, types enable presenting
+meaningful values to users.
+
+## Quick example: resolving a pointer
+
+Pointers describe where data lives. A simple storage variable pointer:
+
+
+ {`{
+ "location": "storage",
+ "slot": "0x0"
+}`}
+
+
+This says: "read from storage slot 0." But pointers can express complex,
+dynamic locations too — like array elements or mapping values whose locations
+depend on runtime state.
+
+## What you need to implement
+
+To consume **ethdebug/format**, your debugger needs:
+
+1. **Schema parsing** — Load and validate **ethdebug/format** JSON
+2. **Pointer resolution** — Evaluate pointer expressions against EVM state to
+ locate bytes
+3. **Type interpretation** — Use type definitions to understand the structure
+ of located data
+4. **Program interpretation** — Track context as execution progresses
+
+## Go deeper
+
+
+
+
+
+
Understand the concepts
+
+
+ Learn the mental models behind types, pointers, and programs.
+
+
+### Explore by topic
+
+- **[Types](/docs/core-schemas/types)** — How the format describes data structures
+- **[Pointers](/docs/core-schemas/pointers)** — How the format describes data locations
+- **[Programs](/docs/core-schemas/programs)** — How the format describes runtime context
+- **[Info schema](/docs/core-schemas/info)** — How the format bundles everything together
+- **[Specification](/spec/overview)** — Formal schema definitions
diff --git a/packages/web/docs/getting-started/index.mdx b/packages/web/docs/getting-started/index.mdx
new file mode 100644
index 000000000..14ecd87e6
--- /dev/null
+++ b/packages/web/docs/getting-started/index.mdx
@@ -0,0 +1,72 @@
+---
+sidebar_position: 1
+sidebar_label: Overview
+---
+
+# Getting started
+
+Welcome to the **ethdebug/format** documentation! This section helps you get
+started based on what you're building.
+
+## Select a guide
+
+
+
+
+
+
For debugger authors
+
+
+
+ You're building a debugger, transaction tracer, or other tool that
+ needs to understand smart contract execution.
+
+
+ Learn how to consume ethdebug/format{" "}
+ data to resolve pointers and map bytecode back to source.
+
-This guide provides readers with a tour of the **@ethdebug/pointers**
-TypeScript reference implementation, showing example concrete logic for how a
-debugger might process **ethdebug/format** pointers.
+A detailed walkthrough of the **@ethdebug/pointers** TypeScript reference
+implementation, showing how to resolve pointers to concrete values.
-For an introduction to **ethdebug/format** pointers, please see
-the Pointer specification's [Overview](/spec/pointer/overview) and
-[Key concepts](/spec/pointer/concepts) pages.
+For background on pointer concepts, see
+the [Pointers documentation](/docs/core-schemas/pointers) and the
+[Pointer specification](/spec/pointer/overview).
**Other guides**
- _Guides for other aspects of debugger-side **ethdebug/format** implementation
- are planned and still need to be written._
+ _Additional debugger implementation guides are planned._
-
## For compilers
+Guides for emitting **ethdebug/format** data from compilers and toolchains.
+
- _Guides for implementing **ethdebug/format** support inside a compiler are
- planned and still need to be written._
+ Guidance on integrating **ethdebug/format** output into your compiler,
+ including what to emit and when.
-:::tip[Work in progress]
-
-Sadly, things are looking a little scarce right now. Please stay tuned as work
-on this effort progresses.
+:::tip[Contributing]
-**Interested in helping out?** If you'd like to help with writing initial
-reference implementations for one or more schemas, please reach out in our
-[Matrix.chat](https://matrix.to/#/#ethdebug:matrix.org).
+Interested in helping improve these guides? Contributions are welcome!
+Reach out in our [Matrix.chat](https://matrix.to/#/#ethdebug:matrix.org)
+or open an issue on [GitHub](https://github.com/ethdebug/format).
:::
diff --git a/packages/web/docs/implementation-guides/pointers/dereference-logic/making-regions-concrete.mdx b/packages/web/docs/implementation-guides/pointers/dereference-logic/making-regions-concrete.mdx
index 9115f69e2..36fac2996 100644
--- a/packages/web/docs/implementation-guides/pointers/dereference-logic/making-regions-concrete.mdx
+++ b/packages/web/docs/implementation-guides/pointers/dereference-logic/making-regions-concrete.mdx
@@ -3,6 +3,7 @@ sidebar_position: 3
---
import CodeListing from "@site/src/components/CodeListing";
+import SchemaExample from "@site/src/components/SchemaExample";
# Making regions concrete
@@ -41,15 +42,19 @@ cycles and determine the evaluation order for each property based on which
property references which other property. That is, a robust implementation
might take this pointer:
-```json
-{
+
+ {`{
"location": "memory",
"offset": {
"$sum": [0x60, { ".length": "$this" }]
},
"length": "$wordsize"
-}
-```
+}`}
+
... and detect that it must evaluate `length` before evaluating `offset`.
diff --git a/packages/web/docs/implementation-guides/pointers/pointers.mdx b/packages/web/docs/implementation-guides/pointers/pointers.mdx
index 9be30f782..7b135bff9 100644
--- a/packages/web/docs/implementation-guides/pointers/pointers.mdx
+++ b/packages/web/docs/implementation-guides/pointers/pointers.mdx
@@ -31,7 +31,7 @@ evaluate data in this schema requires some careful consideration.
If you're reading this page without first having familiarized yourself with the
concepts/terminology defined by the **ethdebug/format/pointer** schema,
it is recommended that you start by reading the
-[Key concepts](/spec/pointer/concepts) page and then study the handful of
+[Pointers concepts](/docs/concepts/pointers) page and then study the handful of
example pointers provided by [the schema itself](/spec/pointer).
A fully-detailed understanding of the schema is not necessary for the purposes
diff --git a/packages/web/docs/overview.mdx b/packages/web/docs/overview.mdx
index c7065468e..aa8b469a4 100644
--- a/packages/web/docs/overview.mdx
+++ b/packages/web/docs/overview.mdx
@@ -4,9 +4,80 @@ sidebar_position: 1
# Project overview
-The security of smart contracts hinges on the availability of robust debugging
-tools. As the compiler optimizes a contract, it may move instructions around or
-remove them thus weakening its relationship with the original source code. The
-debugger then faces the challenging task of reversing these transformations to
-enrich the often cryptic artifacts with contexts mapping back to the contract's
-source.
+**ethdebug/format** is an open specification for debugging information in
+EVM-based smart contracts. It provides a standard way for compilers to emit
+rich debug data that debuggers can use to help developers understand what
+their contracts are doing at runtime.
+
+## The problem
+
+Compilers are relentless optimizers. They inline functions, eliminate dead
+code, reorder instructions, and pack multiple values into single storage
+slots. The resulting bytecode bears little resemblance to the source code
+that produced it.
+
+This creates a fundamental challenge for debugging. When a transaction fails,
+developers need to understand what happened — but the connection between
+bytecode and source has been obscured. Without standardized debug information,
+every debugger must reverse-engineer these transformations on its own, and
+each compiler-debugger pair requires custom integration work.
+
+## What this format provides
+
+**ethdebug/format** defines schemas including:
+
+- **Types** — Describe the structure of data (integers, structs, arrays,
+ mappings) — what shape it takes and how its parts relate
+- **Pointers** — Describe where data lives at runtime, including dynamic
+ locations that depend on execution state
+- **Programs** — Describe the runtime context at each bytecode instruction,
+ including which variables are in scope and what source code corresponds
+ to each operation
+- **Info schema** — Bundles all debug data for a compilation, including
+ programs, shared types, sources, and compiler metadata
+
+## Get started
+
+
+
+
+
+
Building a debugger?
+
+
+ Learn how to consume **ethdebug/format** data to build better debugging
+ tools.
+
+
+Or explore the [concepts](/docs/concepts) to understand the format's design,
+browse [examples](/docs/examples) to see it in action, or dive into the
+[specification](/spec/overview) for formal definitions.
diff --git a/packages/web/docs/reference/_category_.json b/packages/web/docs/reference/_category_.json
new file mode 100644
index 000000000..17c9bfb32
--- /dev/null
+++ b/packages/web/docs/reference/_category_.json
@@ -0,0 +1,4 @@
+{
+ "label": "Reference",
+ "position": 7
+}
diff --git a/packages/web/docs/known-challenges.mdx b/packages/web/docs/reference/challenges.mdx
similarity index 99%
rename from packages/web/docs/known-challenges.mdx
rename to packages/web/docs/reference/challenges.mdx
index c051dc4b5..f1d293453 100644
--- a/packages/web/docs/known-challenges.mdx
+++ b/packages/web/docs/reference/challenges.mdx
@@ -4,7 +4,7 @@ sidebar_position: 2
import TOCInline from "@theme/TOCInline";
-# Known challenges
+# Challenges
The fundamental challenge for an Ethereum debugging data format is that, on the
one hand, we want it to be able to handle the complexity of Solidity and other
diff --git a/packages/web/docs/reference/glossary.mdx b/packages/web/docs/reference/glossary.mdx
new file mode 100644
index 000000000..eac37393f
--- /dev/null
+++ b/packages/web/docs/reference/glossary.mdx
@@ -0,0 +1,230 @@
+---
+sidebar_position: 3
+---
+
+import SchemaExample from "@site/src/components/SchemaExample";
+
+# Glossary
+
+This page defines key terms used throughout the **ethdebug/format**
+specification and documentation.
+
+## Core concepts
+
+### Pointer
+
+A structured definition that describes how to locate data within EVM state.
+Pointers act as "recipes" that can be evaluated against machine state to
+produce concrete byte regions. Unlike static offsets, pointers can include
+dynamic computations (like keccak256 hashing for storage slots) that depend on
+runtime values.
+
+See: [Pointers documentation](/docs/core-schemas/pointers)
+
+### Region
+
+A contiguous range of bytes within a specific data location. A region
+specifies:
+
+- **Location**: Where the data lives (storage, memory, stack, etc.)
+- **Offset/Slot**: Where in that location the region starts
+- **Length**: How many bytes the region spans
+
+Regions are the concrete output of pointer resolution.
+
+See: [Regions documentation](/docs/core-schemas/pointers/regions)
+
+### Cursor
+
+The result of dereferencing a pointer. A cursor provides an interface for
+viewing pointer regions against different machine states. It can be thought of
+as a "resolved pointer" that knows how to extract data from the EVM.
+
+### Type
+
+A structured definition describing the shape and structure of data. Types
+specify what kind of value something is (integer, struct, mapping, etc.) and
+how its parts relate. Combined with a pointer that locates the data, types
+enable debuggers to display values in human-readable form.
+
+See: [Types documentation](/docs/core-schemas/types)
+
+### Program
+
+A representation of compiled code that maps bytecode instructions to their
+semantic context. Programs describe what code means at each instruction,
+including:
+
+- Source code ranges
+- Variables in scope
+- Frame information
+- Remarks and annotations
+
+See: [Programs documentation](/docs/core-schemas/programs)
+
+## EVM data locations
+
+### Storage
+
+Persistent, contract-specific data that survives between transactions.
+Organized as a key-value store where:
+
+- Keys are 32-byte **slots** (often computed via keccak256)
+- Values are 32-byte words
+
+Storage is the only data location that persists after execution ends.
+
+### Memory
+
+Temporary, byte-addressable scratch space available during execution. Memory:
+
+- Starts empty at the beginning of each call
+- Can be expanded dynamically (costs gas)
+- Is byte-addressed (unlike word-addressed storage)
+- Is cleared when the call ends
+
+### Stack
+
+The EVM's operand stack, used for computation. The stack:
+
+- Holds 256-bit (32-byte) values
+- Has a maximum depth of 1024 items
+- Is accessed from the top (LIFO)
+- Is ephemeral per call frame
+
+### Calldata
+
+Read-only input data provided to a transaction or call. Calldata:
+
+- Is byte-addressed
+- Cannot be modified during execution
+- Contains the function selector and encoded arguments
+
+### Returndata
+
+Data returned by the most recent external call. Returndata:
+
+- Is byte-addressed
+- Is overwritten by each subsequent call
+- Contains the return value or revert reason
+
+### Code
+
+The deployed bytecode of a contract. Code:
+
+- Is immutable after deployment
+- Can be read via CODECOPY/EXTCODECOPY
+- Contains both executable instructions and embedded data
+
+### Transient storage
+
+Temporary storage that persists within a transaction but is cleared at the end.
+Introduced in EIP-1153, transient storage:
+
+- Has the same slot/word structure as persistent storage
+- Is cleared after each transaction
+- Is cheaper than persistent storage
+
+## Pointer expressions
+
+### Expression
+
+A computation that produces a bytes value when evaluated against machine state.
+Expressions enable pointers to describe dynamic data locations. Common
+expression types include:
+
+- **Arithmetic**: `$sum`, `$difference`, `$product`, `$quotient`, `$remainder`
+- **Hashing**: `$keccak256` (for computing storage slots)
+- **Lookups**: `.offset`, `.length`, `.slot` (reading properties of named regions)
+- **References**: `$this`, variables by name
+
+### Variable (in pointers)
+
+A named binding that can be referenced within a pointer definition. Variables
+allow pointers to name and reuse intermediate computations:
+
+
+ {`{
+ "define": { "name": "baseSlot", "location": "storage", "slot": 0 },
+ "in": {
+ "location": "storage",
+ "slot": { "$keccak256": [{ ".slot": "baseSlot" }] }
+ }
+}`}
+
+
+## Program context
+
+### Context
+
+Information associated with a bytecode instruction that describes its semantic
+meaning. Contexts can include:
+
+- **Code**: Source location
+- **Variables**: Variables entering scope
+- **Remark**: Human-readable annotation
+- **Frame**: Function/call frame identifier
+
+### Instruction
+
+A single bytecode operation at a specific program counter offset. Instructions
+combine:
+
+- **Offset**: Byte position in the bytecode
+- **Operation**: The opcode and its arguments
+- **Context**: Semantic information about what the instruction does
+
+### Variable (in programs)
+
+A named value in the source program that can be inspected during debugging.
+Program variables have:
+
+- **Identifier**: The variable's name
+- **Declaration**: Where it was defined in source
+- **Type**: How to interpret the value
+- **Pointer**: Where to find the value at runtime
+
+## Execution tracing
+
+### Trace
+
+A record of EVM execution showing the state at each step. Traces capture:
+
+- Program counter
+- Opcode executed
+- Stack contents
+- Memory changes
+- Storage modifications
+
+### Machine state
+
+A snapshot of all EVM state at a specific execution point. Machine state
+includes the current values of:
+
+- Stack
+- Memory
+- Storage
+- Calldata
+- Returndata
+- Program counter
+- Gas remaining
+
+## Materials
+
+### Source
+
+Original source code associated with a compiled program. Sources are referenced
+by ID and contain the raw text content.
+
+### Source range
+
+A reference to a specific portion of source code, defined by:
+
+- **Source**: Which source file
+- **Range**: Offset and length within that file
+
+Source ranges enable mapping bytecode back to the original code that produced
+it.
diff --git a/packages/web/docs/goals.mdx b/packages/web/docs/reference/goals.mdx
similarity index 94%
rename from packages/web/docs/goals.mdx
rename to packages/web/docs/reference/goals.mdx
index 94c022037..34cd56981 100644
--- a/packages/web/docs/goals.mdx
+++ b/packages/web/docs/reference/goals.mdx
@@ -1,5 +1,5 @@
---
-sidebar_position: 2
+sidebar_position: 1
---
# Goals
@@ -10,9 +10,9 @@ Smart contract languages have evolved independently, each with its own
debugging requirements. This fragmentation makes universal tooling challenging
if not impossible.
-The **ethdebug format** seeks to establish a common language for debug
+**ethdebug/format** seeks to establish a common language for debug
information across all smart contract languages. By defining comprehensive yet
-flexible schemas, **ethdebug format** aims to bridge the gap between
+flexible schemas, **ethdebug/format** aims to bridge the gap between
different compilers, languages, and tools while respecting the unique
characteristics of each. The goal is to create a foundation that works equally
well for Solidity, Vyper, Fe, and any future smart contract language.
diff --git a/packages/web/docs/sketches/_category_.json b/packages/web/docs/sketches/_category_.json
deleted file mode 100644
index f40aabfa9..000000000
--- a/packages/web/docs/sketches/_category_.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
- "label": "Prototype sketches",
- "position": 3,
- "link": {
- "type": "generated-index",
- "description": "Informally specified proposals to inform format design"
- }
-}
diff --git a/packages/web/docs/sketches/layout.mdx b/packages/web/docs/sketches/layout.mdx
deleted file mode 100644
index 9cf980945..000000000
--- a/packages/web/docs/sketches/layout.mdx
+++ /dev/null
@@ -1,496 +0,0 @@
----
-description: Initial format sketch
----
-
-# @haltman-at's allocation data draft
-
-## Status of this document
-
-This is an initial draft for review and comment. It does not have consensus and should only be cited as work in progress.
-
-## Goal of this document
-
-To present the skeleton of a format for describing layout of complex types or variables of those types (in storage or elsewhere) that is:
-
-1. Expressive enough to cover what Solidity and Vyper actually do,
-2. Simple enough to be usable, and
-3. Decently general, avoiding too much building in of Solidity and Vyper behaviors, and instead providing a way to specify those behaviors
-
-Hopefully this approximately does that! (Note that it may make assumptions based on the EVM, rather than Solidity and Vyper;
-e.g., in our discussion of endianness, we'll say that we don't need to support little-endian numbers, because the EVM makes them
-difficult; but note this is a property of the EVM, not any particular language.)
-
-This is something of a skeleton. One big problem that needs to be solved is to what extent this is applied to types vs to what
-extent it's applied to individual variables. For now this will basically assume it's applied to types. Of course, it is also necessary
-to describe the placement of individual variables, but hopefully with type layout information it's not necessary to individually describe
-their layout.
-
-So, for each type, we'll discuss what needs to be specified to specify the type itself, and then what needs to be specified to specify how it's laid out
-in each particular location.
-Also, we'll discuss how to specify locations of individual variables.
-
-What's written here might not be entirely compatible with what's in [prototype.mdx](./prototype.mdx). That will need to be hammered out.
-
-### Things this doesn't do
-
-There's one big thing that this doesn't attempt, which is arrays that are directly multidimensional; more generally it doesn't cover
-anything similar, like having arrays of structs where each struct takes up multiple words but they don't all start on word boundaries
-but rather are packed in as if it was all just primitive types. That seems to be too much complexity.
-
-There's some other weird possibilities I didn't consider, like arrays that go downward in storage instead of upward.
-
-## Specifying variable positions
-
-Of course, the overall location itself will need to be specified, which (for now) can be memory, calldata, code, the stack, or storage.
-(Coming soon: Transient storage?) For each location, further information is then needed to specify the position within the location.
-
-**Discussion**: Should position specifications include both start and end? Notionally, end is redundant if layout is specified in the
-type information. I'll just discuss start here. ("End" also potentially gets a bit messy when not everything runs the same way in
-storage.)
-
-**Discussion**: This document mentions "bytes" a lot. Should many of these mentions be "bits"? In many cases this would make no sense,
-but in some cases, it could conceptually be possible. The problem is that using bits instead of bytes is overall less convenient but
-doesn't gain much generality. But, it does gain us one important case (regarding how strings are stored in storage in Solidity),
-so we need it at least there. It seems inconsistent to use it only there and not more generally, though. So likely we should more
-often be using bits instead of bytes? Something for later.
-
-:::note
-@cameel comments here:
-
-> The string format does not really require this though. You can always look at
-> the last bit just as a part of the length field. I.e. the length is specified
-> as either 2N or 2N+1 and odd numbers indicate one format and even ones the
-> other.
-> :::
-
-### Positions in memory, calldata, or code
-
-These locations are byte-based, so here, positions can just be described as byte offsets.
-
-### Positions on the stack
-
-The stack is word-based. So positions can be described as stack slots (counted from the bottom), plus a byte within the slot
-(numbered from the little end?). Now this last part may seem unnecessary, as who would put two different variables in the
-same stack slot? Well, see below regarding internal function pointers; I think we may need this.
-
-### Positions in storage
-
-Note: This presumably will apply also to transient storage, although implementation there is yet to be seen.
-
-Sometimes multiple variables are packed into the same storage slot, so we need to specify both a storage slot and a byte within that slot (from the little end, probably).
-
-This leaves the question of specifying a storage slot -- is it sufficient to just give the slot address, or do we need to show how it was constructed? For
-top-level variables, the slot address should be enough. So if that's all we need, we don't need to say any more. But I'll cover the other case just to be sure.
-
-#### A note on endianness in storage
-
-Above speaks of the "start", but what's the "start" in storage for, e.g., an integer packed into the middle of a word? Is it the big end or the little end?
-
-Assuming any particular endianness in storage seems bad (in Solidity e.g. it's different for arrays vs bytestrings), so each type should have a storage endianness
-specified -- which does not need to agree with the endianness of its component types! It covers only the outermost layer.
-For something like an integer this is meaningless per se, but it is necessary to make sense of the "start" of that integer.
-
-:::note
-@cameel asks about this:
-
-> How do you define endianness for arrays?
-> :::
-
-#### Specifying complex storage slots (if necessary)
-
-A storage slot can be specified as one of the following objects:
-
-`{ slotType: "raw", offset: bigint }`
-
-`{ slotType: "offset", path: Slot, offset: bigint }`
-
-`{ slotType: "hashedoffset", path: Slot, offset: bigint }`
-
-:::note
-@cameel asks:
-
-> Do we need a distinction between relative and absolute locations? I.e. when
-> describing the nested layout or something like a struct you might want to
-> interpret locations as relative but then you might still want to have some
-> things interpreted as absolute (specifically the hashed locations).
-> :::
-
-```
-{
-slotType: "mapentry",
-path: Slot,
-mapType: "prefix" | "postfix" | "postfix-prehashed" | "prefix-prehashed"
-key:
-}
-```
-
-Here, prefix vs postfix means, does the key go before the map slot, or after? "Prehashed" means we hash the key separately and then hash the _result_
-together with the map slot (Vyper does this for certain types). The possibility "prefix-prehashed" isn't currently used anywhere but may as well include
-it for generality.
-
-Ideally the key might be represented as some sort of decoded value, but that seems out of scope, so let's just record the raw bytes of it, I figure.
-
-Possibly, for types that get padded before hashing, we could restrict the `key` field to be the bytes that actually represent the value, and
-correspondingly increase the set of `mapType`s to also include information about how the value is padded. Something to consider. See the section
-on specifying mappings for more discussion of this.
-
-Question: Allow offset on map entry? Don't really see a need for this.
-
-## Specifying basic types
-
-This might not need to be this complex. The suggestions in [prototype.mdx](./prototype.mdx) suggest grouping all these together as just primitive types
-with just `keyword`, `bitwidth`, and `alignment`. Maybe that's better? Although `alignment` should likely distinguish between zero-padding and sign-padding.
-
-### Integers
-
-Integers can be signed or unsigned and take up a specified number of bytes. No need for anything exotic here. We assume no integer type takes
-up more than a single word.
-
-`{ signed: boolean, bytes: number }`
-
-#### Specifying layout
-
-There are two things here that might need to be specified: endianness and padding. Note that since we assume no integer type takes up more than a single word,
-endianness is only a question for byte-based locations (memory, calldata, code). It's not a meaningful question for storage or the stack, as these are word-based. (However for storage layout
-information there should still be an endianness specified, even though it's technically meaningless, so that sense can be made of which end is the "start".)
-
-The EVM only really makes big-endian easy, so we probably don't need to specify endianness, and can just assume everything is big-endian. If anyone ever does
-little-endian for some reason, support for that can be added later. For now though we can ignore the distinction between bytes that are earlier and bytes that
-are more significant.
-
-That leaves padding. We can specify this as follows:
-
-`{ paddedBytes: number, paddingType: "zero" | "sign" | "right" }`
-
-:::note
-@cameel asks about this:
-
-> Does bytes include paddedBytes or not?
->
-> From the note below about "bytewidth of the unpadded type" I assume it does
-> not, but perhaps that should be said explicitly.
-> :::
-
-(Here `"zero"` means left-padded with zeroes, and `"right"` means right-padded with zeroes; `"sign"` means sign-padding.)
-
-Likely there should be some simpler way to indicate when no padding is used (`{paddingType: "none"}`?), but this will do.
-
-Note we don't include the bytewidth (or bitwidth) of the unpadded type, as that's in the type information rather than the layout information. But obviously it needs to be specified somewhere.
-
-### Fixed-point numbers
-
-These work like integers, except we also need to specify a denominator. Two possibilities:
-
-1. Add a `bigint` `denominator` field
-2. Add a `number` `base` field and a `number` `places` field
-
-Either should work.
-
-One could argue that we only need `places`, as only decimal fixed-point is implemented in any popular EVM language (Vyper), but binary fixed-point has
-also been discussed in the past, and there's little cost to being general here. If someone wants to do ternary fixed-point for some reason, sure, we can support that,
-that isn't costly to include.
-
-#### Specifying layout
-
-Same as for integers.
-
-### Short fixed-length bytestrings
-
-"Short" meaning "fits in a word and is treated as a primitive type". Probably this should be folded in with bytestrings more generally rather than treated
-separately, see below about that, but this is listed here in case we want to treat it separately.
-
-Not much to say here, just number of bytes.
-
-#### Specifying layout
-
-Same as above!
-
-### Booleans
-
-It's a boolean, nothing to say here.
-
-#### Specifying layout
-
-Same as above!
-
-### Addresses and other primitive types?
-
-Addresses are often treated as primitive? The idea of not separating out primitive types is starting to sound like a better idea. So maybe that's the thing to do, or maybe we can have the types above
-and then just have a bucket for other primitives, such as addresses.
-
-#### Specifying layout
-
-Same as above!
-
-#### A note on function pointers
-
-What about function pointers? Those are treated as a primitive type in Solidity!
-
-Well, external function pointers decompose into two parts, an address and a selector. So I think they should be treated as a complex type for our purposes here.
-Internal function pointers also decompose into two parts in non-IR Solidity.
-
-But, in IR Solidity, they don't decompose. Also, in non-IR Solidity, what do they decompose into? We might want some way to mark one of these miscellaneous primitive types
-as an internal function pointer, so that whatever's reading this format can know to treat them as that. (I don't see that we need this for external function pointers, since
-each _part_ of those is meaningful without this annotation.)
-
-:::note
-@cameel adds:
-
-> They decompose into two separate jump destinations: one into the creation
-> code, the other into the deployed code. But this is something that feels like
-> an implementation detail so not sure it has a place here.
-> :::
-
-## Specifying more complex types
-
-### Structs and tuples
-
-This can include things that may not necessarily be structs according to the language, but similarly contain a fixed number of parts and which aren't arrays.
-So, for instance, as suggested above, external function pointers could be handled here, as well as internal function pointers in non-IR Solidity (of course then the two
-components of that need to be handled some other way).
-
-Anyway, obviously, you have to specify the component types and their order.
-
-#### Specifying layout
-
-For byte-based locations: Each component needs to have its starting offset specified, but that's not enough. Each one also needs padding specified.
-You can also specify an overall length for the whole thing, which is useful in storage for specifying that it should take up a whole number of words;
-for storage this should be allowed in bytes or in words.
-
-Also, each component needs to have specified how it's stored. Based on how things are done in Solidity and Vyper, we can have several possibilities:
-
-1. It's stored inline. (This includes reference types in storage; they're not always "inline" per se
- but they're inline for our purposes.)
-2. It's stored as a pointer. In this case we'll need to specify the length of the pointer.
-3. It's stored as a relative pointer. Now, in Solidity, when relative pointers are used, they're not relative to
- the current location, they're relative to the start of the container they're inside. We can allow for both possibilities,
- probably (relative pointers aren't so exotic). And of course we need to know the length of the pointer.
-
-:::note
-@cameel adds:
-
-> In the future in Solidity also pointers to data stored in other locations
-> will be possible. Things like a storage struct nested inside a memory struct.
-> The concept of located types in the main spec already allows for that in full
-> generality.
-> :::
-
-For the stack: Overall this is similar? Structs don't live on the stack, but function pointers do. It'll be necessary here
-to use the ability to specify particular bytes within a stack slot. Alternatively, if we don't want to allow that,
-because we don't think splitting up internal function pointers is a good idea, we could allow separately specifying the padding
-in each stack slot (this is necessary to handle Solidity's external function pointers, assuming we're handling them under this).
-
-:::note
-@cameel adds:
-
-> In the future structs will be allowed to live anywhere.
-> :::
-
-For storage: We _could_ do something complicated, assuming that structs might get relocated in all sorts of weird ways,
-but this is probably not a good idea to start with. Instead we'll just assume that each struct either:
-
-1. always starts on a word boundary and so is always laid out internally in the same way, so we can give the
- locations of the components relative to the start of the struct, or
-2. is no more than a single word in length and never crosses word boundaries, in which case we can give positions
- within the single word it's contained within (byte offsets relative to the start; endianness would have to be
- marked to make these meaningful).
-
-It'll probably be necessary to include an explicit tag to distinguish between these two cases. Note the second
-case is included to cover things that aren't actually structs but decompose into multiple parts.
-
-### Tagged unions
-
-These don't currently exist in Solidity or Vyper, but we should probably handle them? Pyramid had them (in that
-it was dynamically typed so everything was one).
-
-:::note
-@cameel notes:
-
-> They're planned in Solidity and may already exist in Fe. In Solidity they
-> will most likely be implemented in a form similar to Rust's enums with data.
-> Algebraic types in general will be possible in the future.
-> :::
-
-For the type, we say what it's a union of.
-
-#### Specifying layout
-
-So, we have to specify where to find the tag, and what to do in each case.
-
-For where to find the tag, we can give a start position and a length; note that for the reasons discussed below,
-we may want to allow the tag to have its start and length given in individual _bits_ rather than bytes.
-
-For each option, then, we can give a layout specification and a start point.
-
-### Union representations of non-union types
-
-So, this is a bit funky, but what if we allowed union representations of non-union types?
-
-That is, a type could indicate that in a particular location, it had a tagged union representation;
-as with tagged unions, it would be specified where to find the tag, and then there'd be an object for each case.
-But the object would specify a layout, not a type!
-
-This would allow handling Solidity storage strings. The last bit of the word would be the tag. In case 0,
-bits 1-31 are the length, and bits 32-255 are the contents. (So, we'd need to be able to specify individual
-bits here, not just bytes. Of course that's partly a concern for strings, not unions.) In case 1, bits 1-255 are
-the length, and we specify that the contents are at a hashed location. (Note that if we use the ideas below,
-we wouldn't actually specify the end of the contents, only the start.)
-
-Of course, doing this means that all _ordinary_ representation descriptions would need to have an additional
-field to specify that they're not a union. Or perhaps this information could go in a field outside the representation
-description, to avoid that?
-
-### Enumerations
-
-Maybe these are treated like primitive types? Maybe they're treated like tagged unions whose unioned types are all the unit type? In that case we'd need to be able
-to represent the unit type.
-
-:::note
-@cameel adds:
-
-> This _might_ need specifying the size in bytes. In older Solidity versions
-> enums took a variable number of bytes, depending on the number of members.
-> Now they're limited to 256 members so 1 byte
-> (https://github.com/ethereum/solidity/pull/10247). Other languages could be
-> doing it differently.
-> :::
-
-### Strings and bytestrings
-
-Type information: Is it a string or a bytestring? Is there a bound on its length? Is the bound an exact length it must be (as has been proposed for Solidity), or is it a cap (as in Vyper)?
-
-We probably don't need to bother with questions of string encodings, everything can be assumed to be UTF-8. Possibly we could have a separate type for ASCII-only strings,
-since some languages may want that as a separate type (Solidity has separate literals for with or without Unicode, though not separate types).
-We probably don't need Latin-1 strings or anything like that.
-
-#### Specifying layout
-
-For numbers, endianness was potentially a concern for byte-based locations. Here, it's not; instead it's potentially a concern for storage, since it's _not_ byte-based. Once again, though,
-the EVM makes big-endian easy and little-endian hard, so we'll just assume big-endian and not include an endianness specification.
-
-(On the other hand, Solidity does little-endian for arrays, so...?)
-
-For ones of fixed (not merely bounded) length, there's not much to specify. We're assuming big-endian, and the start is stored elsewhere. We may want
-to allow an offset in case the length is stored redundantly? Also, for storage specifically, we do have to notate whether the
-string is stored at the _actual_ specified start, or at a hashed location. So, `{ hashSlot: boolean }`.
-
-For ones of variable length, we have more work to do, as we have to specify where to find both the length and the contents.
-
-For storage, we can reasonably assume that strings have the two cases that structs do (possibly just the first but seems less clear we should assume that).
-(Actually, if we don't assume that, possibly we could fold primitive bytestrings into the fixed-length case here as well. There may be some situations that warrant
-distinguishing, but that could likely be handled by explicitly tagging the different types as different types, not representing them differently
-internally aside from the tag.)
-
-So, we can specify where to find the length, the length of the length (or that can be determined by giving the length a type?), and the start of the contents. For byte-based locations
-that suffices.
-
-However in storage, when we specify the offset, we also have to specify (for both the length and the contents separately!) whether the offset is relative
-to the current slot or to the hash of the current slot.
-
-You can also specify an overall length for the whole thing, which is useful in storage for specifying that it should take up a whole number of words;
-for storage this should be allowed in bytes or in words.
-
-Of course, Solidity famously does something more complicated with its strings, see union representation of non-union types for a possibility regarding handling that.
-
-### Mappings
-
-Have to specify key and value types, obviously.
-
-Mappings are weird and specific enough that it makes sense to build-in a lot of the behavior rather than attempting to be very general.
-
-#### Specifying layout
-
-We'll just assume all mappings use something like Solidity or Vyper's system. In this case, what needs to be specified for a given mapping is:
-
-1. Does the key go before the slot, or after?
-2. Is the key pre-hashed, like for strings in Vyper?
-3. Is the key padded at all, and if so how? I.e., to what width and with which padding type. (Notionally this padding information could go in the key type itself, adding a "key" location for this purpose. I am not assuming that
- all locations get the same type of padding because this has not always been true in all versions of Solidity.)
-
-Probably it is best to combine (1) and (2) into a `mapType` and keep (3) separate as a `paddingType`.
-
-### Arrays
-
-Note: This will exclude strings and bytestrings, handling them separately above, unlike [prototype.mdx](./prototype.mdx); another difference that will have to be figured out.
-
-We can split these into fixed-length and variable length (whether bounded or unbounded). And then you've got the base type.
-
-#### Specifying layout
-
-Oh boy, arrays. This is where it truly gets messy if we want to be general. Probably some generality will have to be
-axed here for the sake of simplicity.
-
-If the array is variable length, you need to specify the start of the length and of the contents;
-for fixed-length, only the latter (it may not be at the start as the length may be stored redundantly). You also need to specify the
-length of the length, or perhaps that can be handled by giving the length a type.
-
-In the case of storage, as is typical, this requires not only specifying an offset but also whether to hash the slot (this is separate
-for the length and for the contents!).
-
-Also, as with structs, you're going to have to specify whether the base type is stored inline, or whether it's a pointer, or whether it's
-a relative pointer and of what sort.
-
-You can once again also specify an overall length for the whole thing, which is useful in storage for specifying that it should take up a whole number of words;
-for storage this should be allowed in bytes or in words.
-
-What about padding of the elements? Well, that's the messy part... the stride pattern.
-
-See, we _could_ just specify padding for the base type (what it's padded to and with what padding type). But this wouldn't suffice to
-handle the case of how Solidity does arrays in storage! Maybe we can make this optional -- you can give a `paddedWith` and `paddingType`,
-_or_ you can use the more complicated stride pattern system.
-
-Note that for storage you will also need to specify an endianness, since storage is word-based rather than byte-based.
-Solidity does arrays little-endian! So we really do need this to be specified here. This could be specified for every
-location for consistency, but that seems unnecessary.
-
-Anyway, stride patterns. Here's a simple proposal for how a stride pattern might be represented.
-
-A stride pattern will be an array of objects, each of which is one of the following: `{ type: "element" }`, `{ type: "zero", length: number }`, or `{ type: "sign", length: number }`.
-
-A stride pattern is interpreted as follows: `"element"` means an element goes here, of its appropriate length (no padding). The `"zero"` type means this many bytes of zeroes.
-And (this isn't currently necessary, but) `"sign"` will mean this many bytes of sign-padding, where the thing it's sign-padding is determined from context
-(in big-endian contexts, it's the next thing; in little-endian contexts, the previous thing). The stride pattern is implicitly periodic; the number of `"element"` entries is not
-supposed to match that of the array, rather, when you get to the end of the stride pattern you go back to the start.
-
-In a byte-based location, this means what it sounds like. In storage, you have to read according to the endianness that was specified. Note it's assumed that no element
-that fits in a word will cross a word boundary, and that you won't use `"sign"` in places it doesn't make sense, that you won't have structs that are supposed to start
-on a word boundary start elsewhere, etc.
-
-In addition to the stride pattern, you can separately specify padding for the array as a whole (useful for making clear that it should take up a whole number of words).
-
-Solidity examples:
-
-- `uint256[]` -- it takes up the whole word, so the pattern is `[{ type: "element" }]`
-- `uint128[]` -- there's two of them, so `[{ type: "element" }, { type: "element" }]`
-- `uint96[]` -- there's two of them and then 64 bytes of padding, so `[{ type: "element" }, { type: "element" }, { type: "zero", length: 64 }]`
-- `uint96[3][]` -- a `uint96[3]` takes up two full words always, so just `[{ type: "element" }]` suffices; what goes on inside the `uint96[3]` can be handled inside there
-- `uint96[3]` -- the stride pattern is `[{ type: "element" }, { type: "element" }, { type: "zero", length: 64 }]` as above, but now we should _also_ specify that the array as
- a whole has an overall length of two words, so that in a `uint96[3][]`, there's no confusion about the fact that each one should start on a fresh word boundary.
- (Not that it would be legal to start it anywhere else, but it should still be explicitly specified, not left as error-recovery behavior.)
-
-#### Things probably not to include for now
-
-Probably don't attempt to handle arrays that are directly multidimensional (as opposed to
-multidimensional arrays just being ordinary arrays of arrays). Allowing this also raises possibility
-of a flag for row-major vs column-major order. Probably best to just exclude this for now.
-
-:::note
-@haltman-at notes in a comment (after writing this)
-
-> Oh, geez, I just realized there's something big I left out: How things are
-> pointed to on the stack. Actually, one could perhaps speak of cross-location
-> pointers in general, but as that doesn't exist mostly at the moment, probably
-> no sense in including that; it's premature.
->
-> But, I guess something that needs to be added is, for each type, for the
-> stack location, I talked about from/to but really we also need to say, does
-> this thing live directly on the stack or is it pointed to. And if it's
-> pointed to, we need to specify the pointer format -- do we just point to the
-> start, or do we have start/length? And then if it's start/length we need to
-> break down which part is the start and which part is the length... also, for
-> length, we likely want to be able to specify what the length is measured in
-> -- for instance it could potentially be `"bytes"` or `"words"` or `"items"`.
->
-> (Yes this should be added to the PR itself but I don't have a lot of time at
-> the moment)
-> :::
diff --git a/packages/web/docs/sketches/prototype.mdx b/packages/web/docs/sketches/prototype.mdx
deleted file mode 100644
index 789fb9059..000000000
--- a/packages/web/docs/sketches/prototype.mdx
+++ /dev/null
@@ -1,390 +0,0 @@
----
-description: Initial format sketch
----
-
-import TOCInline from "@theme/TOCInline";
-
-# @jtoman's format prototype
-
-
-
-## Status of this document
-
-This is an initial draft for review and comment. It does not have consensus and should only be cited as work in progress.
-
-## Scope of this Document
-
-This document proposes a "general" shape of the ultimate debugging format to be decided upon
-by the ethdebug working group. As such, it does not aim to be a complete formal specification
-of a JSON format that is expected to cover every single case. Rather, it aims to provide a basis
-from which a fully formal specification will be developed based on discussions generated around
-this document.
-
-Under this vague scope, it is worth noting some non-goals. This document describes a debug
-format for EVM bytecode; support for other VMs is at least initially out of scope. In addition,
-this proposal is necessarily geared towards the state of the Solidity/Vyper languages as they
-exist now. It cannot (and will not) account for any possible future changes to the source language
-(Solidity, Vyper, etc.); rather, any significant changes to source languages/target VMs
-that require fundamental extensions to this format should be developed as needed and gated
-with a versioning scheme.
-
-## Goals of this Format
-
-Before describing the format, it is useful to lay out the information that this initial proposal is
-attempting to provide to end-users of the format. Extensions to this format to support other use
-cases not explicitly identified here are expected.
-
-### Local Variable Information
-
-Currently decompilers and formal methods tools must use internally generated names to give names to
-the values stored on the stack. The debugging format should provide information about what stack slots correspond
-to which source level identifiers.
-
-### Type Information
-
-The EVM has one "type": a 256-bit word. Source languages that compile to the EVM have richer type information
-which can aid in debugging and fuzzing; for example, the counterexample generation used by the Certora prover
-could use type information to pretty print values according to their high-level type.
-
-### Jump Resolution
-
-The EVM allows jumping to arbitrary values on the stack (subject to the restriction that the destination has a corresponding JUMPDEST opcode).
-This makes construction of a static control flow graph challenging (albeit not impossible). The format should provide reasonable hints
-about possible targets of jump commands.
-
-### Internal Function Calls
-
-The EVM has no built-in concept of internal functions. Rather, Solidity internal function implementations are placed at some offset in the
-contract's bytecode, and callers jump to this predetermined location, passing arguments on the stack along with the return location (this is
-one possible complication when attempting to statically resolve jump destinations).
-
-Statically detecting these internal calls and informing an end-user can be surprisingly complicated.
-For example, the Solidity compiler will in some cases perform a "tail-call" optimization: for nested calls like `f(g(x))`
-the compiler will push the entry point of `f` as the return address for the call to `g`. The format should
-help explicitly identify the targets of internal function calls and what arguments are being passed on the stack.
-
-### Mapping key identification
-
-EVM languages commonly include non-enumerable mappings. As such, it is useful to be able to dynamically identify any mapping keys that may appear
-while analyzing a transaction trace or debugging.
-
-## The Format
-
-The format will be JSON so that it may be included in the standard input/output APIs that the Vyper and Solidity compilers support.
-
-### Top Level
-
-The "top-level" artifact of the debug format will be a JSON dictionary with (at least) the following fields:
-
-- `version`: A representation of the major/minor version of the format. The actual representation of this version (a string, an array, etc.) can be decided later.
-- `types`: An array describing the layout of user-defined types defined in contracts referenced during compilation (see below).
-- `bytecode`: Debug information about the bytecode output by the compiler.
-
-### Type Descriptions
-
-When describing user defined types in contracts or describing the types of values on the stack, the format
-will use `type descriptors` to describe the type in question. There is one type descriptor per type in the
-source language. Each descriptor is a JSON object with at least the following fields:
-
-- `id`: a unique numeric id. This may be referenced by type descriptors for aggregate types (arrays, structs, etc.)
-- `sort`: A string representing the sort of the type. Possible values include:
- - `"mapping"` for a dynamic mapping from a key type to a value type
- - `"primitive"` built in primitive type
- - `"array"` for a homogeneous dynamic array of bounded/unbounded size
- - `"static_array"` for homogeneous static arrays
- - `"struct"` for user defined aggregate struct types
- - `"enum"` user defined enumeration types
- - `"contract"` a refinement of an address primitive with information about the contract deployed at the address
- - `"alias"` a user defined alias for some type
- - `"located"` a reference to another type with a data location attached
-- `label`: a (not necessarily human-readable) string representation of the type. Expected to be used for debugging
-
-Depending on the value of `sort` the type descriptor will have additional fields.
-
-**Discussion** The types here do _not_ include events or errors. These can be described elsewhere in the format,
-and indeed, they will likely reference the types defined here. However, as events and errors are not currently
-first class in any language targeting the EVM that I'm aware of (i.e., you cannot declare a variable `x` to be of
-type `error Foo()`) they should be described elsewhere.
-
-**Notes**: some preference was expressed for `kind` over `sort`. In addition, it was suggested we use `pointer` or `reference` over `located`.
-
-#### Mappings
-
-The type descriptor for a mapping type has the following additional fields defined.
-
-- `keyType`: contains the `id` of the type that is the domain of the mapping.
-- `valueType`: contains the `id` of the type that is the codomain of the mapping.
-
-#### Primitives
-
-The type descriptor for a primitive has the following additional fields:
-
-- `keyword`: the source keyword for the type. Examples include `uint256`, `boolean` etc.
-- `bitwidth`: the maximum number of bits a value of this type may occupy
-- `alignment`: one of `high` / `low`, indicating if the bits occur in the most significant bits (`high`) or least significant bits (`low`) of 256-bit EVM word.
-
-**Discussion**: The bitwidth field is an initial attempt to come up with some language agnostic way to
-describe primitive types. It is expected that further fields may be added, or perhaps the Primitive sort
-should be split up into more specific units, like `Integral` and `Real` etc.
-
-#### Array
-
-The type descriptor for an array is further subdivided depending on whether the array
-is a bytes array or any other array. It has at least the following fields:
-
-- `arraySort`: either the string `"bytes"` or `"generic"` (names not final).
-- `bound`: a field indicating the statically known upper bound on the size of this array (for Vyper). If null the array is unbounded.
-
-If `arraySort` is `"bytes"` then the descriptor has the following field:
-
-- `keyword`: the keyword used to declare this type, to account for `string` vs `bytes`
-
-If the `arraySort` is `"generic"` then the descriptor has the following field:
-
-- `elementType`: a numeric id that references the type of values held in each element of the array.
-
-**Discussion**: Here, as elsewhere, no attempt is made here in the type descriptors to describe the physical representation
-of the type. Short of some semi-turing complete DSL, there doesn't seem to be a compact way
-to describe declaratively the packed storage representation of strings in storage for example.
-
-#### Static Arrays
-
-The type descriptor for a static array has the following additional fields:
-
-- `size`: the static, pre-declared size of the fixed size array/list
-- `elementType`: a numeric id that references the type of values held in each element of the array.
-
-#### Struct
-
-This format assumes that all struct types are user defined types and thus have a declaration site.
-The type descriptor for a struct has the following additional fields:
-
-- `declaration`: A dictionary describing the definition site of the struct, see below.
-- `fields`: An ordered list of dictionaries describing the fields of the struct.
-- `name`: The name of the struct without the `struct` keyword and without contract qualifiers.
-
-The order of the elements in `fields` is significant, and should match the order that fields are declared in the source file.
-
-Each element of the `fields` array is a dictionary with the following fields:
-
-- `name`: the name of the field
-- `type`: the numeric id of the type held in this field
-
-#### Enums
-
-As with structs, this format assumes that all enumeration types are user defined. The descriptor for an enum contains the following fields:
-
-- `declaration`: A dictionary describing the definition site of the enum, see below.
-- `name`: the name of the enum, without the `enum` keyword and without any contract qualifiers.
-- `members`: A list of members of the enum, as strings.
-
-The order of elements within `members` is significant, and should match the order that members of the enum are declared in the source file.
-
-#### Contracts
-
-The contract type refers to a primitive value that is known/expected to be an address of a contract deployed on the blockchain
-which implements the given type. It contains the following fields:
-
-- `contractDeclaration`: The AST id of the declaration of the contract type.
-- `name`: A string holding the (fully qualified) name of the contract type.
-
-**Discussion** It is unclear to me whether this should actually be separate from primitives. I lean towards no, but it is presented this
-way to prompt discussion. Note that this format assumes that the declaration of the contract type is "visible" to the compiler
-during compilation and thus the declaration site is available for reference.
-
-#### Aliases
-
-As with enums and structs, this format assumes that all aliases are user defined, but this restriction could be relaxed by making the `definitionScope` field optional.
-An alias type descriptor has the following additional fields:
-
-- `aliasName`: The user provided name of the alias type, without qualifiers.
-- `definitionScope`: A dictionary describing the site of the definition, see below
-- `aliasedType`: The numeric id of the type for which this is an alias.
-
-**Discussion**: This could be extended with information such as "is this alias opaque" a la private types in OCaml.
-
-#### Located Types
-
-A "located" type is simply a type that is additionally qualified with a data location, that is, a refinement on some other type to restrict its location.
-A located type has the following fields defined:
-
-- `location`: A string describing EVM data locations. Possible values are `"memory"`, `"storage"`, `"calldata"`, `"returndata"`, `"code"`.
-- `type`: The numeric ID of the type with this location.
-
-It is expected that the type referenced in `type` is not itself a located type, as this would indicate a type like `uint[] calldata memory` which is not
-valid and is never expected to be.
-
-**Discussion**: The lack of a `stack` or `default` location is intentional, but can be added if needed. The choice to separate the location from the rest of
-the type was to avoid multiple descriptors for a struct depending on where that struct is located. Under this design, there is a single definition for the
-shape of the struct, and the different data locations of that struct are handled by located type descriptors.
-
-#### Definition Scopes
-
-To provide information about where a user defined type was declared, the descriptors for those types include a `definitionScope` field.
-This field is a dictionary with the following fields:
-
-- `definitionScope`: A dictionary describing where the type is defined. It has at least the following fields
- - `sort`: a string, either `"file"` indicating a top-level declaration or `"contract"` indicating a type defined within a contract
-- `name`: The string representation of the type name. For struct types this is the name of the struct, and does _not_ include the `struct` keyword, and similarly for enums.
-
-The `definitionScope` dictionary has additional fields depending on the value of `sort`. If it is `"contract"`
-then it has the following field:
-
-- `definingContract`: A dictionary with the following fields:
- - `name`: the source name of the defining contract
- - `astId`: the numeric AST id of the declaration which holds this definition
-
-If the field is `"file"`, then it instead has:
-
-- `definingFile`: A dictionary with the following fields:
- - `name`: The path to the file (John: Fully resolved path? The path as understood by the compiler?)
-
-It is expected that the combination of `definitionScope` and `name` is unique within the `types` array
-(otherwise we would have multiple declarations in the same scope).
-
-#### Unresolved Questions
-
-What about generics? Do we want to try to describe their format before any implementation is ready?
-
-### Bytecode Debug Information
-
-The debug information for the bytecode is a dictionary of bytecode offsets to debug information. It is **not**
-required that every opcode in the bytecode has a corresponding entry in the debug dictionary. Implementers
-are encouraged, however, to have as much coverage as possible. Each entry in the debug information dictionary
-is itself a dictionary that (optionally) includes some of the following:
-
-- The source location(s) that "correspond" to the opcode
-- The AST ID(s) that "correspond" to the opcode
-- The layout of the stack, including type information and local variable names (if available)
-- Jump target information (if available/applicable)
-- Identification of mapping key information
-
-In the above "correspond" roughly means "what source code caused the generation of this opcode".
-
-Specifically the dictionary may have the following fields:
-
-- `source`: a list of source location specifiers. The format of these source location specifiers should be decided later. Every element should provide the location of the textual source code
- that contributed to the generation of this opcode.
-- `ast`: A list of AST ids for the "closest" AST node that contributed to the generation of this opcode.
-- `stack` A layout of the stack as understood by the compiler, represented as a list.
-- `jumps`: If present, provides hints about the location being jumped to by a jumping command (JUMP or JUMPI)
-- `mappings`: If present, contains information about how the opcode relates to mapping keys.
-
-#### Source Locations
-
-The choice of which source location should be attached to each opcode is likely an inexact science. However, implementers are encouraged to be as exact as possible: while it
-is technically correct to give the entirety of the contract file as the "source" of every opcode, this is not a useful result. Consumers of this information should also take care
-to assume that source code operations may map to (surprising) AST ids. For example, an optimizing compiler may tag a `PUSH` of a constant `16` with the AST id of the following expression
-`(5 + 11)`. An even more aggressive optimizing compiler could even tag the same push with the AST ids of the literals `5` and `11` in the following `(5 + x) + 11`.
-
-#### Stack Information
-
-Given internal function calls, the format will not (and cannot) represent the entire stack at every point during execution; a program can be reached at many different stack depths.
-However, it is expected that all compilers will have a view of some "prefix" of the stack at each program point analogous to an activation frame in low-level assembly code.
-The list contained in the `stack` field exposes this view; consumers can combine this information with the `jumps` information to build a complete representation of the stack.
-
-The list is ordered such that the first element provides information about the top of the stack, the second element is the next element below it, and so on. Each element is a dictionary
-with the following fields:
-
-- `type`: The type of the value stored in this stack slot. This is _not_ a reference to a type descriptor or an embedding of the type descriptor, see below.
-- `sourceName`: A nullable string representation of the identifier held in this stack slot. A value of null indicates that the value does not come from any single identifier.
-- `sourceId`: A nullable numerical AST id that holds the definition (John: declaration?) of the identifier held in this stack slot. A value of null indicates the value does not come from
- any single identifier.
-
-Note that due to `dup` commands, multiple stack locations may hold the same variable name. If a compiler knows that a stack slot that holds
-a variable will be later overwritten with a new value, it should mark the to be overwritten value with the "junk" type (see below).
-
-The `type` dictionary provides information about the value stored in the stack slot. The types used here are a superset of the types described by type descriptors.
-
-The `type` dictionary has the following field:
-
-- `sort`: A string indicating the sort of value stored in the stack slot, drawn from one of the following values:
- - `"junk"` indicates a value that is dead or about to be popped.
- - `"pc"` A refinement of the numeric type, indicating the slot holds a location which is a jump destination target
- - `"program"` The stack slot holds a value with a "program" type, i.e., one that can be expressed using type descriptors.
- - `"internal"` Indicates that the stack slot holds a value that is being used by the compiler but does not correspond to a user type.
-
-The dictionaries for `pc` and `junk` sorts do not have any additional information. The `internal` type is to be used for, e.g., "scratch" pointers that are used to
-marshal calldata buffers or hash storage keys. Compilers may insert their own information into the `internal` dictionary but this format remains intentionally agnostic
-on these contents. (John: every time a standard has allowed a "vendor specific" extension, it goes badly. Maybe we want to just say, consumers shouldn't look at this field)
-
-If the `sort` is `"program"` then the dictionary has the following field:
-
-- `typeId`: The numeric ID of the type held in this slot
-
-Additionally, the compiler may insert a field to provide additional information about the representation on the stack. This field, if present, has the name `representation` and holds a dictionary.
-This dictionary has the following optional fields:
-
-- `published`: A boolean field which, if present, indicates that this stack slot holds a pointer to some location in memory/storage. Further, if the field is true, then the object is "fully initialized" (the formal definition of
- fully initialized is to be decided on later)
-- `componentOf`: If the representation of a single value spans multiple stack slots, this field provides information about how the value is spread across the stack. It is a dictionary with the following fields:
- - `id`: an ID unique within each stack list. All stack slots with the same value of `id` are considered to represent the same logical value. It is allowed to re-use the same ID in different entries of the `stack` list.
- - `componentName`: The name of the component. The only known use case for this is the decomposition of calldata arrays, so there are two possible values `"ELEM_PTR"` and `"LENGTH"` indicating the stack slots hold the pointer to the calldata location of the array's elements or the logical length of the array respectively.
-
-#### Jumps
-
-For jumping commands, the `jumps` field provides information about the expected target of the jump, and information about the internal function stack.
-
-The value of the `jumps` field is a dictionary with the following (potentially optional) fields:
-
-- `targets`: if present, a list of known PCs to which this command may jump. For JUMPI, this does **not** include the fallthrough case, as this is readily computable. This list may be non-singleton due to,
- e.g., function pointers, but the compiler is able to restrict the potential callees.
-- `sort`: A string indicating the type of jump being performed. One of the following values:
- - `"return"`: Used for a jump out of an internal function
- - `"call"`: Used for a jump into an internal function
- - `"normal"`: Used for all other jumps
-
-**Discussion**: It may be useful to ask compilers to provide richer information about some jumps. For example, tagging a loop exit as a "break" or a backjump as a "continue". This may be redundant given sufficiently
-reliable source information however.
-
-As elsewhere, the dictionary may contain additional fields depending on the value in `sort`.
-
-If the value is `"call"`, then the dictionary contains the following fields:
-
-- `arguments`: A list describing the calling convention. As in the `stack` layout, the first element of this list describes the value on the top of the stack (**after** popping the jump destination). Each element is a
- dictionary described below.
-
-If the callee of the call is known, then the dictionary with sort `"call"` has the following field:
-
-- `callee`: a dictionary with the following fields:
- - `target`: a human readable string name for the function being called
- - `astId`: the AST id of the declaration site of the callee
-
-Note that if the function being called is `virtual` then the declaration site may not have any corresponding body.
-
-Each element of the `arguments` array is a dictionary with the following fields:
-
-- `sort`: `"program"` or `"return_address"`. `"program"` has the same interpretation as in the `type` dictionary above. `"return_address"` is a refinement of the `pc` type indicating this stack slot holds
- the return address of the call being performed.
-- `position`: The logical position of the **parameter** represented by this stack value. The ordering of parameters is defined by their program declaration order, where the first formal parameter to a function has position `0`,
- the next `1`, etc. As with the stack, a single logical argument can be spread across multiple stack slots. If multiple entries share the same `position` value, then those arguments
- should have a `representation` field that has a `componentOf` entry.
-
-**Note**
-Due to named arguments, the order given in the debug information may not match the order of parameters as they appear at a call-site. For example, given a declaration:
-
-```
-function myFunction(uint a, uint b) ...
-```
-
-and an invocation:
-
-```
-myFunction(b = 3, a = 4)
-```
-
-the stack location which contains the `4` argument value will be tagged with position `0`, as that is the position of parameter `a` in the declaration.
-
-If the value of `sort` is `"return"`, then the dictionary has the following field:
-
-- `returns`: A list of dictionaries with the same format as the `arguments` array of `call`, but without any `return_address` entries.
-
-**Discussion**: The above proposal doesn't really handle the case of "tail-calls" identified at the beginning of this document, where multiple return addresses can be pushed onto the stack. Is that something the debug format must explicitly model?
-
-#### Mapping key identification
-
-The value of this field (when present) is a dictionary with (some of) the following fields:
-
-- `isMappingHash`: A boolean that identifies whether the opcode is computing a hash for a mapping.
-- `isMappingPreHash`: For mappings that use two hashes, this boolean can identify whether the opcode is computing the first of the two hashes. Possibly this field should be combined with a previous one into some sort of enum?
-- `mappingHashFormat`: An enumeration; specifies the format of what gets hashed for the mapping. Formats could include "prefix" (for Solidity), "postfix" (for Vyper value types), and "postfix-prehashed" (for Vyper strings and bytestrings). Possibly "prefix" could be split further into "prefix-padded" (for Solidity value types) and "prefix-unpadded" (for Solidity strings and bytestrings). This could be expanded in the future if necessary. (Also, potentially `"prefix-padded"`, if split out, could be broken down even further, by padding type -- zero padding (left) vs sign-padding vs zero-padding (right)...)
diff --git a/packages/web/docusaurus.config.ts b/packages/web/docusaurus.config.ts
index 3abec10e5..7218db98c 100644
--- a/packages/web/docusaurus.config.ts
+++ b/packages/web/docusaurus.config.ts
@@ -1,5 +1,6 @@
import { themes as prismThemes } from "prism-react-renderer";
import path from "path";
+import webpack from "webpack";
import type { Config } from "@docusaurus/types";
import type * as Preset from "@docusaurus/preset-classic";
import type { Configuration } from "webpack";
@@ -59,12 +60,17 @@ const config: Config = {
},
],
},
+ plugins: [
+ new webpack.ProvidePlugin({
+ Buffer: ["buffer", "Buffer"],
+ }),
+ ],
resolve: {
alias: {
react: path.resolve("../../node_modules/react"),
},
fallback: {
- buffer: false,
+ buffer: require.resolve("buffer/"),
util: false,
},
fullySpecified: false,
@@ -168,7 +174,7 @@ const config: Config = {
},
{
label: "Known challenges",
- to: "/docs/known-challenges",
+ to: "/docs/reference/challenges",
},
],
},
diff --git a/packages/web/package.json b/packages/web/package.json
index cdd5a76fe..8cbd3b74d 100644
--- a/packages/web/package.json
+++ b/packages/web/package.json
@@ -21,8 +21,13 @@
"@docusaurus/preset-classic": "^3.9.2",
"@docusaurus/tsconfig": "^3.9.2",
"@docusaurus/types": "^3.9.2",
+ "@ethdebug/bugc": "^0.1.0-0",
+ "@ethdebug/bugc-react": "^0.1.0-0",
+ "@ethdebug/evm": "^0.1.0-0",
"@ethdebug/format": "^0.1.0-0",
"@ethdebug/pointers": "^0.1.0-0",
+ "@ethdebug/pointers-react": "^0.1.0-0",
+ "@ethdebug/programs-react": "^0.1.0-0",
"@fortawesome/fontawesome-svg-core": "^6.7.2",
"@fortawesome/free-brands-svg-icons": "^6.7.2",
"@fortawesome/free-solid-svg-icons": "^6.7.2",
@@ -33,6 +38,7 @@
"@saucelabs/theme-github-codeblock": "^0.2.3",
"@shikijs/themes": "^2.5.0",
"ajv": "^8.17.1",
+ "buffer": "^6.0.3",
"clsx": "^1.2.1",
"docusaurus-json-schema-plugin": "^1.15.0",
"prism-react-renderer": "^2.4.1",
diff --git a/packages/web/spec/overview.mdx b/packages/web/spec/overview.mdx
index e66b3e729..a5b1e3c45 100644
--- a/packages/web/spec/overview.mdx
+++ b/packages/web/spec/overview.mdx
@@ -4,7 +4,7 @@ sidebar_position: 1
# Specification overview
-The **ethdebug format** is currently in the design phase and specification is
+**ethdebug/format** is currently in the design phase and specification is
only just beginning.
## Contents
@@ -31,6 +31,12 @@ This specification currently contains the following primary schemas:
annotated compiled bytecode.
+
[**ethdebug/format/info**](/spec/info/overview)
+
+ A schema for bundling all debug information for a compilation, including
+ programs, shared types, sources, and compilation metadata.
+
+
In addition, this format defines namespaces containing schemas for common
diff --git a/packages/web/spec/pointer/concepts.mdx b/packages/web/spec/pointer/concepts.mdx
index 8e48f22fb..da47aa746 100644
--- a/packages/web/spec/pointer/concepts.mdx
+++ b/packages/web/spec/pointer/concepts.mdx
@@ -110,8 +110,7 @@ involved in a complex allocation.
Besides representing byte offsets, word addresses, byte range lengths, etc.
using just unsigned integer literals, this schema also allows representing
addressing details via numeric operations, references to named regions,
-explicit EVM lookup, and so on.
-reference to other regions by name, and a few builtin operations.
+explicit EVM lookup, and a few builtin operations.
diff --git a/packages/web/src/components/SchemaExample.module.css b/packages/web/src/components/SchemaExample.module.css
new file mode 100644
index 000000000..c181b7b9f
--- /dev/null
+++ b/packages/web/src/components/SchemaExample.module.css
@@ -0,0 +1,60 @@
+.container {
+ margin-bottom: 1rem;
+}
+
+.header {
+ display: flex;
+ align-items: center;
+ justify-content: space-between;
+ gap: 1rem;
+ padding: 0.5rem 0.75rem;
+ background: var(--ifm-color-emphasis-100);
+ border: 1px solid var(--ifm-color-emphasis-200);
+ border-bottom: none;
+ border-radius: 6px 6px 0 0;
+ font-size: 0.8125rem;
+}
+
+.title {
+ font-weight: 500;
+ color: var(--ifm-font-color-base);
+}
+
+.schemaLink {
+ display: inline-flex;
+ align-items: center;
+ gap: 0.35rem;
+ text-decoration: none;
+ margin-left: auto;
+}
+
+.schemaLink:hover {
+ text-decoration: none;
+}
+
+.schemaLink:hover .schemaName {
+ text-decoration: underline;
+}
+
+.schemaLabel {
+ color: var(--ifm-color-emphasis-600);
+ font-size: 0.8125rem;
+}
+
+.schemaName {
+ font-weight: 600;
+ font-size: 0.8125rem;
+ color: var(--ifm-color-primary);
+}
+
+/* Remove the top border radius from the code block */
+.container :global(.theme-code-block) {
+ border-top-left-radius: 0;
+ border-top-right-radius: 0;
+}
+
+/* Dark mode */
+[data-theme="dark"] .header {
+ background: var(--ifm-color-emphasis-100);
+ border-color: var(--ifm-color-emphasis-300);
+}
diff --git a/packages/web/src/components/SchemaExample.tsx b/packages/web/src/components/SchemaExample.tsx
new file mode 100644
index 000000000..1cf366c21
--- /dev/null
+++ b/packages/web/src/components/SchemaExample.tsx
@@ -0,0 +1,56 @@
+import React from "react";
+import Link from "@docusaurus/Link";
+import CodeBlock from "@theme/CodeBlock";
+import styles from "./SchemaExample.module.css";
+
+export interface SchemaExampleProps {
+ /**
+ * The schema ID (e.g., "ethdebug/format/type", "pointer/region/memory")
+ * Can be a short form like "type" or full form like "ethdebug/format/type"
+ */
+ schema: string;
+ /**
+ * The URL path to the spec page for this schema
+ */
+ href: string;
+ /**
+ * The JSON content to display (as a string or object)
+ */
+ children: string | object;
+ /**
+ * Optional title for the example
+ */
+ title?: string;
+}
+
+/**
+ * A code block with a schema badge linking to the specification.
+ * Use this for JSON examples that conform to a specific schema.
+ */
+export default function SchemaExample({
+ schema,
+ href,
+ children,
+ title,
+}: SchemaExampleProps): JSX.Element {
+ const code =
+ typeof children === "string" ? children : JSON.stringify(children, null, 2);
+
+ // Ensure schema displays with full "ethdebug/format/" prefix
+ const displaySchema = schema.startsWith("ethdebug/format/")
+ ? schema
+ : `ethdebug/format/${schema}`;
+
+ return (
+