diff --git a/ax/generators/torch/botorch_modular/acquisition.py b/ax/generators/torch/botorch_modular/acquisition.py index 6e151412f52..31522f0cd62 100644 --- a/ax/generators/torch/botorch_modular/acquisition.py +++ b/ax/generators/torch/botorch_modular/acquisition.py @@ -8,7 +8,6 @@ from __future__ import annotations -import math import operator from collections.abc import Callable from functools import partial, reduce @@ -47,7 +46,12 @@ optimize_acqf_discrete_local_search, optimize_acqf_mixed, ) -from botorch.optim.optimize_mixed import optimize_acqf_mixed_alternating +from botorch.optim.optimize_mixed import ( + MAX_CARDINALITY_FOR_LOCAL_SEARCH, + MAX_CHOICES_ENUMERATE, + optimize_acqf_mixed_alternating, + should_use_mixed_alternating_optimizer, +) from botorch.optim.parameter_constraints import evaluate_feasibility from botorch.utils.constraints import get_outcome_constraint_transforms from pyre_extensions import none_throws @@ -63,13 +67,6 @@ logger: Logger = get_logger(__name__) -# For fully discrete search spaces. -MAX_CHOICES_ENUMERATE = 10_000 -MAX_CARDINALITY_FOR_LOCAL_SEARCH = 100 -# For mixed search spaces. -ALTERNATING_OPTIMIZER_THRESHOLD = 10 - - def determine_optimizer( search_space_digest: SearchSpaceDigest, acqf: AcquisitionFunction | None = None, @@ -119,17 +116,12 @@ def determine_optimizer( else: optimizer = "optimize_acqf_discrete" else: - n_combos = math.prod([len(v) for v in discrete_choices.values()]) - # If there are less than `ALTERNATING_OPTIMIZER_THRESHOLD` combinations of - # discrete choices, we will use `optimize_acqf_mixed`, which enumerates all - # discrete combinations and optimizes the continuous features with discrete - # features being fixed. Otherwise, we will use - # `optimize_acqf_mixed_alternating`, which alternates between - # continuous and discrete optimization steps. 
- if n_combos <= ALTERNATING_OPTIMIZER_THRESHOLD: - optimizer = "optimize_acqf_mixed" - else: + # For mixed (not fully discrete) search spaces, use the shared utility + # from BoTorch to determine whether to use mixed alternating optimizer. + if should_use_mixed_alternating_optimizer(discrete_dims=discrete_choices): optimizer = "optimize_acqf_mixed_alternating" + else: + optimizer = "optimize_acqf_mixed" return optimizer diff --git a/tutorials/bonsai/bonsai_tutorial.ipynb b/tutorials/bonsai/bonsai_tutorial.ipynb new file mode 100644 index 00000000000..6786b26f5e9 --- /dev/null +++ b/tutorials/bonsai/bonsai_tutorial.ipynb @@ -0,0 +1,1109 @@ +{ + "metadata": { + "kernelspec": { + "name": "python3", + "display_name": "python3", + "language": "python", + "isCinder": true + }, + "fileHeader": "" + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "metadata": [], + "originalKey": "af596e9d-b89a-48e9-bca4-c589607e8de2", + "showInput": false, + "outputsInitialized": false, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "language": "markdown" + }, + "source": [ + "# BONSAI + MAP-SAAS Tutorial: \n", + "\n", + "This tutorial demonstrates how to use **BONSAI** (Bayesian Optimization with Natural Simplicity and Interpretability) with **MAP-SAAS** to optimize the high-dimensional Hartmann50 benchmark problem and simplify proposals in order to only make necessary changes from the default (status quo) parameter values.\n", + "\n", + "## Overview\n", + "\n", + "- **Hartmann50**: A 50-dimensional synthetic benchmark where only 6 dimensions are relevant (the true Hartmann function), and 44 dimensions are \"dummy\" irrelevant dimensions.\n", + "- **BONSAI**: A Bayesian optimization method that removes irrelevant parameter changes to simplify proposals from Ax. 
This simplifies the proposals so that they change fewer parameters, making the proposals more interpretable and more likely to avoid regressions in metrics not captured in the optimization objective.\n", + "- **MAP-SAAS**: A fast Gaussian process model that has a SAAS (sparsity) prior.\n", + "\n", + "This combination is particularly powerful for high-dimensional problems with low effective dimensionality.\n", + "\n", + "These methods were proposed in **Daulton, et al. BONSAI: Bayesian Optimization with Natural Simplicity and Interpretability, ArXiv, 2026**." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "metadata": [], + "originalKey": "84a0e4c4-867f-416e-91d9-5c123e65355d", + "showInput": false, + "outputsInitialized": false, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "language": "markdown" + }, + "source": [ + "## 1. Imports" + ] + }, + { + "cell_type": "code", + "metadata": { + "metadata": [], + "originalKey": "dce8174d-f2c7-4fe9-9509-ac6d933c5d95", + "outputsInitialized": true, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "bentoCellName": { + "name": "Import Libraries and Check CUDA", + "origin": "ai" + }, + "language": "python", + "executionStartTime": 1769816272615, + "executionStopTime": 1769816283140, + "serverExecutionDuration": null, + "collapsed": false, + "requestMsgId": "6c9a23b5-6c5d-4eb9-a63c-6fe850d63a34" + }, + "source": [ + "import numpy as np\n", + "import torch\n", + "\n", + "from ax.api.client import Client\n", + "from ax.api.configs import RangeParameterConfig\n", + "from ax.api.utils.generation_strategy_dispatch import choose_generation_strategy\n", + "from ax.api.utils.structs import GenerationStrategyDispatchStruct\n", + "\n", + "# Model configuration\n", + "from ax.generators.torch.botorch_modular.surrogate import ModelConfig\n", + "\n", + "# BoTorch model (the key component for BONSAI)\n", + "from botorch.models.map_saas import EnsembleMapSaasSingleTaskGP\n", + "\n", + "print(f\"Using 
torch version: {torch.__version__}\")\n", + "print(f\"CUDA available: {torch.cuda.is_available()}\")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "metadata": [], + "originalKey": "db2e5191-a913-4699-8f16-45f21ec4f9e4", + "showInput": false, + "outputsInitialized": false, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "language": "markdown" + }, + "source": [ + "## 2. Understanding the Components\n", + "\n", + "### 2.1 Hartmann50 Problem\n", + "\n", + "The Hartmann50 problem is a 50-dimensional optimization problem where:\n", + "- The first 6 dimensions contain the actual Hartmann function (which has 6 local minima)\n", + "- The remaining 44 dimensions are \"dummy\" and do not affect the objective value\n", + "- This makes it an ideal test case for algorithms that can identify and focus on relevant dimensions\n", + "- The global minimum is approximately -3.32237\n", + "\n", + "### 2.2 A MAP-SAAS model (`EnsembleMapSaasSingleTaskGP`)\n", + "\n", + "This is a Gaussian Process model that uses an ensemble of independent GPs with different samples of the global sparsity level (integrating over the global sparsity level). It uses Maximum A Posteriori (MAP) estimation for fitting each member in the ensemble, which is significantly faster than using MCMC as in SAASBO. It leverages the same sparsity prior as SAASBO, but is significantly faster.\n", + "\n", + "### 2.3 BONSAI\n", + "\n", + "BONSAI (Bayesian Optimization with Natural Simplicity and Interpretability) is a technique for post-processing candidates generated by BO to prune irrelevant parameter changes from the default (status quo or target) values. It is compatible with any acquisition function and is easily enabled by specifying `simplify_parameter_changes=True` in Ax." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "metadata": [], + "originalKey": "0a5d4351-5dd7-46ae-aa04-c5def74a372a", + "showInput": false, + "outputsInitialized": false, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "language": "markdown" + }, + "source": [ + "## 3. Set Up the Hartmann50 Optimization Problem\n", + "\n", + "We'll create a Client and configure the experiment with 50 parameters." + ] + }, + { + "cell_type": "code", + "metadata": { + "metadata": [], + "originalKey": "edc7cc31-608c-41e6-8201-7f201bdbcae2", + "outputsInitialized": true, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "bentoCellName": { + "name": "Configure Optimization Experiment", + "origin": "ai" + }, + "language": "python", + "executionStartTime": 1769816290575, + "executionStopTime": 1769816290835, + "serverExecutionDuration": 33.855004934594, + "collapsed": false, + "requestMsgId": "7daa59f2-ee6c-4aa7-9b16-9ec09d5dac30" + }, + "source": [ + "# Create a client\n", + "client = Client()\n", + "\n", + "# Define 50 parameters (x0 through x49) in the unit hypercube [0, 1]\n", + "parameters = [\n", + " RangeParameterConfig(\n", + " name=f\"x{i}\",\n", + " parameter_type=\"float\",\n", + " bounds=(0.0, 1.0),\n", + " )\n", + " for i in range(50)\n", + "]\n", + "\n", + "# Configure the experiment\n", + "client.configure_experiment(parameters=parameters)\n", + "\n", + "# Define the center of the search space as the pruning target\n", + "# Parameters that are \"pruned\" will be set to these default values\n", + "pruning_target = {f\"x{i}\": 0.5 for i in range(50)}\n", + "\n", + "# Configure optimization to minimize the objective\n", + "metric_name = \"hartmann\"\n", + "objective = f\"-{metric_name}\" # Negative sign indicates minimization\n", + "client.configure_optimization(\n", + " objective=objective,\n", + " pruning_target_parameterization=pruning_target,\n", + ")\n", + "\n", + "print(f\"Experiment configured with {len(parameters)} 
parameters\")\n", + "print(f\"Objective: minimize {metric_name}\")\n", + "print(f\"Pruning target: center of search space (0.5 for all parameters)\")" + ], + "execution_count": 29, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Experiment configured with 50 parameters\nObjective: minimize hartmann\nPruning target: center of search space (0.5 for all parameters)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "metadata": [], + "originalKey": "c20cd49d-3c01-4a6b-ba37-80193268892e", + "showInput": false, + "outputsInitialized": false, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "language": "markdown" + }, + "source": [ + "## 4. Define the Hartmann50 Objective Function\n", + "\n", + "The Hartmann50 function uses the 6D Hartmann function on the first 6 dimensions, with 44 dummy dimensions that don't affect the output." + ] + }, + { + "cell_type": "code", + "metadata": { + "metadata": [], + "originalKey": "6fd90df3-758d-4f97-bf95-77c010b299a2", + "outputsInitialized": true, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "bentoCellName": { + "name": "Evaluate Hartmann Function", + "origin": "ai" + }, + "language": "python", + "executionStartTime": 1769816295444, + "executionStopTime": 1769816295658, + "serverExecutionDuration": 6.3301690388471, + "collapsed": false, + "requestMsgId": "35cc934b-7d0b-4311-8daf-ef440375478e" + }, + "source": [ + "from botorch.test_functions import Hartmann\n", + "\n", + "# Create the 6D Hartmann function\n", + "hartmann_6d: Hartmann = Hartmann(dim=6, negate=False)\n", + "\n", + "\n", + "def hartmann50(**parameters) -> float:\n", + " \"\"\"Evaluate the Hartmann50 function.\n", + "\n", + " Only the first 6 dimensions (x0-x5) affect the output.\n", + " The remaining 44 dimensions (x6-x49) are ignored.\n", + "\n", + " Args:\n", + " **parameters: Dict of parameter values (x0 through x49)\n", + "\n", + " Returns:\n", + " The Hartmann function value (to be 
minimized).\n", + " \"\"\"\n", + " # Extract the first 6 parameters that actually matter\n", + " x = torch.tensor([[parameters[f\"x{i}\"] for i in range(6)]], dtype=torch.double)\n", + " return hartmann_6d(x).item()\n", + "\n", + "\n", + "# Test the function\n", + "test_params = {f\"x{i}\": 0.5 for i in range(50)}\n", + "print(f\"Test evaluation at center: {hartmann50(**test_params):.4f}\")\n", + "print(\"Global optimum is approximately: -3.32237\")" + ], + "execution_count": 30, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Test evaluation at center: -0.5053\nGlobal optimum is approximately: -3.32237\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "metadata": [], + "originalKey": "711b63d2-76d1-4550-933f-b9719c399fd7", + "showInput": false, + "outputsInitialized": false, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "language": "markdown" + }, + "source": [ + "## 5. Configure the Generation Strategy with BONSAI and MAP-SAAS\n", + "\n", + "We use `choose_generation_strategy` with `GenerationStrategyDispatchStruct(method=\"custom\", simplify_parameter_changes=True)` to specify that we want to use BONSAI, and we specify `EnsembleMapSaasSingleTaskGP` as the model to leverage MAP-SAAS." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "metadata": [], + "originalKey": "b9311525-5e75-4c73-bf6f-7dc9335606bf", + "outputsInitialized": true, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "bentoCellName": { + "name": "Configure BONSAI Strategy", + "origin": "ai" + }, + "language": "python", + "executionStartTime": 1769817000911, + "executionStopTime": 1769817001186, + "serverExecutionDuration": 7.064376026392, + "collapsed": false, + "requestMsgId": "bb78e4ac-d972-45cf-95cc-1b77c1dba7fd" + }, + "source": [ + "# Configuration parameters\n", + "NUM_SOBOL_TRIALS = 10 # Number of initial quasi-random trials\n", + "\n", + "# Configure the model for BONSAI with MAP-SAAS\n", + "model_config = ModelConfig(\n", + " botorch_model_class=EnsembleMapSaasSingleTaskGP,\n", + " name=\"BONSAI\",\n", + ")\n", + "\n", + "# Create the BONSAI generation strategy using choose_generation_strategy\n", + "generation_strategy = choose_generation_strategy(\n", + " struct=GenerationStrategyDispatchStruct(\n", + " method=\"custom\",\n", + " initialization_budget=NUM_SOBOL_TRIALS,\n", + " initialize_with_center=True,\n", + " simplify_parameter_changes=True,\n", + " ),\n", + " model_config=model_config,\n", + ")\n", + "\n", + "# Set the generation strategy on the client\n", + "client.set_generation_strategy(generation_strategy=generation_strategy)\n", + "\n", + "print(f\"Generation strategy configured: {generation_strategy.name}\")\n", + "print(\" - 1 Center trial\")\n", + "print(f\" - {NUM_SOBOL_TRIALS - 1} Sobol trials\")\n", + "print(\" - BONSAI with MAP-SAAS\")\n", + "print(\" - simplify_parameter_changes=True (pruning irrelevant dimensions)\")" + ], + "execution_count": 35, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Generation strategy configured: Center+Sobol+MBM:BONSAI\n - 1 Center trial\n - 9 Sobol trials\n - BONSAI with MAP-SAAS\n - simplify_parameter_changes=True (pruning irrelevant dimensions)\n" + ] + } + ] + }, + 
{ + "cell_type": "markdown", + "metadata": { + "metadata": [], + "originalKey": "3bf11740-d3c8-4733-bdb5-9ca4e958452c", + "showInput": false, + "outputsInitialized": false, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "language": "markdown" + }, + "source": [ + "## 6. Run the Optimization Loop" + ] + }, + { + "cell_type": "code", + "metadata": { + "metadata": [], + "originalKey": "9b9a3d2b-3c4f-49a7-8454-9c996ea68eb6", + "outputsInitialized": true, + "bentoAICellStatus": { + "status": "none" + }, + "isCommentPanelOpen": false, + "bentoCellName": { + "name": "Initialize Optimization Process", + "origin": "ai" + }, + "language": "python", + "executionStartTime": 1769816304440, + "executionStopTime": 1769816496707, + "serverExecutionDuration": 191986.367804, + "collapsed": false, + "requestMsgId": "83ccc26c-988f-4297-b2ce-2f4e99a4f5cc" + }, + "source": [ + "import logging\n", + "\n", + "# Set the Ax logger to show only warnings and errors.\n", + "logging.getLogger(\"ax.api.client\").setLevel(logging.WARNING)\n", + "\n", + "# Total number of trials\n", + "TOTAL_TRIALS = 50\n", + "\n", + "# Track best values for visualization\n", + "best_values = []\n", + "all_values = []\n", + "current_best = float(\"inf\")\n", + "\n", + "print(f\"Starting optimization with {TOTAL_TRIALS} trials...\")\n", + "print(\"-\" * 60)\n", + "\n", + "for trial_idx in range(TOTAL_TRIALS):\n", + " # Get the next trial(s) from the generation strategy\n", + " trials = client.get_next_trials(max_trials=1)\n", + "\n", + " for index, parameters in trials.items():\n", + " # Evaluate the objective function\n", + " result = hartmann50(**parameters)\n", + " all_values.append(result)\n", + "\n", + " # Update best value (we're minimizing)\n", + " if result < current_best:\n", + " current_best = result\n", + " improvement_marker = \" *NEW BEST*\"\n", + " else:\n", + " improvement_marker = \"\"\n", + "\n", + " best_values.append(current_best)\n", + "\n", + " # Report the result back to 
Ax\n", + " client.complete_trial(\n", + " trial_index=index,\n", + " raw_data={metric_name: result},\n", + " )\n", + "\n", + " # Determine which phase we're in\n", + " if trial_idx == 0:\n", + " phase = \"Center\"\n", + " elif trial_idx < NUM_SOBOL_TRIALS:\n", + " phase = \"Sobol\"\n", + " else:\n", + " phase = \"BONSAI\"\n", + "\n", + " # Print progress (every 5 trials or when there's improvement)\n", + " if trial_idx % 5 == 0 or improvement_marker:\n", + " print(\n", + " f\"Trial {trial_idx + 1:3d}/{TOTAL_TRIALS} [{phase:6s}]: \"\n", + " f\"value = {result:8.4f}, best = {current_best:8.4f}{improvement_marker}\"\n", + " )\n", + "\n", + "print(\"-\" * 60)\n", + "print(\"Optimization complete!\")\n", + "print(f\"Best value found: {current_best:.4f}\")\n", + "print(\"Global optimum: -3.32237\")\n", + "print(f\"Gap to optimum: {current_best - (-3.32237):.4f}\")" + ], + "execution_count": 32, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Starting optimization with 50 trials...\n------------------------------------------------------------\nTrial 1/50 [Center]: value = -0.5053, best = -0.5053 *NEW BEST*\nTrial 4/50 [Sobol ]: value = -1.2127, best = -1.2127 *NEW BEST*\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 6/50 [Sobol ]: value = -0.0301, best = -1.2127\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:38:34 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 11/50 [BONSAI]: value = -0.7886, best = -1.2127\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:38:36 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:38:40 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": 
"stream", + "name": "stderr", + "text": [ + "[W 260130 15:38:45 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:38:47 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:38:49 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 16/50 [BONSAI]: value = -0.7900, best = -1.2127\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:38:52 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 17/50 [BONSAI]: value = -2.3504, best = -2.3504 *NEW BEST*\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:38:54 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:38:57 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:38:59 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:01 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 21/50 [BONSAI]: value = -2.5353, best = -2.5353 *NEW BEST*\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:04 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:06 cholesky:40] A not p.d., added jitter of 1.0e-08 to the 
diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:08 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:10 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 25/50 [BONSAI]: value = -2.8627, best = -2.8627 *NEW BEST*\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:12 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 26/50 [BONSAI]: value = -2.0271, best = -2.8627\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 27/50 [BONSAI]: value = -2.9566, best = -2.9566 *NEW BEST*\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:17 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 28/50 [BONSAI]: value = -2.9679, best = -2.9679 *NEW BEST*\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:18 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:21 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:23 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 31/50 [BONSAI]: value = -2.9697, best = -2.9697 *NEW BEST*\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:28 cholesky:40] A not p.d., added jitter of 1.0e-08 to the 
diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:38 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 33/50 [BONSAI]: value = -2.9826, best = -2.9826 *NEW BEST*\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:39:57 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:40:06 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:40:11 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 36/50 [BONSAI]: value = -2.9701, best = -2.9826\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:40:15 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:40:19 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:40:23 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:40:26 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:40:29 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 41/50 [BONSAI]: value = -2.9673, best = -2.9826\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:40:42 cholesky:40] A not p.d., 
added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:40:55 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:41:08 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 44/50 [BONSAI]: value = -3.1051, best = -3.1051 *NEW BEST*\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:41:11 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 45/50 [BONSAI]: value = -3.1220, best = -3.1220 *NEW BEST*\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:41:19 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 46/50 [BONSAI]: value = -2.8086, best = -3.1220\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:41:21 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial 47/50 [BONSAI]: value = -3.1354, best = -3.1354 *NEW BEST*\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:41:27 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:41:32 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[W 260130 15:41:36 cholesky:40] A not p.d., added jitter of 1.0e-08 to the diagonal\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"------------------------------------------------------------\nOptimization complete!\nBest value found: -3.1354\nGlobal optimum: -3.32237\nGap to optimum: 0.1870\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "metadata": [], + "originalKey": "267b4003-c88c-4973-aec8-d421a9a5edc4", + "showInput": false, + "outputsInitialized": false, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "language": "markdown" + }, + "source": [ + "## 7. Visualize Optimization Performance" + ] + }, + { + "cell_type": "code", + "metadata": { + "metadata": [], + "originalKey": "6d04b53f-4dc5-4086-a7c1-27c33c5eab14", + "outputsInitialized": true, + "bentoAICellStatus": { + "status": "none" + }, + "isCommentPanelOpen": false, + "bentoCellName": { + "name": "Plot Optimization Results", + "origin": "ai" + }, + "language": "python", + "executionStartTime": 1769817024748, + "executionStopTime": 1769817025279, + "serverExecutionDuration": 302.34268680215, + "collapsed": false, + "requestMsgId": "c68e5132-4402-452a-95bb-20f13ea1d30c" + }, + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "fig, ax = plt.subplots(figsize=(12, 6))\n", + "\n", + "trials_range = range(1, len(best_values) + 1)\n", + "\n", + "# Plot best values over trials (convergence plot)\n", + "ax.plot(trials_range, best_values, 'b-', linewidth=2, label='Best value found')\n", + "ax.axvline(x=1, color='purple', linestyle='--', alpha=0.5, label='Center')\n", + "ax.axvline(x=NUM_SOBOL_TRIALS, color='r', linestyle='--', alpha=0.7, label='Sobol → BONSAI')\n", + "ax.axhline(y=-3.32237, color='g', linestyle=':', alpha=0.7, label='Global optimum (-3.32)')\n", + "\n", + "# Scatter plot of all trial values\n", + "colors = ['purple'] + ['orange'] * (NUM_SOBOL_TRIALS - 1) + ['blue'] * (TOTAL_TRIALS - NUM_SOBOL_TRIALS)\n", + "ax.scatter(trials_range, all_values, c=colors, alpha=0.6, s=50)\n", + "\n", + "ax.set_xlabel('Trial', fontsize=12)\n", + "ax.set_ylabel('Objective Value', fontsize=12)\n", + 
"ax.set_title('BONSAI Optimization Progress on Hartmann50', fontsize=14)\n", + "ax.legend(loc='upper right')\n", + "ax.grid(True, alpha=0.3)\n", + "ax.set_xlim([1, TOTAL_TRIALS])\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ], + "execution_count": 36, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "metadata": [], + "originalKey": "17cf3aa2-ef85-43b7-95cf-a1d6ece44ec9", + "showInput": false, + "outputsInitialized": false, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "bentoCellName": { + "origin": "initial", + "name": "Cell 17" + }, + "language": "python", + "executionStartTime": 1769815755528, + "executionStopTime": 1769815755942, + "serverExecutionDuration": 14.947107993066, + "collapsed": false, + "requestMsgId": "f5c8bf42-7623-402f-9f0e-543b80719280", + "customInput": null + }, + "source": [ + "## 8. Analyze Objective vs Simplicity trade-offs\n", + "\n", + "One of the key benefits of BONSAI is that it can prune irrelevant parameters (set them to default values). Let's analyze how the number of active parameters relates to the best objective value found." + ], + "attachments": {} + }, + { + "cell_type": "code", + "metadata": { + "metadata": [], + "originalKey": "672468bd-6dea-4d02-8adc-dc299dcf73fd", + "outputsInitialized": true, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "bentoCellName": { + "name": "Extract Trial Metrics", + "origin": "ai" + }, + "language": "python", + "executionStartTime": 1769817165873, + "executionStopTime": 1769817166644, + "serverExecutionDuration": 541.96859383956, + "collapsed": false, + "requestMsgId": "2a076b3b-3746-4f5a-ba28-182f2a9fd45a" + }, + "source": [ + "def count_active_parameters(\n", + " parameters: dict[str, float], default_value: float = 0.5, tol: float = 1e-6\n", + ") -> int:\n", + " \"\"\"Count the number of parameters that differ from the default value.\"\"\"\n", + " return sum(1 for v in parameters.values() if abs(v - default_value) > tol)\n", + "\n", + "\n", + "# Get all trials and their parameters\n", + "experiment = client._experiment\n", + "trials_data = []\n", + "\n", + "for trial_index, trial in experiment.trials.items():\n", + " arm = trial.arm\n", + " if arm is 
not None:\n", + " params = arm.parameters\n", + " num_active = count_active_parameters(params)\n", + " # Get the objective value for this trial\n", + " trial_data = trial.lookup_data()\n", + " if not trial_data.df.empty:\n", + " obj_value = trial_data.df[trial_data.df[\"metric_name\"] == metric_name][\n", + " \"mean\"\n", + " ].values[0]\n", + " trials_data.append(\n", + " {\n", + " \"trial_index\": trial_index,\n", + " \"num_active_params\": num_active,\n", + " \"objective_value\": obj_value,\n", + " }\n", + " )\n", + "\n", + "# Convert to arrays for plotting\n", + "num_active_params = [d[\"num_active_params\"] for d in trials_data]\n", + "\n", + "print(f\"Collected data for {len(trials_data)} trials\")\n", + "print(\"\\nNumber of active parameters per trial:\")\n", + "print(f\" Min: {min(num_active_params)}\")\n", + "print(f\" Max: {max(num_active_params)}\")\n", + "print(f\" Mean: {np.mean(num_active_params):.1f}\")\n", + "\n", + "# Compute best observed objective value for each number of active parameters\n", + "from collections import defaultdict\n", + "\n", + "# Group trials by number of active parameters\n", + "params_to_best_value = defaultdict(lambda: float('inf'))\n", + "for d in trials_data:\n", + " n_active = d[\"num_active_params\"]\n", + " obj_val = d[\"objective_value\"]\n", + " if obj_val < params_to_best_value[n_active]:\n", + " params_to_best_value[n_active] = obj_val\n", + "\n", + "# Sort by number of active parameters\n", + "sorted_n_active = sorted(params_to_best_value.keys())\n", + "\n", + "fig, ax = plt.subplots(figsize=(8, 5))\n", + "\n", + "# Modify best_values_by_n_active to represent the best objective for any point with <=k parameters active\n", + "cumulative_best_values = []\n", + "current_best = float('inf')\n", + "for n in sorted_n_active:\n", + " current_best = min(current_best, params_to_best_value[n])\n", + " cumulative_best_values.append(current_best)\n", + "\n", + "# Line plot: Best observed value versus number of active 
parameters based on cumulative best values\n", + "ax.plot(sorted_n_active, cumulative_best_values, color='steelblue', marker='o', linestyle='-', linewidth=2)\n", + "ax.axhline(y=-3.32237, color='g', linestyle=':', linewidth=2, label='Global optimum (-3.32)')\n", + "ax.axvline(x=6, color='r', linestyle='--', alpha=0.7, label='True relevant dims (6)')\n", + "ax.set_xlabel('Number of Active Parameters (<=k)', fontsize=12)\n", + "ax.set_ylabel('Best Objective Value', fontsize=12)\n", + "ax.set_title('Best Observed Value by Number of Active Parameters', fontsize=12)\n", + "ax.legend(loc='upper right')\n", + "ax.grid(True, alpha=0.3)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ], + "execution_count": 41, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collected data for 50 trials\n\nNumber of active parameters per trial:\n Min: 0\n Max: 50\n Mean: 12.8\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "originalKey": "b959fed1-3fc2-41a3-884b-200a1462106e", + "showInput": false, + "outputsInitialized": false, + "bentoAICellStatus": "none", + "isCommentPanelOpen": false, + "language": "markdown" + }, + "source": [ + "## 9. Key Takeaways\n", + "\n", + "### Why BONSAI + MAP-SAAS?\n", + "\n", + "1. **MAP-SAAS**: MAP-SAAS is a variant of the Sparse Axis Aligned Subspace prior (Eriksson & Jankowiak. High-dimensional Bayesian optimization with sparse axis-aligned subspaces, UAI, 2021), and places a half-Cauchy prior on the GP lengthscales. As a result, SAAS models encourage model sparsity, where less relevant inputs are driven toward long lengthscales. This improves performance on high-dimensional tasks, and is synergistic with BONSAI. Standard SAAS models use a time-consuming, fully Bayesian (MCMC) inference procedure; MAP-SAAS provides many of the benefits of the fully Bayesian SAAS model by ensembling over just a few models estimated via MAP, with significantly lower computational costs.\n", + "\n", + "2. **BONSAI**: BONSAI prunes irrelevant dimensions via `simplify_parameter_changes=True` and sets them to the `pruning_target_parameterization` (the status quo/default/production values or a target point of interest). This simplifies the proposals so that they change fewer parameters, making the proposals more interpretable and more likely to avoid regressions in metrics not captured in the optimization objective.\n", + "\n", + "### When to Use This Approach\n", + "- **Real-world optimization** where simple, interpretable changes are desired." + ] + } + ] +}