diff --git a/.gitignore b/.gitignore index f9082380e..5e1dbc6eb 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,4 @@ pufferlib/ocean/impulse_wars/*-release/ pufferlib/ocean/impulse_wars/debug-*/ pufferlib/ocean/impulse_wars/release-*/ pufferlib/ocean/impulse_wars/benchmark/ +pufferlib/ocean/dogfight/dogfight_test diff --git a/pufferlib/SWEEP_PERSISTENCE.md b/pufferlib/SWEEP_PERSISTENCE.md new file mode 100644 index 000000000..71df01c44 --- /dev/null +++ b/pufferlib/SWEEP_PERSISTENCE.md @@ -0,0 +1,407 @@ +# Sweep Persistence & Override + +This document describes the crash recovery and override injection features for Protein sweeps. + +## Overview + +Protein sweeps can run for days across hundreds of training runs. Two problems arise: + +1. **Crashes lose progress** - A crash at run 50 of 100 means starting over +2. **No way to inject knowledge** - Users can't guide the sweep based on observations + +This implementation solves both: + +- **Persistence**: Sweep state is saved to JSON after each run, enabling crash recovery +- **Override**: Users can inject specific hyperparameters mid-sweep via a JSON file + +## Flowchart + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ PROTEIN SWEEP FLOWCHART │ +│ ★ = PERSISTENCE FEATURES │ +└─────────────────────────────────────────────────────────────────────────────┘ + + ┌─────────────────┐ + │ Protein() │ + │ __init__ │ + └────────┬────────┘ + │ + ★ ┌───────────▼───────────┐ + │ Clean orphaned .tmp │ + │ files from last crash │ + └───────────┬───────────┘ + │ + ★ ┌───────────▼───────────┐ + │ State file exists? │ + └───────────┬───────────┘ + │ + ┌────────────┴────────────┐ + │ YES │ NO + ★ ┌───▼────────────────┐ │ + │ Load state: │ │ + │ • suggestion_idx │ │ + │ • observations │ │ + │ • score bounds │ │ + │ Print "Resumed..." │ │ + └───┬────────────────┘ │ + │ │ + └────────────┬────────────┘ + │ + ┌────────▼────────┐ + │ Ready for │ + │ sweep loop │ + └────────┬────────┘ + │ +═══════════════════════════════════════╪═══════════════════════════════════════ + SWEEP LOOP (per run) +═══════════════════════════════════════╪═══════════════════════════════════════ + │ + ┌─────────────────────────────►│ + │ ┌────────▼────────┐ + │ │ suggest() │ + │ │ suggestion_idx++│ ← in memory only + │ └────────┬────────┘ + │ │ + │ ★ ┌───────────▼───────────┐ + │ │ Override file exists? │ + │ └───────────┬───────────┘ + │ │ + │ ┌──────────────┴──────────────┐ + │ │ YES │ NO + │ ★ ┌───▼────────────────────┐ │ + │ │ Read override.json │ │ + │ │ Pop first suggestion │ │ + │ │ Atomic write remaining │ │ + │ │ Apply params to fill │ │ + │ │ Print "OVERRIDE:..." 
│ │ + │ └───┬────────────────────┘ │ + │ │ │ + │ │ ┌────────────▼────────────┐ + │ │ │ Normal suggestion: │ + │ │ │ • Sobol (early runs) │ + │ │ │ • GP optimization │ + │ │ └────────────┬────────────┘ + │ │ │ + │ └──────────────┬──────────────┘ + │ │ + │ ┌────────▼────────┐ + │ │ Return hypers │ + │ │ to pufferl.py │ + │ └────────┬────────┘ + │ │ + │ ┌────────▼────────┐ + │ │ TRAINING RUN │ + │ └────────┬────────┘ + │ │ + │ ┌────────────┴────────────┐ + │ │ │ + │ ┌─────▼─────┐ ┌──────▼──────┐ + │ │ CRASH │ │ COMPLETES │ + │ └─────┬─────┘ └──────┬──────┘ + │ │ │ + │ │ ┌────────▼────────┐ + │ │ │ observe() │ + │ │ └────────┬────────┘ + │ │ │ + │ │ ┌─────────────┴─────────────┐ + │ │ │ │ + │ │ ┌─────▼─────┐ ┌──────▼──────┐ + │ │ │ NaN/Fail │ │ Success │ + │ │ └─────┬─────┘ └──────┬──────┘ + │ │ │ │ + │ │ ★ ┌───▼───────────────┐ ★ ┌────▼────────────┐ + │ │ │ Add to │ │ Add to │ + │ │ │ failure_obs │ │ success_obs │ + │ │ │ _save_state() │ │ _save_state() │ + │ │ └─────────┬─────────┘ └────────┬────────┘ + │ │ │ │ + │ │ └────────────┬────────────┘ + │ │ │ + │ │ ▼ + │ │ ★ ┌─────────────────┐ + │ │ │ {project}_sweep │ + │ │ │ .json updated │ + │ │ └────────┬────────┘ + │ │ │ + │ ▼ │ + │ ┌─────────────┐ │ + │ │ Wait for │ │ + │ │ restart │ │ + │ └─────────────┘ │ + │ │ + └────────────────────────────────────────────┘ +``` + +## File Formats + +### State File: `{project}_sweep.json` + +Created automatically. Named after your wandb/neptune project. + +```json +{ + "suggestion_idx": 42, + "success_observations": [ + { + "input": [0.123, -0.456, ...], + "output": 0.85, + "cost": 100000000 + } + ], + "failure_observations": [ + { + "input": [0.789, -0.012, ...], + "output": NaN, + "cost": 50000000, + "is_failure": true + } + ], + "min_score": 0.12, + "max_score": 0.85, + "log_c_min": 17.42, + "log_c_max": 18.42 +} +``` + +### Override File: `{project}_override.json` + +Create this file manually to inject hyperparameters into the next run(s). + +```json +{ + "suggestions": [ + { + "params": { + "train/learning_rate": 0.0005, + "train/ent_coef": 0.01, + "env/reward_scale": 0.5 + }, + "reason": "Testing higher entropy for exploration" + }, + { + "params": { + "train/learning_rate": 0.0001 + }, + "reason": "Testing lower LR after seeing instability" + } + ] +} +``` + +**Behavior:** +- Each `suggest()` call pops the first entry from the list +- Partial params are merged with defaults (only specify what you want to override) +- File is deleted when all suggestions are consumed +- Invalid parameter paths are skipped with a warning + +## Usage + +### Basic Sweep (persistence automatic) + +```bash +python -m pufferlib.pufferl sweep puffer_dogfight --wandb --wandb-project df9 +``` + +State saves to `df9_sweep.json` after each completed run. + +### Crash Recovery + +Just restart the same command: + +```bash +# Crashed at run 47... 
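+# df9_sweep.json already holds the 46 completed observations, so nothing is lost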
+# Just run again: +python -m pufferlib.pufferl sweep puffer_dogfight --wandb --wandb-project df9 +# Output: [Protein] Resumed from df9_sweep.json: 46 obs, idx=47 +``` + +### Injecting Overrides + +While a sweep is running (or before starting): + +```bash +# Create override file +cat > df9_override.json << 'EOF' +{ + "suggestions": [ + { + "params": {"train/learning_rate": 0.0003, "train/ent_coef": 0.015}, + "reason": "Promising region from wandb analysis" + } + ] +} +EOF +``` + +Next `suggest()` call will use these params instead of GP suggestion: +``` +[Protein] OVERRIDE: Promising region from wandb analysis +``` + +## Crash Recovery Behavior + +| Scenario | What's Preserved | What's Lost | +|----------|------------------|-------------| +| Crash during training run | All completed runs | Current run only | +| Crash during `_save_state()` | All previous state | Nothing (atomic write) | +| Corrupted state file | Nothing | Starts fresh with warning | +| Corrupted override file | All state | Override deleted with warning | + +### Atomic Write Pattern + +All file writes use atomic replacement to prevent corruption: + +``` +1. Write to {file}.tmp +2. os.replace(tmp, file) ← atomic on POSIX +3. On failure: delete .tmp, original intact +``` + +## Error Handling + +| Error | Behavior | +|-------|----------| +| State file corrupted/truncated | Warning printed, starts fresh | +| State file deleted mid-load | Warning printed, starts fresh | +| Override file corrupted | Warning printed, file deleted, normal suggestion | +| Override path doesn't exist | Warning printed, path skipped, other params applied | +| Disk full during save | Warning printed, .tmp cleaned up, training continues | + +## API Reference + +### New Protein Methods + +```python +@staticmethod +def _json_default(obj): + """JSON serializer for numpy types.""" + +def _save_state(self): + """Save sweep state to JSON. Called after each observe().""" + +def _load_state_if_exists(self): + """Load state on init. Cleans orphaned .tmp files.""" + +def _check_override(self): + """Check for and consume override file. Returns params dict or None.""" +``` + +### Config Keys + +Added to sweep config (injected by `pufferl.py`): + +```python +sweep_config['state_file'] = f'{project}_sweep.json' +sweep_config['override_file'] = f'{project}_override.json' +``` + +## Testing + +```bash +python tests/test_sweep_persistence_and_override.py +``` + +Tests cover: +- Save/load round-trip +- Crash recovery with state preservation +- Override consumption (single and multiple) +- Partial override (merge with defaults) +- Corrupted/empty/missing file handling +- Race conditions (file deleted between check and open) +- Atomic write failure recovery +- Orphaned .tmp cleanup +- Analysis helper functions (read_sweep_results, create_override) + +**Note:** `tests/test_sweep_hyper.py` is a pre-existing file (not part of this PR) that seems to be a research script for tuning GP hyperparameters. It requires a manually-generated `sweep_observations.pkl` file and will fail if run standalone. + +## Implementation Notes + +1. **State saved in `observe()`, not `suggest()`**: If training crashes, `suggestion_idx` isn't persisted. On restart, may re-suggest similar params. This is intentional - we don't save until we have results. + +2. **Override params are not validated**: Values outside sweep ranges are allowed. This lets users intentionally explore outside the configured space. + +3. **Failure observations are tracked**: NaN scores go to `failure_observations`. 
The GP doesn't train on these, but they're preserved for debugging. + +4. **Near-duplicate filtering**: Success observations within `EPSILON` distance are deduplicated, keeping the most recent. + +## Analyzing Sweep Results + +The state file stores **normalized** hyperparameter values in [-1, 1] range, which is unreadable without the sweep config. Two helper functions make analysis easy: + +### read_sweep_results() + +Denormalizes state file to human-readable dicts: + +```python +from pufferlib.sweep import read_sweep_results +from pufferlib.pufferl import load_config_file + +# Load config +config = load_config_file('pufferlib/config/ocean/dogfight.ini') + +# Read and denormalize sweep state +results = read_sweep_results('df9_sweep.json', config['sweep']) + +# Results are sorted by score (best first) +best = results[0] +print(f"Best score: {best['score']:.3f}") +print(f"Learning rate: {best['params']['train/learning_rate']}") +``` + +### create_override() + +Programmatically inject hyperparameters into the next sweep run(s): + +```python +from pufferlib.sweep import create_override + +# Single override (note: params and reason must be lists) +create_override('df9_override.json', [{ + 'train/learning_rate': 0.00069, + 'train/ent_coef': 0.0069, +}], reason=['Testing hypothesis from correlation analysis']) + +# Multiple overrides +create_override('df9_override.json', [ + {'train/learning_rate': 0.001}, + {'train/learning_rate': 0.002}, + {'train/learning_rate': 0.003}, +], reason=['low LR', 'medium LR', 'high LR']) +``` + +### Jupyter Notebook Workflow + +```python +from pufferlib.sweep import read_sweep_results, create_override +from pufferlib.pufferl import load_config_file +import pandas as pd + +# Load config and read sweep state +config = load_config_file('pufferlib/config/ocean/dogfight.ini') +results = read_sweep_results('df9_sweep.json', config['sweep']) + +# Convert to DataFrame for analysis +df = pd.DataFrame([ + {**r['params'], 'score': r['score'], 'cost': r['cost']} + for r in results +]) + +# Find best runs +df.sort_values('score', ascending=False).head(10) + +# Correlation analysis - which params matter most? 
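+# (obs_scheme is an integer/categorical sweep param; treat its linear correlation as a rough signal)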
+df.corr()['score'].sort_values() + +# Based on analysis, inject promising params +create_override('df9_override.json', [{ + 'train/learning_rate': 0.00069, + 'train/ent_coef': 0.0069, +}], reason=['Testing correlation hypothesis']) +``` + +### Testing Analysis Functions + +```bash +python tests/test_sweep_persistence_and_override.py +``` diff --git a/pufferlib/config/default.ini b/pufferlib/config/default.ini index 6073c651e..025f1bf85 100644 --- a/pufferlib/config/default.ini +++ b/pufferlib/config/default.ini @@ -28,16 +28,16 @@ device = cuda optimizer = muon anneal_lr = True precision = float32 -total_timesteps = 10_000_000 +total_timesteps = 100_000_000 learning_rate = 0.015 gamma = 0.995 -gae_lambda = 0.90 +gae_lambda = 0.95 update_epochs = 1 clip_coef = 0.2 vf_coef = 2.0 vf_clip_coef = 0.2 max_grad_norm = 1.5 -ent_coef = 0.001 +ent_coef = 0.01 adam_beta1 = 0.95 adam_beta2 = 0.999 adam_eps = 1e-12 @@ -45,7 +45,7 @@ adam_eps = 1e-12 data_dir = experiments checkpoint_interval = 200 batch_size = auto -minibatch_size = 8192 +minibatch_size = 16384 # Accumulate gradients above this size max_minibatch_size = 32768 @@ -58,7 +58,7 @@ vtrace_rho_clip = 1.0 vtrace_c_clip = 1.0 prio_alpha = 0.8 -prio_beta0 = 0.2 +prio_beta0 = 0.5 [sweep] method = Protein @@ -75,24 +75,9 @@ prune_pareto = True #mean = 8 #scale = auto -# TODO: Elim from base -[sweep.train.total_timesteps] -distribution = log_normal -min = 3e7 -max = 1e10 -mean = 2e8 -scale = time - -[sweep.train.bptt_horizon] -distribution = uniform_pow2 -min = 16 -max = 64 -mean = 64 -scale = auto - [sweep.train.minibatch_size] distribution = uniform_pow2 -min = 8192 +min = 16384 max = 65536 mean = 32768 scale = auto diff --git a/pufferlib/config/ocean/dogfight.ini b/pufferlib/config/ocean/dogfight.ini new file mode 100644 index 000000000..cd6514b0a --- /dev/null +++ b/pufferlib/config/ocean/dogfight.ini @@ -0,0 +1,146 @@ +[base] +env_name = puffer_dogfight +package = ocean +policy_name = Policy +rnn_name = Recurrent + +[vec] +num_envs = 8 + +[env] +reward_aim_scale = 0.05 +reward_closing_scale = 0.003 +penalty_neg_g = 0.02 +speed_min = 50.0 + +max_steps = 3000 +num_envs = 1024 +obs_scheme = 1 + +curriculum_enabled = 1 +curriculum_randomize = 0 +advance_threshold = 0.7 + +[train] +adam_beta1 = 0.9768629406862324 +adam_beta2 = 0.999302214750495 +adam_eps = 6.906760212075045e-12 +batch_size = auto +bptt_horizon = 64 +checkpoint_interval = 200 +clip_coef = 0.4928184678032994 +ent_coef = 0.008 +gae_lambda = 0.8325103714810463 +gamma = 0.8767105842751813 +learning_rate = 0.00024 +max_grad_norm = 0.831714766100049 +max_minibatch_size = 65536 +minibatch_size = 65536 +prio_alpha = 0.8195880336315146 +prio_beta0 = 0.9429570720846501 +seed = 42 +total_timesteps = 400_000_000 +update_epochs = 4 +vf_clip_coef = 3.2638480501249436 +vf_coef = 4.293249868787825 +vtrace_c_clip = 1.911078435368836 +vtrace_rho_clip = 3.797866655513644 + +[sweep] +downsample = 1 +goal = maximize +method = Protein +metric = ultimate +prune_pareto = True +use_gpu = True + +[sweep.env.reward_aim_scale] +distribution = uniform +min = 0.02 +max = 0.1 +mean = 0.05 +scale = auto + +[sweep.env.reward_closing_scale] +distribution = uniform +min = 0.001 +max = 0.01 +mean = 0.003 +scale = auto + +[sweep.env.penalty_neg_g] +distribution = uniform +min = 0.01 +max = 0.05 +mean = 0.02 +scale = auto + +[sweep.env.obs_scheme] +distribution = int_uniform +max = 5 +mean = 0 +min = 0 +scale = 1.0 + +[sweep.env.advance_threshold] +distribution = uniform +min = 0.5 +max = 0.85 +mean = 0.7 +scale = 
auto + +[sweep.env.max_steps] +distribution = int_uniform +min = 300 +max = 1500 +mean = 900 +scale = 1.0 + +[sweep.train.learning_rate] +distribution = log_normal +max = 0.0005 +mean = 0.00025 +min = 0.0001 +scale = 0.5 + +[sweep.train.vf_coef] +distribution = uniform +min = 1.0 +max = 5.0 +mean = 3.0 +scale = auto + +[sweep.train.clip_coef] +distribution = uniform +min = 0.3 +max = 1.0 +mean = 0.5 +scale = auto + +[sweep.train.ent_coef] +distribution = log_normal +min = 0.002 +max = 0.02 +mean = 0.008 +scale = 0.5 + +[sweep.train.max_grad_norm] +distribution = uniform +min = 0.5 +max = 2.0 +mean = 1.0 +scale = auto + +[sweep.train.gae_lambda] +distribution = logit_normal +min = 0.9 +max = 0.999 +mean = 0.95 +scale = auto + +[sweep.train.gamma] +distribution = logit_normal +min = 0.95 +max = 0.9999 +mean = 0.99 +scale = auto diff --git a/pufferlib/environments/mani_skill/torch.py b/pufferlib/environments/mani_skill/torch.py index abb8eaa18..c2e5a795d 100644 --- a/pufferlib/environments/mani_skill/torch.py +++ b/pufferlib/environments/mani_skill/torch.py @@ -64,7 +64,7 @@ def decode_actions(self, hidden): '''Decodes a batch of hidden states into (multi)discrete actions. Assumes no time dimension (handled by LSTM wrappers).''' mean = self.decoder_mean(hidden) - logstd = self.decoder_logstd.expand_as(mean) + logstd = self.decoder_logstd.expand_as(mean).clamp(min=-20, max=2) std = torch.exp(logstd) logits = torch.distributions.Normal(mean, std) values = self.value(hidden) diff --git a/pufferlib/models.py b/pufferlib/models.py index fa43d7071..d81198343 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -88,7 +88,7 @@ def decode_actions(self, hidden): logits = self.decoder(hidden).split(self.action_nvec, dim=1) elif self.is_continuous: mean = self.decoder_mean(hidden) - logstd = self.decoder_logstd.expand_as(mean) + logstd = self.decoder_logstd.expand_as(mean).clamp(min=-20, max=2) std = torch.exp(logstd) logits = torch.distributions.Normal(mean, std) else: diff --git a/pufferlib/ocean/ENV_GUIDE.md b/pufferlib/ocean/ENV_GUIDE.md new file mode 100644 index 000000000..8e7fb1bd8 --- /dev/null +++ b/pufferlib/ocean/ENV_GUIDE.md @@ -0,0 +1,284 @@ +# PufferLib Ocean Environment Guide + +Quick reference for implementing C-based RL environments in PufferLib. + +## File Structure + +``` +pufferlib/ocean/{env_name}/ +├── {env_name}.h # C implementation (header-only) +├── binding.c # Python-C glue (~20 lines) +└── {env_name}.py # Python wrapper + +pufferlib/config/ocean/{env_name}.ini # Training config +``` + +Build: `python setup.py build_ext --inplace --force` + +## 1. C Header (`{env_name}.h`) + +### Required Structs + +```c +// Log: ONLY floats, last field must be `n` +typedef struct Log { + float episode_return; + float episode_length; + float score; + float perf; // 0-1 normalized metric + // ... custom metrics ... + float n; // REQUIRED last: episode count +} Log; + +// Main env struct +typedef struct EnvName { + float* observations; // or char* for discrete obs + float* actions; // ALWAYS float* (even discrete) + float* rewards; + unsigned char* terminals; + Log log; + Client* client; // raylib, NULL until render + // ... env state ... 
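+    // e.g. fields referenced by the snippets below (illustrative, not required):
+    //   int tick;                       // stepped in c_step(), logged in add_log()
+    //   float score, episodic_return;   // accumulated during the episode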
+} EnvName; +``` + +### Required Functions + +| Function | Purpose | +|----------|---------| +| `init(Env*)` | Allocate internal buffers | +| `c_reset(Env*)` | Reset episode state | +| `c_step(Env*)` | Advance simulation | +| `c_render(Env*)` | Raylib rendering | +| `c_close(Env*)` | Free memory | +| `compute_observations(Env*)` | Fill obs buffer | +| `add_log(Env*, ...)` | Accumulate stats | + +### Step Pattern + +```c +void c_step(Env* env) { + env->tick++; + env->rewards[0] = 0; + env->terminals[0] = 0; + + // ... physics/game logic ... + + if (terminal_condition) { + env->terminals[0] = 1; + add_log(env, ...); + c_reset(env); + return; + } + compute_observations(env); +} +``` + +### Logging Pattern + +```c +void add_log(Env* env) { + env->log.episode_return += env->episodic_return; + env->log.episode_length += env->tick; + env->log.score += env->score; + env->log.n += 1.0f; // increment episode count +} +``` + +## 2. Binding (`binding.c`) + +```c +#include "{env_name}.h" + +#define Env EnvName +#include "../env_binding.h" + +static int my_init(Env* env, PyObject* args, PyObject* kwargs) { + env->param1 = unpack(kwargs, "param1"); + env->param2 = unpack(kwargs, "param2"); + init(env); + return 0; +} + +static int my_log(PyObject* dict, Log* log) { + assign_to_dict(dict, "episode_return", log->episode_return); + assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "perf", log->perf); + return 0; +} +``` + +## 3. Python Wrapper (`{env_name}.py`) + +```python +import numpy as np +import gymnasium +import pufferlib +from pufferlib.ocean.{env_name} import binding + +class EnvName(pufferlib.PufferEnv): + def __init__(self, num_envs=16, render_mode=None, buf=None, + param1=100, param2=0.5, **kwargs): + + self.single_observation_space = gymnasium.spaces.Box( + low=-1, high=1, shape=(OBS_SIZE,), dtype=np.float32 + ) + # Continuous: Box Discrete: Discrete(n) + self.single_action_space = gymnasium.spaces.Box( + low=-1, high=1, shape=(ACT_SIZE,), dtype=np.float32 + ) + + self.num_agents = num_envs + self.render_mode = render_mode + super().__init__(buf) + + # CRITICAL for continuous actions: + self.actions = self.actions.astype(np.float32) + + c_envs = [] + for i in range(num_envs): + c_envs.append(binding.env_init( + self.observations[i:i+1], + self.actions[i:i+1], + self.rewards[i:i+1], + self.terminals[i:i+1], + self.truncations[i:i+1], + i, # seed + param1=param1, + param2=param2, + )) + self.c_envs = binding.vectorize(*c_envs) + + def reset(self, seed=None): + self.tick = 0 + binding.vec_reset(self.c_envs, seed or 0) + return self.observations, [] + + def step(self, actions): + self.actions[:] = actions + self.tick += 1 + binding.vec_step(self.c_envs) + + info = [] + log = binding.vec_log(self.c_envs) + if log: + info.append(log) + return (self.observations, self.rewards, + self.terminals, self.truncations, info) + + def render(self): + binding.vec_render(self.c_envs, 0) + + def close(self): + binding.vec_close(self.c_envs) +``` + +## 4. Config (`pufferlib/config/ocean/{env_name}.ini`) + +```ini +[base] +package = ocean +env_name = puffer_{env_name} + +[vec] +num_envs = 8 + +[env] +num_envs = 1024 +param1 = 100 +param2 = 0.5 + +[train] +total_timesteps = 100_000_000 +learning_rate = 0.0003 +gamma = 0.99 +# ... PPO hyperparams ... 
+``` + +## Reference Environments + +| Env | Use For | +|-----|---------| +| `drone_race/` | Continuous actions, quaternions, RK4 physics | +| `drone_swarm/` | Multi-agent continuous | +| `snake/` | Multi-agent discrete, grid world | +| `target/` | Simple tutorial, well-commented | +| `impulse_wars/` | Box2D physics integration | + +## Common Patterns + +### Vector/Quaternion Math (from dronelib.h) + +```c +typedef struct { float x, y, z; } Vec3; +typedef struct { float w, x, y, z; } Quat; + +Vec3 add3(Vec3 a, Vec3 b); +Vec3 sub3(Vec3 a, Vec3 b); +Vec3 scalmul3(Vec3 a, float s); +float dot3(Vec3 a, Vec3 b); +float norm3(Vec3 a); + +Quat quat_mul(Quat a, Quat b); +void quat_normalize(Quat* q); +Vec3 quat_rotate(Quat q, Vec3 v); +Quat quat_inverse(Quat q); +``` + +### Observation Normalization + +```c +// Normalize to roughly [-1, 1] +env->observations[0] = position.x / MAX_X; +env->observations[1] = velocity.x / MAX_VEL; +env->observations[2] = quat.w; // already [-1, 1] +``` + +### Action Handling + +```c +// Continuous: actions already in [-1, 1] +float throttle = (env->actions[0] + 1.0f) * 0.5f; // remap to [0, 1] +float elevator = env->actions[1]; // keep [-1, 1] + +// Discrete trigger +bool fire = env->actions[4] > 0.5f; +``` + +### Raylib Rendering + +```c +void c_render(Env* env) { + if (env->client == NULL) { + InitWindow(WIDTH, HEIGHT, "Env Name"); + SetTargetFPS(60); + env->client = calloc(1, sizeof(Client)); + } + + if (IsKeyDown(KEY_ESCAPE)) exit(0); + + BeginDrawing(); + ClearBackground((Color){6, 24, 24, 255}); + // ... draw stuff ... + EndDrawing(); +} +``` + +## Performance Tips + +1. **No allocations after init** - malloc only in `init()` +2. **Pass structs by pointer** - avoid copies +3. **Inline small functions** - `static inline` +4. **Batch operations** - process all agents in tight loops +5. **Avoid divisions** - precompute `1/x` where possible + +## Checklist for New Env + +- [ ] Create folder `pufferlib/ocean/{name}/` +- [ ] Implement `{name}.h` with all required functions +- [ ] Create `binding.c` with `my_init()` and `my_log()` +- [ ] Create `{name}.py` Python wrapper +- [ ] Create `pufferlib/config/ocean/{name}.ini` +- [ ] Build: `python setup.py build_ext --inplace --force` +- [ ] Test: `from pufferlib.ocean.{name} import EnvName` diff --git a/pufferlib/ocean/dogfight/AIRCRAFT_PERFORMANCE_RL_GUIDE.md b/pufferlib/ocean/dogfight/AIRCRAFT_PERFORMANCE_RL_GUIDE.md new file mode 100644 index 000000000..588e337ab --- /dev/null +++ b/pufferlib/ocean/dogfight/AIRCRAFT_PERFORMANCE_RL_GUIDE.md @@ -0,0 +1,1073 @@ +# Aircraft Performance Approximation for High-Performance RL Environments + +## A Comprehensive Guide for WW2 Dogfighting Simulation + +**Purpose**: This document provides the mathematical foundations, equations, and implementation strategies for approximating aircraft performance in a headless reinforcement learning training environment. The goal is to achieve very high steps-per-second (SPS) while maintaining physically plausible flight dynamics suitable for WW2 dogfighting scenarios. + +**Target Audience**: Claude agents developing a WW2 dogfighting RL environment using PufferLib or similar frameworks. + +--- + +## Table of Contents + +1. [Philosophy: Fidelity vs. Performance Trade-offs](#1-philosophy-fidelity-vs-performance-trade-offs) +2. [Coordinate Systems and Reference Frames](#2-coordinate-systems-and-reference-frames) +3. [Equations of Motion](#3-equations-of-motion) +4. [Aerodynamic Force Models](#4-aerodynamic-force-models) +5. 
[The Drag Polar](#5-the-drag-polar) +6. [Lift Coefficient Modeling](#6-lift-coefficient-modeling) +7. [Propulsion: Piston Engine and Propeller Models](#7-propulsion-piston-engine-and-propeller-models) +8. [Atmospheric Model](#8-atmospheric-model) +9. [Performance Calculations](#9-performance-calculations) +10. [Implementation Strategies for High SPS](#10-implementation-strategies-for-high-sps) +11. [WW2 Aircraft Reference Data](#11-ww2-aircraft-reference-data) +12. [Validation and Sanity Checks](#12-validation-and-sanity-checks) +13. [Sources and References](#13-sources-and-references) + +--- + +## 1. Philosophy: Fidelity vs. Performance Trade-offs + +### The Core Challenge + +Full 6-DOF flight dynamics models (like JSBSim or Stevens & Lewis F-16 models) are computationally expensive. For RL training requiring millions of environment steps, we need simplifications that: + +1. **Preserve emergent behavior**: Aircraft should fly like aircraft—stall at high AoA, turn radius should increase with speed, climb rate should decrease with altitude +2. **Enable fast vectorized computation**: All operations should be expressible as numpy/JAX/PyTorch tensor operations +3. **Avoid lookup tables where possible**: Analytical approximations are faster than interpolation +4. **Capture the essence of dogfighting**: Energy management, turn performance, climb/dive dynamics + +### Recommended Model Hierarchy + +| Model Type | DOF | Use Case | Typical SPS (single env) | +|------------|-----|----------|--------------------------| +| Full 6-DOF with stability derivatives | 12+ states | Flight sim, detailed control | 1,000-10,000 | +| Point-mass 3-DOF (vertical plane) | 6 states | Trajectory optimization | 50,000-200,000 | +| **Point-mass 3-DOF (3D)** | **6-9 states** | **RL dogfighting (recommended)** | **100,000-500,000** | +| Energy-state approximation | 3-4 states | Strategic AI | 1,000,000+ | + +**Recommendation**: Use a 3-DOF point-mass model with instantaneous bank angle changes for maximum performance while retaining meaningful dogfight dynamics. + +--- + +## 2. Coordinate Systems and Reference Frames + +### Earth-Fixed Frame (Inertial, Flat Earth Approximation) +- Origin at some reference point on ground +- **x**: North (or arbitrary horizontal) +- **y**: East (or perpendicular horizontal) +- **z**: Down (positive toward Earth center) + +For WW2 dogfighting in a local area, flat Earth is entirely acceptable. + +### Body-Fixed Frame +- Origin at aircraft CG +- **x_b**: Forward along fuselage +- **y_b**: Right wing +- **z_b**: Down through belly + +### Wind/Velocity Frame +- **x_w**: Along velocity vector +- **z_w**: Perpendicular, in vertical plane containing velocity + +### Key Angles +``` +α (alpha) = Angle of Attack = angle between x_b and velocity vector (in vertical plane) +β (beta) = Sideslip angle = angle between velocity and x_b-z_b plane +γ (gamma) = Flight path angle = angle between velocity and horizontal +ψ (psi) = Heading angle = horizontal direction of velocity +φ (phi) = Bank/roll angle +θ (theta) = Pitch angle +``` + +For 3-DOF point-mass: we typically track (V, γ, ψ, x, y, h) where bank angle φ is a control input that changes instantaneously. + +--- + +## 3. Equations of Motion + +### 3-DOF Point-Mass Model (Recommended for RL) + +This model treats the aircraft as a point mass with forces applied. It captures the essential performance characteristics without modeling rotational dynamics. 
+ +**State Vector**: `[V, γ, ψ, x, y, h]` or `[V, γ, ψ, x, y, h, m]` if tracking fuel + +**Control Inputs**: `[T (thrust/throttle), n (load factor) or φ (bank), α (angle of attack)]` + +#### Kinematic Equations +```python +dx/dt = V * cos(γ) * cos(ψ) +dy/dt = V * cos(γ) * sin(ψ) +dh/dt = V * sin(γ) # Note: h positive up, so dh/dt = -dz/dt +``` + +#### Dynamic Equations (Forces) + +In the wind-axes frame, summing forces parallel and perpendicular to velocity: + +```python +# Along velocity (tangent to flight path) +m * dV/dt = T * cos(α) - D - W * sin(γ) + +# Perpendicular to velocity, in vertical plane +m * V * dγ/dt = L * cos(φ) + T * sin(α) - W * cos(γ) + +# Perpendicular to velocity, horizontal (turning) +m * V * cos(γ) * dψ/dt = L * sin(φ) +``` + +Where: +- `T` = Thrust +- `D` = Drag +- `L` = Lift +- `W = m * g` = Weight +- `φ` = Bank angle +- `α` = Angle of attack (typically small, so cos(α) ≈ 1, sin(α) ≈ α) + +#### Simplified Form (Small α Approximation) + +For most flight conditions where α < 15°: + +```python +dV/dt = (T - D) / m - g * sin(γ) +dγ/dt = (L * cos(φ) - W * cos(γ)) / (m * V) +dψ/dt = (L * sin(φ)) / (m * V * cos(γ)) +``` + +#### Load Factor Formulation (Often More Convenient) + +Define load factor `n = L / W`: + +```python +dV/dt = (T - D) / m - g * sin(γ) +dγ/dt = (g / V) * (n * cos(φ) - cos(γ)) +dψ/dt = (g * n * sin(φ)) / (V * cos(γ)) +``` + +For a **coordinated turn** (no sideslip), the relationship is: +```python +n = 1 / cos(φ) # for level turn +``` + +So for a 60° bank, n = 2 ("2g turn"). + +--- + +## 4. Aerodynamic Force Models + +### Dynamic Pressure + +The fundamental scaling quantity: +```python +q = 0.5 * ρ * V² +``` +Where `ρ` is air density (kg/m³) and `V` is true airspeed (m/s). + +### Lift Force +```python +L = q * S * C_L +``` +Where: +- `S` = Wing reference area (m²) +- `C_L` = Lift coefficient (dimensionless) + +### Drag Force +```python +D = q * S * C_D +``` +Where `C_D` = Drag coefficient (dimensionless) + +### Converting to Accelerations +```python +# Specific forces (acceleration per unit mass) +L/m = q * S * C_L / m = (q/W) * g * S * C_L = (ρ * V² * S * C_L) / (2 * m) + +# Wing loading W/S is a key aircraft parameter +# Let W/S = wing loading in N/m² or lb/ft² +L/W = (q * C_L) / (W/S) +``` + +--- + +## 5. The Drag Polar + +### The Parabolic Drag Polar (Primary Model) + +The most important equation for aircraft performance: + +```python +C_D = C_D0 + K * C_L² +``` + +Where: +- `C_D0` = Zero-lift drag coefficient (parasite drag) +- `K` = Induced drag factor = 1 / (π * AR * e) +- `AR` = Aspect Ratio = b² / S (wingspan squared over wing area) +- `e` = Oswald efficiency factor (typically 0.7-0.85 for WW2 fighters) + +### Computing K from Aircraft Geometry + +```python +AR = b² / S # Aspect ratio +e = 0.78 # Typical for straight-wing WW2 fighter (Oswald efficiency) +K = 1 / (π * AR * e) +``` + +**Empirical Formulas for Oswald Efficiency**: + +For straight wings (Raymer approximation): +```python +e ≈ 1.78 * (1 - 0.045 * AR^0.68) - 0.64 # Raymer +e ≈ 0.7 + 0.1 * (AR - 6) / 4 # Linear approximation for AR 4-10 +``` + +For WW2 aircraft, using `e = 0.75-0.85` is reasonable. 
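+As a quick sanity check on these formulas, here is a minimal sketch (values taken from the P-51D entry in the reference data section below) that derives AR, the Raymer estimate of e, and the resulting K:
+
+```python
+import numpy as np
+
+b, S = 11.28, 21.65                          # P-51D wingspan [m] and wing area [m^2]
+AR = b**2 / S                                # aspect ratio
+e = 1.78 * (1 - 0.045 * AR**0.68) - 0.64     # Raymer straight-wing estimate
+K = 1 / (np.pi * AR * e)                     # induced drag factor
+print(f"AR={AR:.2f}  e={e:.2f}  K={K:.3f}")  # roughly AR=5.88, e=0.87, K=0.062
+```
+
+Note that the Raymer fit lands a bit above the e ≈ 0.80 quoted for the P-51D below; both are inside the 0.75-0.85 band suggested above, and either choice shifts K only slightly.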
+ +### Typical WW2 Fighter Values + +| Parameter | P-51D Mustang | Spitfire Mk IX | Bf 109G | Fw 190A | +|-----------|---------------|----------------|---------|---------| +| C_D0 | 0.0163-0.020 | 0.020-0.021 | 0.024-0.028 | 0.021-0.024 | +| AR | 5.86 | 6.48 | 6.07 | 5.74 | +| e (estimated) | 0.80 | 0.85 | 0.78 | 0.78 | +| K | 0.0686 | 0.058 | 0.069 | 0.071 | + +### Generalized Drag Polar (More Accurate) + +```python +C_D = C_D_min + K * (C_L - C_L_min_drag)² +``` + +Where `C_L_min_drag` is typically 0.1-0.2 for cambered airfoils. For simplicity in RL, the standard parabolic form is usually sufficient. + +### Mach Number Effects (Compressibility) + +For transonic flight (M > 0.6), drag rises significantly. Simple approximation: + +```python +if M < M_crit: + C_D0_M = C_D0 +elif M < 1.0: + # Transonic drag rise + C_D0_M = C_D0 * (1 + 10 * (M - M_crit)²) +else: + # Supersonic (unlikely for WW2) + C_D0_M = C_D0 * (1 + 0.5) + +# M_crit typically 0.7-0.75 for WW2 aircraft +``` + +For WW2 fighters operating below M=0.6 in normal combat, Mach effects can often be ignored. + +--- + +## 6. Lift Coefficient Modeling + +### Linear Region (Pre-Stall) + +In the linear region of the lift curve: + +```python +C_L = C_Lα * (α - α_0) +``` + +Where: +- `C_Lα` = Lift curve slope (per radian) +- `α` = Angle of attack +- `α_0` = Zero-lift angle of attack (negative for cambered airfoils) + +### Lift Curve Slope + +**2D Airfoil (Thin Airfoil Theory)**: +```python +c_lα = 2 * π # per radian ≈ 0.11 per degree +``` + +**3D Finite Wing (Lifting Line Theory)**: +```python +C_Lα = c_lα / (1 + c_lα / (π * AR)) # Approximation for elliptic wing +C_Lα = c_lα * AR / (AR + 2) # Alternative approximation +``` + +For typical AR=6 WW2 fighter: +```python +C_Lα ≈ 2π * 6 / (6 + 2) ≈ 4.71 per radian ≈ 0.082 per degree +``` + +### Stall Modeling + +**Critical**: For dogfighting, stall behavior matters! + +Simple piecewise model: +```python +def C_L(α, C_Lα, α_0, α_stall, C_L_max): + α_effective = α - α_0 + if α < α_stall: + return C_Lα * α_effective + else: + # Post-stall: lift drops off + # Simple linear dropoff + return C_L_max - 0.5 * (α - α_stall) * C_Lα +``` + +**Smooth stall model** (better for gradient-based methods): +```python +def C_L_smooth(α, C_Lα, α_0, α_stall, C_L_max): + """Sigmoid-smoothed stall transition""" + α_eff = α - α_0 + C_L_linear = C_Lα * α_eff + + # Smooth saturation using tanh + k = 10 # Sharpness of stall transition + C_L = C_L_max * np.tanh(C_L_linear / C_L_max) + + return C_L +``` + +**Typical values for WW2 fighters**: +- `α_stall` ≈ 14-18° (clean configuration) +- `C_L_max` ≈ 1.3-1.6 (clean), up to 2.0+ with flaps + +### Relating Lift Coefficient to Load Factor + +In a maneuver: +```python +C_L = (n * W) / (q * S) = (2 * n * W) / (ρ * V² * S) +``` + +So for a given load factor and speed, you can find the required C_L, and from that, the required α. + +--- + +## 7. Propulsion: Piston Engine and Propeller Models + +### Engine Power vs. Altitude + +Piston engines lose power with altitude due to decreasing air density. For naturally aspirated engines: + +```python +P(h) = P_SL * σ # Where σ = ρ/ρ_SL (density ratio) +``` + +For supercharged engines (most WW2 fighters): +```python +if h < h_critical: + P(h) = P_rated # Full power up to critical altitude +else: + P(h) = P_rated * (ρ(h) / ρ(h_critical)) +``` + +**Critical altitude** is where the supercharger can no longer maintain sea-level manifold pressure. 
Typical values: +- Single-stage supercharger: 15,000-20,000 ft +- Two-stage supercharger: 25,000-30,000 ft (stepped) + +### Propeller Model + +For prop aircraft, thrust depends on power and propeller efficiency: + +```python +T = η_p * P / V +``` + +Where `η_p` = propeller efficiency (typically 0.7-0.85 in cruise). + +**Problem**: At V=0, this gives infinite thrust! + +### Propeller Efficiency Model + +Simple model for variable-pitch propeller: +```python +def prop_efficiency(V, P, D_prop, rho): + """ + V: airspeed (m/s) + P: shaft power (W) + D_prop: propeller diameter (m) + rho: air density (kg/m³) + """ + # Advance ratio proxy + if V < 1: + V = 1 # Avoid division by zero + + # Maximum theoretical efficiency from momentum theory + T_ideal = P / V + disk_area = π * (D_prop/2)² + v_induced = np.sqrt(T_ideal / (2 * rho * disk_area)) + η_ideal = 1 / (1 + v_induced / V) + + # Practical efficiency (80-90% of ideal) + η_p = 0.85 * η_ideal + + # Clamp to reasonable range + η_p = np.clip(η_p, 0, 0.88) + + return η_p +``` + +### Simplified Thrust Model (Recommended for RL) + +Rather than modeling η_p complexly, use an empirical fit: + +```python +def thrust_model(V, P_max, V_max, rho, rho_SL): + """ + Simple thrust model for WW2 prop aircraft + + At low speed: T approaches static thrust + At high speed: T = η * P / V with η ≈ 0.8 + """ + # Power available (with altitude correction) + P_avail = P_max * (rho / rho_SL) # Simplified; use supercharger model for better accuracy + + # Static thrust approximation (from momentum theory) + # T_static ≈ (P² * rho * disk_area * 2)^(1/3) + D_prop = 3.0 # meters, typical WW2 fighter + disk_area = π * (D_prop/2)**2 + T_static = (P_avail**2 * 2 * rho * disk_area)**(1/3) + + # High-speed thrust + η_cruise = 0.80 + T_cruise = η_cruise * P_avail / max(V, 1) + + # Blend between static and cruise + # Smooth transition around V_transition + V_transition = 50 # m/s + blend = np.tanh(V / V_transition) + + T = T_static * (1 - blend) + T_cruise * blend + + return T +``` + +### Even Simpler: Polynomial Thrust Model + +For maximum speed, fit thrust vs velocity from known aircraft data: + +```python +def thrust_polynomial(V, T_static, T_max_speed, V_max): + """ + T(V) = T_static - k*V² approximately, where aircraft reaches V_max when T = D + """ + k = (T_static - T_max_speed) / V_max**2 + T = T_static - k * V**2 + return max(T, 0) +``` + +--- + +## 8. 
Atmospheric Model + +### International Standard Atmosphere (ISA) + +For troposphere (h < 11,000 m / 36,089 ft): + +```python +# Constants +T_SL = 288.15 # K (15°C) +P_SL = 101325 # Pa +ρ_SL = 1.225 # kg/m³ +g = 9.80665 # m/s² +R = 287.05 # J/(kg·K), specific gas constant for air +γ_air = 1.4 # Ratio of specific heats +λ = 0.0065 # Temperature lapse rate, K/m + +def atmosphere_troposphere(h): + """ + ISA atmospheric properties for h in meters (h < 11000 m) + """ + T = T_SL - λ * h + P = P_SL * (T / T_SL) ** (g / (R * λ)) + ρ = ρ_SL * (T / T_SL) ** (g / (R * λ) - 1) + a = np.sqrt(γ_air * R * T) # Speed of sound + + return T, P, ρ, a +``` + +**Numerical values**: +```python +# Exponents +g / (R * λ) = 9.80665 / (287.05 * 0.0065) ≈ 5.256 +g / (R * λ) - 1 ≈ 4.256 +``` + +So: +```python +T = 288.15 - 0.0065 * h +P = 101325 * (T / 288.15) ** 5.256 +ρ = 1.225 * (T / 288.15) ** 4.256 +``` + +### Density Ratio (Most Important for Performance) + +```python +σ = ρ / ρ_SL = (T / T_SL) ** 4.256 = (1 - h/44330) ** 4.256 +``` + +Quick approximation: +```python +σ ≈ np.exp(-h / 9000) # Rough exponential fit, h in meters +``` + +### Altitude in Feet (Common in Aviation) + +```python +def atmosphere_ISA_feet(h_ft): + """h_ft in feet""" + h_m = h_ft * 0.3048 + T = 288.15 - 0.0065 * h_m + σ = (T / 288.15) ** 4.256 + ρ = 1.225 * σ + a = np.sqrt(1.4 * 287.05 * T) + return T, ρ, a, σ +``` + +--- + +## 9. Performance Calculations + +### Maximum Level Flight Speed + +At maximum speed, Thrust = Drag: +```python +T_max = D = q * S * C_D = 0.5 * ρ * V_max² * S * C_D0 # At high speed, induced drag small +``` + +Solving: +```python +V_max ≈ sqrt(2 * T_max / (ρ * S * C_D0)) +``` + +### Stall Speed + +At stall, L = W at C_L_max: +```python +V_stall = sqrt(2 * W / (ρ * S * C_L_max)) +``` + +Or in terms of wing loading: +```python +V_stall = sqrt(2 * (W/S) / (ρ * C_L_max)) +``` + +### Best Climb Speed and Rate + +**Maximum rate of climb** occurs at the speed where excess power is maximum: +```python +P_excess = P_avail - P_required +P_required = D * V = 0.5 * ρ * V³ * S * C_D +``` + +For prop aircraft, best climb occurs roughly at: +```python +V_best_climb ≈ sqrt(2 * (W/S) / (ρ * sqrt(3 * C_D0 / K))) # Minimum power speed +``` + +**Rate of climb**: +```python +RC = P_excess / W = (P_avail - D*V) / W +``` + +Or in terms of specific excess power: +```python +P_s = (T - D) * V / W = V * (T/W - D/W) +RC = P_s # for small climb angles +``` + +### Turn Performance + +**Turn radius**: +```python +R = V² / (g * sqrt(n² - 1)) +``` + +For n >> 1: +```python +R ≈ V² / (g * n) +``` + +**Turn rate** (angular velocity): +```python +ω = V / R = g * sqrt(n² - 1) / V +``` + +**Maximum instantaneous turn rate** (limited by C_L_max): +```python +n_max_aero = q * S * C_L_max / W +ω_max = g * sqrt(n_max_aero² - 1) / V +``` + +**Maximum sustained turn rate** (limited by thrust = drag): +Must have T = D at the required C_L: +```python +# At sustained turn, T = D = q * S * (C_D0 + K * C_L²) +# And L = n * W = q * S * C_L +# Solve for n_sustained given T_avail +``` + +### Energy Management + +**Specific energy** (energy height): +```python +E_s = h + V² / (2 * g) +``` + +**Specific excess power**: +```python +P_s = dE_s/dt = (T - D) * V / W +``` + +This is THE key parameter for dogfighting—aircraft with higher P_s at a given flight condition will "win" the energy game. + +--- + +## 10. 
Implementation Strategies for High SPS + +### Vectorization is Everything + +Write all computations to operate on batched tensors: + +```python +import numpy as np + +def step_vectorized(state, action, aircraft_params): + """ + state: (N, 6) array of [V, γ, ψ, x, y, h] for N environments + action: (N, 3) array of [throttle, bank_cmd, pitch_cmd] + """ + V, γ, ψ, x, y, h = state.T + throttle, φ_cmd, α_cmd = action.T + + # Atmospheric properties (vectorized) + T_atm = 288.15 - 0.0065 * h + σ = (T_atm / 288.15) ** 4.256 + ρ = 1.225 * σ + + # Dynamic pressure + q = 0.5 * ρ * V**2 + + # Aerodynamic coefficients + C_L = compute_CL_vectorized(α_cmd, aircraft_params) + C_D = aircraft_params['CD0'] + aircraft_params['K'] * C_L**2 + + # Forces + L = q * aircraft_params['S'] * C_L + D = q * aircraft_params['S'] * C_D + T = compute_thrust_vectorized(V, throttle, σ, aircraft_params) + W = aircraft_params['mass'] * 9.81 + + # Equations of motion + n = L / W + dV_dt = (T - D) / aircraft_params['mass'] - 9.81 * np.sin(γ) + dγ_dt = (9.81 / V) * (n * np.cos(φ_cmd) - np.cos(γ)) + dψ_dt = (9.81 * n * np.sin(φ_cmd)) / (V * np.cos(γ) + 1e-6) + + # Kinematics + dx_dt = V * np.cos(γ) * np.cos(ψ) + dy_dt = V * np.cos(γ) * np.sin(ψ) + dh_dt = V * np.sin(γ) + + # Euler integration + dt = 0.02 # 50 Hz + new_state = state + dt * np.stack([dV_dt, dγ_dt, dψ_dt, dx_dt, dy_dt, dh_dt], axis=1) + + return new_state +``` + +### Avoid These Performance Killers + +1. **Python loops over environments** - Always use vectorized operations +2. **Conditionals on per-environment basis** - Use `np.where` or `np.clip` instead +3. **Complex lookup tables** - Replace with polynomial/analytical approximations +4. **Trigonometric functions** - Cache sin/cos when possible; consider small-angle approximations +5. **Division by small numbers** - Add epsilon to avoid NaN/Inf + +### JAX Implementation for GPU + +```python +import jax +import jax.numpy as jnp +from functools import partial + +@partial(jax.jit, static_argnums=(2,)) +def step_jax(state, action, aircraft_params): + """JIT-compiled step function for maximum GPU performance""" + V, γ, ψ, x, y, h = state[..., 0], state[..., 1], state[..., 2], state[..., 3], state[..., 4], state[..., 5] + + # ... same logic as numpy version ... + + return new_state + +# Vectorize over batch dimension +step_batched = jax.vmap(step_jax, in_axes=(0, 0, None)) +``` + +### Numerical Integration + +For high SPS, use simple Euler integration with small timestep: + +```python +dt = 0.02 # 50 Hz (20ms timestep) +state_new = state + dt * state_derivative +``` + +For better accuracy without much overhead, use semi-implicit Euler: +```python +# Update velocities first +V_new = V + dt * dV_dt +# Use new velocity for positions +x_new = x + dt * V_new * cos(γ) * cos(ψ) +``` + +RK4 is typically overkill for RL training and adds 4x computational cost. + +### State Normalization + +Normalize states for neural network input: +```python +state_normalized = (state - state_mean) / state_std + +# Typical normalization values for WW2 dogfight: +# V: mean=150 m/s, std=50 m/s +# γ: mean=0, std=0.5 rad +# ψ: mean=π, std=π (or use sin/cos representation) +# x, y: mean=0, std=5000 m +# h: mean=3000 m, std=2000 m +``` + +--- + +## 11. 
WW2 Aircraft Reference Data + +### P-51D Mustang + +```python +P51D = { + 'name': 'P-51D Mustang', + 'mass': 4175, # kg (loaded) + 'S': 21.65, # m² wing area + 'b': 11.28, # m wingspan + 'AR': 5.86, # aspect ratio + 'CD0': 0.0170, # zero-lift drag + 'e': 0.80, # Oswald efficiency + 'K': 0.068, # induced drag factor + 'CL_max': 1.49, # max lift coefficient (clean) + 'CL_alpha': 4.7, # per radian + 'alpha_stall': 16, # degrees + 'P_max': 1230000, # W (1650 hp at WEP) + 'h_critical': 7620, # m (25,000 ft) with 2-stage supercharger + 'V_max': 180, # m/s (703 km/h at altitude) + 'V_stall': 46, # m/s (100 mph clean) + 'RC_max': 17.5, # m/s (3450 ft/min) + 'service_ceiling': 12770, # m (41,900 ft) + 'n_limit': 8.0, # structural g limit +} +``` + +### Supermarine Spitfire Mk IX + +```python +SpitfireIX = { + 'name': 'Spitfire Mk IX', + 'mass': 3400, # kg (loaded) + 'S': 22.48, # m² wing area + 'b': 11.23, # m wingspan (clipped) + 'AR': 5.61, # aspect ratio + 'CD0': 0.0210, # zero-lift drag + 'e': 0.85, # Oswald efficiency (elliptical wing) + 'K': 0.067, # induced drag factor + 'CL_max': 1.36, # max lift coefficient + 'CL_alpha': 4.5, # per radian + 'alpha_stall': 15, # degrees + 'P_max': 1100000, # W (1475 hp) + 'h_critical': 6100, # m (20,000 ft) + 'V_max': 182, # m/s (657 km/h) + 'V_stall': 42, # m/s (82 kt) + 'RC_max': 21, # m/s (4100 ft/min) + 'service_ceiling': 13100, # m (43,000 ft) + 'n_limit': 8.0, +} +``` + +### Messerschmitt Bf 109G-6 + +```python +Bf109G = { + 'name': 'Bf 109G-6', + 'mass': 3100, # kg (loaded) + 'S': 16.05, # m² wing area + 'b': 9.92, # m wingspan + 'AR': 6.13, # aspect ratio + 'CD0': 0.0260, # zero-lift drag (higher due to design) + 'e': 0.78, # Oswald efficiency + 'K': 0.066, # induced drag factor + 'CL_max': 1.52, # max lift coefficient + 'CL_alpha': 4.8, # per radian + 'alpha_stall': 17, # degrees + 'P_max': 1050000, # W (1410 hp) + 'h_critical': 5700, # m (18,700 ft) + 'V_max': 170, # m/s (621 km/h) + 'V_stall': 50, # m/s (97 kt) + 'RC_max': 19, # m/s (3750 ft/min) + 'service_ceiling': 11550, # m (37,900 ft) + 'n_limit': 7.5, +} +``` + +### Focke-Wulf Fw 190A-8 + +```python +Fw190A = { + 'name': 'Fw 190A-8', + 'mass': 4400, # kg (loaded) + 'S': 18.30, # m² wing area + 'b': 10.51, # m wingspan + 'AR': 6.04, # aspect ratio + 'CD0': 0.0220, # zero-lift drag + 'e': 0.78, # Oswald efficiency + 'K': 0.068, # induced drag factor + 'CL_max': 1.45, # max lift coefficient + 'CL_alpha': 4.6, # per radian + 'alpha_stall': 16, # degrees + 'P_max': 1270000, # W (1700 hp with MW 50) + 'h_critical': 6300, # m (20,700 ft) + 'V_max': 171, # m/s (615 km/h) + 'V_stall': 55, # m/s (107 kt) + 'RC_max': 15, # m/s (2950 ft/min) + 'service_ceiling': 10300, # m (33,800 ft) + 'n_limit': 8.5, +} +``` + +### Mitsubishi A6M5 Zero + +```python +A6M5 = { + 'name': 'A6M5 Zero', + 'mass': 2750, # kg (loaded) + 'S': 22.44, # m² wing area + 'b': 11.0, # m wingspan + 'AR': 5.39, # aspect ratio + 'CD0': 0.0230, # zero-lift drag + 'e': 0.80, # Oswald efficiency + 'K': 0.074, # induced drag factor + 'CL_max': 1.40, # max lift coefficient + 'CL_alpha': 4.3, # per radian + 'alpha_stall': 15, # degrees + 'P_max': 840000, # W (1130 hp) + 'h_critical': 4500, # m (14,800 ft) + 'V_max': 156, # m/s (565 km/h) + 'V_stall': 40, # m/s (78 kt) + 'RC_max': 16, # m/s (3150 ft/min) + 'service_ceiling': 11740, # m (38,520 ft) + 'n_limit': 7.0, # Structural limit (lighter construction) +} +``` + +--- + +## 12. 
Validation and Sanity Checks + +### Must-Pass Tests + +Before deploying the environment, verify these behaviors: + +1. **Level flight equilibrium**: At trim conditions, aircraft should maintain altitude + ```python + assert abs(dh_dt) < 0.1 # m/s at trim + ``` + +2. **Stall speed matches data**: + ```python + V_stall_computed = np.sqrt(2 * W / (ρ_SL * S * CL_max)) + assert abs(V_stall_computed - V_stall_data) / V_stall_data < 0.05 + ``` + +3. **Max speed matches data** (approximately): + ```python + # At altitude where V_max occurs + V_max_computed = compute_max_speed(h_optimal) + assert abs(V_max_computed - V_max_data) / V_max_data < 0.10 + ``` + +4. **Turn physics are correct**: + ```python + # 60° bank should give 2g and specific turn radius + n = 1 / np.cos(np.radians(60)) + assert abs(n - 2.0) < 0.01 + R = V**2 / (g * np.sqrt(n**2 - 1)) + # At V=100 m/s, R ≈ 588 m + ``` + +5. **Energy conservation** (with zero thrust/drag): + ```python + E_s = h + V**2 / (2*g) + # With T=D=0, dE_s/dt should be zero + ``` + +6. **Climb rate matches data**: + ```python + RC_computed = compute_max_RC(h=0) + assert abs(RC_computed - RC_max_data) / RC_max_data < 0.15 + ``` + +### Behavioral Checks for Dogfighting + +1. **Energy advantage matters**: Aircraft starting with more altitude/speed should have an advantage +2. **Turn fights favor appropriate aircraft**: Low wing-loading aircraft should out-turn heavy ones +3. **Boom-and-zoom works**: Fast diving attacks followed by climb-away should be viable +4. **Stall is dangerous**: Aircraft that stall should lose controllability temporarily +5. **Altitude matters**: Engine performance should degrade at high altitude + +--- + +## 13. Sources and References + +### Primary Textbooks + +1. **Anderson, J.D.** - "Introduction to Flight" (McGraw-Hill) + - Chapters on aircraft performance, drag polar, climb/turn performance + - Standard reference for undergraduate aerodynamics + +2. **Stevens, B.L. & Lewis, F.L.** - "Aircraft Control and Simulation" (Wiley, 3rd Ed. 2015) + - Gold standard for flight dynamics modeling + - F-16 model used in many research implementations + - GitHub implementations: [isrlab/F16-Model-Matlab](https://github.com/isrlab/F16-Model-Matlab) + +3. **Raymer, D.P.** - "Aircraft Design: A Conceptual Approach" (AIAA) + - Empirical formulas for Oswald efficiency, drag estimation + - Excellent for quick approximations + +4. **Roskam, J.** - "Methods for Estimating Drag Polars of Subsonic Airplanes" + - Detailed component buildup methods + +5. **Stengel, R.E.** - "Flight Dynamics" (Princeton University Press) + - Excellent lecture notes available at: https://stengel.mycpanel.princeton.edu/ + - MATLAB code for 6-DOF simulation: [FLIGHTv2](https://stengel.mycpanel.princeton.edu/FDcodeB.html) + +### Open Source Flight Dynamics Models + +1. **JSBSim** - https://github.com/JSBSim-Team/jsbsim + - Industry-standard open-source FDM + - Used in FlightGear, DARPA ACE program + - XML-based aircraft configuration files + - Has WW2 aircraft models (P-51, etc.) + +2. **AeroBenchVV** - https://github.com/pheidlauf/AeroBenchVV + - F-16 model for verification and validation + - MATLAB implementation of Stevens & Lewis model + +3. **F16 Flight Dynamics (Python/C++)** - https://github.com/EthanJamesLew/f16-flight-dynamics + - Efficient C++ implementation with Python bindings + - Good reference for high-performance implementation + +### RL Environment Implementations + +1. 
**LAG (Light Aircraft Game)** - https://github.com/liuqh16/LAG + - JSBSim-based air combat environment + - PPO/MAPPO implementations included + - Good reference for reward shaping in dogfights + +2. **BVR Gym** - https://arxiv.org/html/2403.17533 + - Beyond-visual-range air combat environment + - Built on JSBSim with OpenAI Gym interface + +3. **Tunnel** - https://arxiv.org/html/2505.01953v1 + - Lightweight F-16 RL environment + - Focus on simplicity and accessibility + +4. **DBRL** - https://github.com/mrwangyou/DBRL + - Dogfighting benchmark for RL research + +### High-Performance RL Infrastructure + +1. **EnvPool** - https://github.com/sail-sg/envpool + - C++-based parallel environment execution + - 1M+ steps/second demonstrated + - Good patterns for vectorized environments + +2. **Isaac Gym** - https://github.com/isaac-sim/IsaacGymEnvs + - GPU-accelerated physics simulation + - Demonstrates full GPU pipeline for RL + +3. **PufferLib** - https://github.com/PufferAI/PufferLib + - Clean, fast RL training framework + - Good target platform for implementation + +### WW2 Aircraft Data + +1. **WWII Aircraft Performance** - https://www.wwiiaircraftperformance.org/ + - Comprehensive primary source documents + - Flight test data, performance charts + +2. **CFD Evaluation of WW2 Fighters** - Lednicer (1995) + - ResearchGate: "A CFD evaluation of three prominent World War II fighter aircraft" + - Spitfire, P-51, Fw 190 aerodynamic comparison + +3. **Aerodynamics of the Spitfire** - Royal Aeronautical Society + - Detailed analysis of Spitfire aerodynamics + - CD0 ≈ 0.020-0.021 for Spitfire + +### Atmospheric Models + +1. **International Standard Atmosphere** - ISO 2533:1975 + - Official ISA specification + - Wikipedia summary is accurate and sufficient + +2. 
**ICAO Standard Atmosphere** - ICAO Doc 7488-CD + - Extended to 80 km altitude + +--- + +## Quick Reference Card + +### Core Equations (Copy-Paste Ready) + +```python +# === ATMOSPHERE (ISA, troposphere) === +T = 288.15 - 0.0065 * h # Temperature [K], h in meters +σ = (T / 288.15) ** 4.256 # Density ratio +ρ = 1.225 * σ # Density [kg/m³] +a = 20.05 * np.sqrt(T) # Speed of sound [m/s] + +# === AERODYNAMICS === +q = 0.5 * ρ * V**2 # Dynamic pressure [Pa] +C_L = C_Lα * α # Lift coefficient (linear region) +C_D = C_D0 + K * C_L**2 # Drag polar +L = q * S * C_L # Lift [N] +D = q * S * C_D # Drag [N] + +# === PROPULSION (simple) === +P = P_max * σ * throttle # Power available [W] +T = η_p * P / V # Thrust [N], η_p ≈ 0.80 + +# === PERFORMANCE === +V_stall = np.sqrt(2 * W / (ρ * S * C_L_max)) # Stall speed +R_turn = V**2 / (g * np.sqrt(n**2 - 1)) # Turn radius +ω_turn = g * np.sqrt(n**2 - 1) / V # Turn rate [rad/s] +RC = (P * η_p - D * V) / W # Rate of climb [m/s] + +# === EQUATIONS OF MOTION (3DOF point mass) === +dV_dt = (T - D) / m - g * np.sin(γ) +dγ_dt = (g / V) * (n * np.cos(φ) - np.cos(γ)) +dψ_dt = g * n * np.sin(φ) / (V * np.cos(γ)) +dx_dt = V * np.cos(γ) * np.cos(ψ) +dy_dt = V * np.cos(γ) * np.sin(ψ) +dh_dt = V * np.sin(γ) +``` + +### Typical Values to Remember + +| Parameter | Typical Value | Notes | +|-----------|--------------|-------| +| C_D0 | 0.017-0.028 | Lower = cleaner aircraft | +| e (Oswald) | 0.75-0.85 | Elliptic wing ≈ 0.85 | +| K | 0.06-0.08 | K = 1/(π·AR·e) | +| C_L_max | 1.3-1.6 | Clean configuration | +| C_Lα | 4.5-5.0 /rad | 3D wing | +| α_stall | 14-18° | Depends on airfoil | +| η_propeller | 0.75-0.85 | Cruise conditions | +| σ at 20,000 ft | 0.53 | Density ratio | +| σ at 30,000 ft | 0.37 | Density ratio | + +--- + +*Document prepared for Claude agents developing WW2 dogfighting RL environments. Focus on computational efficiency while maintaining physical plausibility.* diff --git a/pufferlib/ocean/dogfight/AUTOPILOT_TODO.md b/pufferlib/ocean/dogfight/AUTOPILOT_TODO.md new file mode 100644 index 000000000..a5b0c1d7d --- /dev/null +++ b/pufferlib/ocean/dogfight/AUTOPILOT_TODO.md @@ -0,0 +1,172 @@ +# Autopilot System TODO + +Technical debt and future improvements for the target aircraft autopilot system. + +--- + +## Critical Issues + +### 1. Python/C Enum Sync Problem +**Risk: Silent breakage if enums diverge** + +```c +// autopilot.h +typedef enum { AP_STRAIGHT = 0, AP_LEVEL, ... } AutopilotMode; +``` +```python +# dogfight.py +class AutopilotMode: + STRAIGHT = 0 # Must manually match C +``` + +**Fix options:** +- [ ] Generate Python constants from C header at build time +- [ ] Add runtime validation that checks enum values match +- [ ] Add static_assert in C for enum count + +### 2. ~~No Vectorized set_autopilot~~ DONE (80bcf31e) +```python +def set_autopilot(self, env_idx=0, ...): # Must call N times for N envs +``` + +**Fix:** +- [x] Accept `env_idx=None` to mean "all environments" +- [x] Add C binding `vec_set_autopilot()` for efficiency + +### 3. force_state() Doesn't Reset PID State +When teleporting plane via `force_state()`, autopilot PID state (`prev_vz`, `prev_bank_error`) retains stale values causing derivative spikes. + +**Fix:** +- [ ] Reset autopilot PID state in `force_state()` C function +- [ ] Or add `reset_pid` parameter to force_state + +### 4. No Mode Bounds Check +Invalid mode values (e.g., `mode=99`) silently become AP_STRAIGHT. 
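+A hedged sketch of the guard described in the fix items below (the `_checked` helper name is hypothetical; `AutopilotState`, `AutopilotMode`, and `AP_COUNT` are assumed from the snippets elsewhere in this file):
+
+```c
+// Hedged sketch: reject out-of-range modes instead of silently falling back
+// to AP_STRAIGHT. Assumes AP_COUNT is the final AutopilotMode entry and that
+// AutopilotState stores the current mode in a field named `mode` (assumption).
+int autopilot_set_mode_checked(AutopilotState* ap, int mode) {
+    if (mode < 0 || mode >= AP_COUNT) {
+        return -1;  // caller can surface or log the error
+    }
+    ap->mode = (AutopilotMode)mode;
+    return 0;
+}
+```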
+ +**Fix:** +- [ ] Add bounds check in `autopilot_set_mode()` +- [ ] Return error or clamp to valid range + +--- + +## Curriculum Learning Gaps + +### ~~Mode Weights for Non-Uniform Selection~~ DONE (0a1c2e6d) +```python +env.set_mode_weights(level=0.5, turn_left=0.25, turn_right=0.25, climb=0.0, descend=0.0) +``` + +**Tasks:** +- [x] Add `mode_weights` array to AutopilotState +- [x] Implement weighted random selection in `autopilot_randomize()` +- [x] Add Python API: `set_mode_weights()` +- [x] Default weights = uniform +- [x] Add `get_autopilot_mode()` for testing/debugging +- [x] Add unit test in test_flight.py + +### Per-Episode Parameter Variance +Bank angle and climb rate are fixed at `set_autopilot()` time. + +**Need:** +- [ ] `bank_deg_min`, `bank_deg_max` fields - randomize within range each reset +- [ ] `climb_rate_min`, `climb_rate_max` fields +- [ ] `throttle_min`, `throttle_max` fields + +### Difficulty Abstraction +Single 0.0-1.0 difficulty scale that controls multiple parameters. + +```python +# Desired API: +env.set_difficulty(0.3) # Maps to mode weights, bank angles, etc. +``` + +**Tasks:** +- [ ] Design difficulty mapping (what parameters at what difficulty) +- [ ] Implement `set_difficulty()` in Python +- [ ] Document difficulty levels + +### Per-Environment Difficulty +In vectorized envs, all opponents currently share settings. + +**Need:** +- [ ] Allow different autopilot settings per env index +- [ ] Or difficulty gradient across env indices + +--- + +## Test Integration Gaps + +### Player Autopilot for test_flight.py +Currently autopilot only controls opponent. test_flight.py tests player with Python PID. + +**Tasks:** +- [ ] Add `AutopilotState player_ap` to Dogfight struct +- [ ] Add `player_autopilot_enabled` flag +- [ ] Add Python API `set_player_autopilot()` +- [ ] Migrate test_flight.py PID tests to use C autopilot + +### Query Autopilot State +~~No way to verify autopilot mode from Python.~~ Partial (0a1c2e6d) + +**Tasks:** +- [x] Add `get_autopilot_mode()` C binding +- [ ] Return current mode, bank, climb_rate, etc. (only mode implemented) +- [x] Add to Python wrapper + +--- + +## Missing Maneuvers + +### Basic Extensions +- [ ] `AP_WEAVE` - S-turns with configurable period +- [ ] `AP_CLIMBING_TURN` - Combined climb + bank +- [ ] `AP_DESCENDING_TURN` - Combined descent + bank + +### Evasive Maneuvers (Hard difficulty) +- [ ] `AP_JINK` - Random direction changes at intervals +- [ ] `AP_BREAK` - Hard turn away from threat +- [ ] `AP_BARREL_ROLL` - Defensive roll + +### Pursuit Behaviors +- [ ] `AP_PURSUIT` - Turn toward player +- [ ] `AP_LEAD_PURSUIT` - Aim ahead of player +- [ ] `AP_LAG_PURSUIT` - Trail behind player + +### Opponent Combat +- [ ] Enable opponent firing (`actions[4]` currently hardcoded to -1) +- [ ] Accuracy scaling based on difficulty +- [ ] Reaction time/delay modeling + +--- + +## Code Quality + +### Explicit Random Mode List +Current implicit range assumption is fragile: +```c +int mode = 1 + (rand() % (AP_COUNT - 2)); // Assumes modes 1..5 are valid +``` + +**Fix:** +- [ ] Replace with explicit array of randomizable modes +```c +static const AutopilotMode RANDOM_MODES[] = {AP_LEVEL, AP_TURN_LEFT, ...}; +``` + +### PID Derivative Smoothing +First step after reset may have derivative spike. + +**Fix:** +- [ ] Initialize `prev_vz` to current `vz` on first step +- [ ] Or use filtered derivative + +--- + +## Priority Order + +1. ~~**High:** Vectorized set_autopilot~~ DONE (80bcf31e) +2. 
~~**High:** Mode weights (core curriculum feature)~~ DONE (0a1c2e6d) +3. **Medium:** Per-episode parameter variance +4. **Medium:** Player autopilot for tests +5. **Low:** Additional maneuvers +6. **Low:** Opponent combat diff --git a/pufferlib/ocean/dogfight/BISECTION.md b/pufferlib/ocean/dogfight/BISECTION.md new file mode 100644 index 000000000..dd0fce9dd --- /dev/null +++ b/pufferlib/ocean/dogfight/BISECTION.md @@ -0,0 +1,287 @@ +# Training Regression Bisection + +## Problem + +Agent used to train to ~1.0 kills easily. After adding features (curriculum, stages), even the simplest scenario (TAIL_CHASE) only achieves ~0.5 kills. + +**Goal**: Find the commit where training regressed. + +--- + +## Instructions + +### Git rules: + +- **OK**: `git stash`, `git checkout`, `git diff`, `git log` +- **NOT OK**: `git add`, `git commit`, `git push` - DO NOT modify git history + +### For each commit you test: + +```bash +# 1. Stash any local changes if needed +git stash + +# 2. Checkout the commit +git checkout HASH + +# 3. Build +python setup.py build_ext --inplace --force + +# 4. Run training 3 times, each to a separate log +python -m pufferlib.pufferl train puffer_dogfight 2>&1 | tee pufferlib/ocean/dogfight/baselines/bisect_HASH_r1.log +python -m pufferlib.pufferl train puffer_dogfight 2>&1 | tee pufferlib/ocean/dogfight/baselines/bisect_HASH_r2.log +python -m pufferlib.pufferl train puffer_dogfight 2>&1 | tee pufferlib/ocean/dogfight/baselines/bisect_HASH_r3.log + +# 5. Return to working branch and restore stash +git checkout dogfight +git stash pop # if you stashed earlier +``` + +### How to fill in the table: + +1. **Kills**: Average the final `kills` metric from all 3 runs +2. **Return**: Average the final `episode_return` from all 3 runs +3. **Notes**: + - If one run is a major outlier (e.g., 0.3, 0.9, 0.35), note it: "outlier: 0.9" + - Note anything unusual: "no curriculum in this version", "different reward scale", etc. + +### Choosing which commit to test next: + +**DO NOT blindly bisect.** Use the commit messages to make informed choices. + +1. Start with **a4172661** (baseline) to confirm training once worked +2. Use binary search as a GUIDE, but prioritize commits with meaningful names: + - "Fix Reward and Score" - likely affects kills metric + - "Simplify Penalties and Rewards" - definitely relevant + - "Rewards Fixed - Sweepable" - relevant + - "Debug Prints" - probably NOT relevant, skip unless adjacent to regression +3. If bisect lands you on "Debug Prints" but "Fix Reward and Score" is one commit away, test the meaningful one instead +4. The goal is to UNDERSTAND what broke, not just find a hash + +**Think like a debugger**: What change COULD have broken training? Test those commits first. + +--- + +## Results + +Kills = 3-run average. Return = episode return average. Perf = older metric (same as kills in later commits). 
+ +| Hash | Message | Kills | Perf | Return | Notes | +|------|---------|-------|------|--------|-------| +| 4e640ee0 | Trying to Fix Curriculum - Agent Trains Poorly | ~0.5 | | | Current state, KNOWN BAD | +| f6c821d7 | Still Trying to Fix Blowups | | | | | +| 2c3073f5 | Debug Prints | | | | | +| b68d1b22 | Simplify Penalties and Rewards | | | | 6 obs schemes, default=0 | +| b68_obs0 | ^ scheme 0 (size 12) | 0.35 | 0.35 | -9 | BAD, eps_per_stage=120 | +| b68_obs1 | ^ scheme 1 (size 17) | | | | | +| b68_obs2 | ^ scheme 2 (size 10) | | | | | +| b68_obs3 | ^ scheme 3 (size 10) | | | | | +| b68_obs4 | ^ scheme 4 (size 13) | | | | | +| b68_obs5 | ^ scheme 5 (size 15) | | | | | +| 9dca5c67 | Reduce Prints | | | | | +| ab222bfc | Increase Batch Size for Speed | | | | eps_per_stage=15000 | +| ab2_obs5 | ^ scheme 5 (size 15) | 1.0 | 1.0 | -0.8 | **GOOD** last good commit | +| 7fd88f1c | Next Sweep Improvements - Likes to Aileron Roll too Much | | | | 6 obs, eps_per_stage=60 | +| 7fd_obs5 | ^ scheme 5 (size 15) | 0.02 | 0.02 | -82 | **TERRIBLE** eps_per_stage=60 | +| 7fd_obs0 | ^ scheme 0, eps=100k | 0.62 | 0.62 | -23 | All 3 runs consistent | +| 7fd_obs1 | ^ scheme 1, eps=100k | 0.71 | 0.71 | -18 | outlier r3=0.84 | +| 7fd_obs2 | ^ scheme 2, eps=100k | 0.81 | 0.81 | -26 | outlier r2=0.99! | +| 7fd_obs3 | ^ scheme 3, eps=100k | 0.55 | 0.55 | -45 | WORST, outlier r3=0.30 | +| 7fd_obs4 | ^ scheme 4, eps=100k | 0.62 | 0.62 | -35 | All 3 runs consistent | +| 7fd_obs5 | ^ scheme 5, eps=100k | 0.78 | 0.78 | -14 | outlier r3=0.96 | +| 30fa9fed | Fix Obs 5 Schema and Adjust Penalties | 0.94 | 0.94 | -34 | GOOD, outlier r3=0.81 | +| 652ab7a6 | Fix Elevator Problems | | | | | +| fe7e26a2 | Roll Penalty - Elevator Might Be Inversed | | | | | +| bc728368 | New Obs Schemas - New Sweep Prep | | | | 6 obs schemes (0-5) | +| bc7_obs0 | ^ scheme 0 (size 12) | | | | | +| bc7_obs1 | ^ scheme 1 (size 17) | | | | | +| bc7_obs2 | ^ scheme 2 (size 10) | | | | | +| bc7_obs3 | ^ scheme 3 (size 10) | | | | | +| bc7_obs4 | ^ scheme 4 (size 13) | | | | | +| bc7_obs5 | ^ scheme 5 (size 15) | 1.0 | 1.0 | 0.1 | **GOOD** ini default, very short eps | +| 2606e20e | Apply Sweep df1 84 u5i33hej | | | | 6 obs schemes (0-5) | +| 260_obs0 | ^ scheme 0 (size 19) | | | | | +| 260_obs1 | ^ scheme 1 (size 21) | | | | | +| 260_obs2 | ^ scheme 2 (size 12) | | | | | +| 260_obs3 | ^ scheme 3 (size 17) | | | | | +| 260_obs4 | ^ scheme 4 (size 10) | | | | | +| 260_obs5 | ^ scheme 5 (size 43) | | | | | +| 17f18c19 | Fix Reward and Score | | | | kills tracking added here | +| 3cc5b588 | More Sweep Prep | | | | | +| a31d1dc7 | Fix Terminals and Loggin | | | | | +| 26709b93 | Preparing for Sweeps | | | | | +| 04dd0167 | Rewards Fixed - Sweepable | | | | 6 obs schemes, default=0 | +| 04d_obs0 | ^ scheme 0 (size 19) | 3.4 | 0.93 | 41 | **GOOD BASELINE** | +| 04d_obs1 | ^ scheme 1 (size 21) | | | | | +| 04d_obs2 | ^ scheme 2 (size 12) | | | | | +| 04d_obs3 | ^ scheme 3 (size 17) | | | | | +| 04d_obs4 | ^ scheme 4 (size 10) | | | | | +| 04d_obs5 | ^ scheme 5 (size 43) | | | | | +| 63a7aaed | Observation Schemas Swept | | | | | +| 0a1c2e6d | Weighted Random Actions | | | | | +| 80bcf31e | Vectorized Autopilot | | | | | +| 85980679 | Autopilot Seperate File | | | | | +| 374871df | Small Perf - Move cosf Out of Loop | | | | | +| 1131e836 | Simple Optimizations | | | | | +| 1c30c546 | Coordinated Turn Tests | | | | | +| 3582d2d4 | Physics in Own File - Test Flights | | | | | +| 95eb2efd | Moved Physics to File | | | | | +| b29bf5ac | Renamed md Files | | | 
| | +| 0116b97c | Physics model: incidence, comments, test suite | | | | | +| 332a9ae0 | Good Claude - Wireframe Planes | | | | | +| daaf9024 | Rendered with spheres or something | | | | | +| 49af2d49 | Reward Changes | | | | | +| a4172661 | Trains and Evals | 0 | N/A | -43 | no kills tracking yet | + +--- + +## Commit Summaries + +Run `git diff HASH~1 HASH` to see what changed. Summarize each commit as you test it. Keep in chronological order (newest first). + +### 4e640ee0 +(summarize after reviewing diff) + +### f6c821d7 +(summarize after reviewing diff) + +### 2c3073f5 +(summarize after reviewing diff) + +### b68d1b22 +(summarize after reviewing diff) + +### 9dca5c67 +(summarize after reviewing diff) + +### 7fd88f1c +(summarize after reviewing diff) + +### 30fa9fed +(summarize after reviewing diff) + +### 652ab7a6 +(summarize after reviewing diff) + +### fe7e26a2 +(summarize after reviewing diff) + +### bc728368 +(summarize after reviewing diff) + +### 2606e20e +(summarize after reviewing diff) + +### 17f18c19 +(summarize after reviewing diff) + +### 3cc5b588 +(summarize after reviewing diff) + +### a31d1dc7 +(summarize after reviewing diff) + +### 26709b93 +(summarize after reviewing diff) + +### 04dd0167 +Rewards Fixed - Sweepable. Proper kills/perf tracking. perf=0.93 (93% episodes get ≥1 kill), kills=3.4 avg, return=41. **GOOD BASELINE - training works here.** + +### 63a7aaed +(summarize after reviewing diff) + +### 0a1c2e6d +(summarize after reviewing diff) + +### 80bcf31e +(summarize after reviewing diff) + +### 85980679 +(summarize after reviewing diff) + +### 374871df +(summarize after reviewing diff) + +### 1131e836 +(summarize after reviewing diff) + +### 1c30c546 +(summarize after reviewing diff) + +### 3582d2d4 +(summarize after reviewing diff) + +### 95eb2efd +(summarize after reviewing diff) + +### b29bf5ac +(summarize after reviewing diff) + +### 0116b97c +(summarize after reviewing diff) + +### 332a9ae0 +(summarize after reviewing diff) + +### daaf9024 +(summarize after reviewing diff) + +### 49af2d49 +(summarize after reviewing diff) + +### a4172661 +Initial dogfight commit. Creates entire environment from scratch. No kills/perf tracking - only logs episode_return, episode_length, n. Returns: -43.9, -109.6, -57.4 (avg -70). Cannot use as baseline for kills metric. + +--- + +## Findings + +### Possible Cause: `episodes_per_stage` Config Change + +| Commit | episodes_per_stage | perf | +|--------|-------------------|------| +| 30fa9fed (GOOD) | 15000 | 0.94 | +| 7fd88f1c (BAD) | 60 | 0.02 | +| 7fd_edit (PARTIAL) | 100000 | 0.76 | +| b68d1b22 (BAD) | 120 | 0.35 | + +**Finding**: `episodes_per_stage` is a MAJOR factor (0.02 → 0.76 when increased), but code changes in 7fd88f1c also matter (0.76 vs 1.0 in last good commit). + +### First Bad Commit: 7fd88f1c + +**Config changes** (ab222bfc → 7fd88f1c): +- `episodes_per_stage`: 15000 → 60 (250x fewer!) +- `penalty_roll`: 0.0015 → 0.003 (2x) +- NEW: `penalty_aileron = 0.1` +- NEW: `penalty_bias = 0.01` +- NEW: `reward_approach = 0.005` +- NEW: `reward_level = 0.02` +- Swapped reward_firing_solution ↔ reward_tracking + +**Code changes**: 454 insertions, 101 deletions in dogfight.h, autopilot.h, etc. 
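+
+To separate the config change from the code change, the two commits can be diffed per path (a sketch; file paths are assumed from this repo's layout, and `git diff` is allowed by the rules above):
+
+```bash
+# Config-only diff: episodes_per_stage, penalties, rewards
+git diff ab222bfc 7fd88f1c -- pufferlib/config/ocean/dogfight.ini
+
+# Code-only diff: reward/penalty logic and autopilot changes
+git diff ab222bfc 7fd88f1c -- pufferlib/ocean/dogfight/dogfight.h pufferlib/ocean/dogfight/autopilot.h
+```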
+ +### All Obs Schemes at 7fd88f1c (eps_per_stage=100000) + +| Scheme | Size | r1 | r2 | r3 | Avg Perf | Notes | +|--------|------|-----|-----|-----|----------|-------| +| 0 | 12 | 0.62 | 0.63 | 0.62 | 0.62 | consistent | +| 1 | 17 | 0.64 | 0.63 | 0.84 | 0.71 | high variance | +| 2 | 10 | 0.80 | 0.99 | 0.64 | 0.81 | r2 nearly perfect! | +| 3 | 10 | 0.70 | 0.65 | 0.30 | 0.55 | WORST scheme | +| 4 | 13 | 0.66 | 0.58 | 0.62 | 0.62 | consistent | +| 5 | 15 | 0.70 | 0.67 | 0.96 | 0.78 | high variance | + +**Key Finding**: ALL obs schemes underperform vs last good commit (1.0). This is NOT just one bad scheme - it's a deeper code problem in 7fd88f1c. + +Observations: +- High variance across runs for all schemes +- Scheme 2 had one run hit 0.99, scheme 5 hit 0.96 - so 1.0 is achievable but inconsistent +- Scheme 3 is worst (0.55 avg) +- Problem affects all observation schemes equally + +### To Investigate + +1. ~~Test 7fd88f1c with `episodes_per_stage=100000` to isolate config vs code~~ **DONE**: perf varies by scheme, 0.55-0.81 +2. ~~Test all obs schemes at 7fd88f1c~~ **DONE**: ALL schemes underperform, not one bad scheme +3. Diff dogfight.h code between ab222bfc and 7fd88f1c to find the breaking change +4. Focus on: new penalties (aileron=0.1, bias=0.01), new rewards (approach, level), or code logic bugs diff --git a/pufferlib/ocean/dogfight/OBSERVATION_EXPERIMENTS.md b/pufferlib/ocean/dogfight/OBSERVATION_EXPERIMENTS.md new file mode 100644 index 000000000..3b62395a7 --- /dev/null +++ b/pufferlib/ocean/dogfight/OBSERVATION_EXPERIMENTS.md @@ -0,0 +1,452 @@ +# Observation Space Experiments + +Design doc for systematic observation scheme comparison. + +**Goal**: Empirically determine the best observation representation for dogfight learning. + +**Method**: PufferLib sweep across observation schemes, all else constant. + +--- + +## Observation Scheme Candidates + +### Scheme 0: CURRENT (Baseline) +World-frame Cartesian. + +``` +Player: pos(3), vel(3), quat(4), up(3) = 13 +Relative: pos(3), vel(3) = 6 +Total: 19 +``` + +Issues: +- Rel pos/vel in world frame - agent must learn quaternion rotation +- No direct aiming info + +--- + +### Scheme 1: BODY_FRAME +Transform relative quantities to player body frame. + +``` +Player: pos(3), vel_body(3), quat(4), up(3) = 13 +Relative: pos_body(3), vel_body(3) = 6 +Aiming: aim_dot(1), dist_norm(1) = 2 +Total: 21 +``` + +Computation: +```c +Quat q_inv = quat_inverse(p->ori); +Vec3 rel_pos_body = quat_rotate(q_inv, sub3(o->pos, p->pos)); +Vec3 rel_vel_body = quat_rotate(q_inv, sub3(o->vel, p->vel)); +Vec3 vel_body = quat_rotate(q_inv, p->vel); +``` + +Benefit: `rel_pos_body.x > 0` always means "ahead of me" + +--- + +### Scheme 2: ANGLES +Spherical coordinates - azimuth, elevation, distance. + +``` +Player: pos(3), speed(1), pitch(1), roll(1), yaw(1) = 7 +Target: azimuth(1), elevation(1), distance(1), closing_rate(1) = 4 +Opponent: heading_rel(1) = 1 +Total: 13 +``` + +Computation: +```c +// Target angles in body frame +Vec3 to_target_body = quat_rotate(q_inv, rel_pos); +float azimuth = atan2f(to_target_body.y, to_target_body.x); // -pi to pi +float elevation = asinf(to_target_body.z / norm3(to_target_body)); // -pi/2 to pi/2 +float distance = norm3(rel_pos); + +// Player attitude (Euler from quaternion) +// pitch = asin(2*(qw*qy - qz*qx)) +// roll = atan2(2*(qw*qx + qy*qz), 1 - 2*(qx*qx + qy*qy)) +// yaw = atan2(2*(qw*qz + qx*qy), 1 - 2*(qy*qy + qz*qz)) +``` + +Benefit: Directly answers "how far off am I pointing?" 
+Risk: Discontinuities at ±180° azimuth, ±90° elevation + +--- + +### Scheme 3: CONTROL_ERROR +What control inputs would point at target? + +``` +Player: pos(3), speed(1), quat(4), up(3) = 11 +Aiming: pitch_error(1), yaw_error(1), roll_to_turn(1), dist(1) = 4 +Target: closing_rate(1), opp_heading_rel(1) = 2 +Total: 17 +``` + +Computation: +```c +// Error = angle from nose to target +Vec3 to_target_body = quat_rotate(q_inv, rel_pos); +Vec3 to_target_norm = normalize3(to_target_body); + +// Pitch error: positive = need to pitch up +float pitch_error = asinf(to_target_norm.z); + +// Yaw error: positive = need to yaw right +float yaw_error = atan2f(to_target_norm.y, to_target_norm.x); + +// Roll to align turn: if target is right, roll right helps +float roll_to_turn = ... // bank angle that would help turn toward target +``` + +Benefit: Minimal transformation from observation to action +Risk: May lose situational awareness info + +--- + +### Scheme 4: REALISTIC (Cockpit Instruments) +Only what a WW2 pilot could actually see/read. + +``` +Instruments: airspeed(1), altitude(1), pitch(1), roll(1) = 4 +Gunsight: target_az(1), target_el(1), target_size(1) = 3 +Visual: target_aspect(1), horizon_visible(1) = 2 +Total: 10 +``` + +Benefit: Most constrained - if this works, simpler is better +Risk: May lack critical information + +--- + +### Scheme 5: MAXIMALIST +Everything potentially useful, let network sort it out. + +``` +Player: pos(3), vel_world(3), vel_body(3), quat(4), up(3), + speed(1), pitch(1), roll(1), yaw(1) = 20 +Target: rel_pos_world(3), rel_pos_body(3), rel_vel_world(3), rel_vel_body(3), + azimuth(1), elevation(1), distance(1), aim_dot(1), closing_rate(1) = 17 +Opponent: opp_fwd_world(3), opp_fwd_body(3) = 6 +Total: 43 +``` + +Benefit: Network has all possible information +Risk: Larger network needed, slower training, redundancy + +--- + +## Implementation Strategy + +### Option A: Compile-Time Schemes +```c +#define OBS_SCHEME 1 // 0=CURRENT, 1=BODY_FRAME, 2=ANGLES, etc. + +#if OBS_SCHEME == 0 +#define OBS_SIZE 19 +#elif OBS_SCHEME == 1 +#define OBS_SIZE 21 +// ... +#endif + +void compute_observations(Dogfight *env) { +#if OBS_SCHEME == 0 + // Current implementation +#elif OBS_SCHEME == 1 + // Body frame implementation +#endif +} +``` + +Pro: No runtime overhead +Con: Requires recompilation for each scheme + +### Option B: Runtime Config +```c +typedef enum { + OBS_CURRENT = 0, + OBS_BODY_FRAME, + OBS_ANGLES, + OBS_CONTROL_ERROR, + OBS_REALISTIC, + OBS_MAXIMALIST +} ObsScheme; + +// In Dogfight struct +ObsScheme obs_scheme; +int obs_size; // Set at init based on scheme +``` + +Pro: Single binary, config-driven sweeps +Con: Slight runtime overhead, more complex code + +### Recommendation: Option B (Runtime Config) + +Enables PufferLib sweep integration without recompilation. 
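+
+A minimal sketch of the dispatch this recommendation implies. `compute_observations_body_frame()` and `compute_observations_angles()` are sketched later in this document; the remaining per-scheme function names are assumptions:
+
+```c
+// Sketch: pick the observation writer from the runtime obs_scheme field (Option B)
+void compute_observations(Dogfight *env) {
+    switch (env->obs_scheme) {
+        case OBS_BODY_FRAME:    compute_observations_body_frame(env);    break;
+        case OBS_ANGLES:        compute_observations_angles(env);        break;
+        case OBS_CONTROL_ERROR: compute_observations_control_error(env); break;  // assumed name
+        case OBS_REALISTIC:     compute_observations_realistic(env);     break;  // assumed name
+        case OBS_MAXIMALIST:    compute_observations_maximalist(env);    break;  // assumed name
+        case OBS_CURRENT:
+        default:                compute_observations_current(env);       break;  // assumed name
+    }
+}
+```
+
+With this in place, `obs_size` only needs a lookup table indexed by the same enum at init time.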
+ +--- + +## PufferLib Sweep Integration + +### INI Config Addition +```ini +[dogfight] +obs_scheme = 1 # 0-5, maps to enum +``` + +### Sweep Config +```yaml +# sweeps/dogfight_obs.yaml +base_config: puffer_dogfight +sweep: + obs_scheme: [0, 1, 2, 3, 4, 5] + +# Or focused comparison: +sweep: + obs_scheme: [0, 1, 2] # Current vs Body Frame vs Angles +``` + +### Python Integration +```python +# dogfight.py +def __init__(self, ..., obs_scheme=0): + self.obs_scheme = obs_scheme + self.obs_size = OBS_SIZES[obs_scheme] # Lookup table + + self.observation_space = gymnasium.spaces.Box( + low=-1, high=1, + shape=(self.obs_size,), + dtype=np.float32 + ) +``` + +--- + +## Experimental Protocol + +### Phase 1: Focused Comparison (Recommended First) +Compare 3 most promising schemes: +- Scheme 0 (CURRENT) - baseline +- Scheme 1 (BODY_FRAME) - expected best +- Scheme 2 (ANGLES) - alternative representation + +Run: 3 schemes × 3 seeds × 100M steps = 9 runs + +### Phase 2: Extended Comparison +If Phase 1 inconclusive, add: +- Scheme 3 (CONTROL_ERROR) +- Scheme 5 (MAXIMALIST) + +### Phase 3: Ablation Studies +On best scheme, ablate individual observations: +- Remove aim_dot: does it hurt? +- Remove opponent heading: does it matter? +- etc. + +--- + +## Metrics + +Primary: +- Episode return (mean, std over 3 seeds) +- Kills per episode +- Accuracy (hits/shots) + +Secondary: +- Learning speed (steps to reach threshold performance) +- Final policy variance +- Behavioral analysis (does agent pursue? aim? fire appropriately?) + +--- + +## Expected Outcomes (Pre-Experiment Hypotheses) + +| Scheme | Hypothesis | +|--------|------------| +| 0 CURRENT | Baseline - agent struggles with aiming | +| 1 BODY_FRAME | **Best** - easiest for network to learn spatial relationships | +| 2 ANGLES | Good for aiming, may have discontinuity issues | +| 3 CONTROL_ERROR | Fast initial learning, may plateau | +| 4 REALISTIC | Insufficient information | +| 5 MAXIMALIST | Works but slower, needs bigger network | + +--- + +## Actual Results (2026-01-14) + +| Scheme | Obs | Return | Kills | Combat? | Notes | +|--------|-----|--------|-------|---------|-------| +| 0 WORLD_FRAME | 19 | +30.30 | **0.30** | **YES** | Best for combat | +| 1 BODY_FRAME | 21 | +22.27 | 0.12 | Weak | Worse than baseline | +| 2 ANGLES | 12 | +128.90 | 0.11 | No | Exploits pursuit reward | +| 3 CONTROL_ERROR | 17 | +166.40 | 0.00 | No | Exploits pursuit reward | +| 4 REALISTIC | 10 | +168.39 | 0.00 | No | Exploits pursuit reward | +| 5 MAXIMALIST | 43 | +83.39 | 0.13 | 1/3 | High variance | + +**Key Finding:** WORLD_FRAME (scheme 0) is the only scheme where all 3 runs consistently learned combat. The "engineered" observation schemes (2-4) achieved very high return by exploiting pursuit reward shaping without learning to fire. The hypothesis that BODY_FRAME would be best was **wrong** - the network actually learns combat better with world-frame observations. + +**Insight:** The pursuit reward shaping is too strong. Agents can maximize return by chasing without ever shooting. World-frame observations may make this exploitation harder because the spatial relationships aren't pre-computed for the agent. 
+ +--- + +## Implementation Checklist + +- [x] Add `obs_scheme` to Dogfight struct +- [x] Add `obs_scheme` to INI config parsing +- [x] Implement OBS_SIZE lookup table +- [x] Implement compute_observations_scheme_X() for each scheme (all 6) +- [x] Update Python observation_space based on scheme +- [x] Create sweep config file +- [x] Run Phase 1 experiments (all 6 schemes, 3 runs each) +- [x] Analyze results +- [x] Document findings in BASELINE_SUMMARY.md + +--- + +## Code Sketch: Body Frame Implementation + +```c +void compute_observations_body_frame(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + // Inverse quaternion for world->body transforms + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player state + Vec3 vel_body = quat_rotate(q_inv, p->vel); + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + + // Relative state in body frame + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_vel = sub3(o->vel, p->vel); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + Vec3 rel_vel_body = quat_rotate(q_inv, rel_vel); + + // Aiming info + float dist = norm3(rel_pos); + Vec3 fwd = vec3(1, 0, 0); // In body frame, forward is always +X + Vec3 to_target = normalize3(rel_pos_body); + float aim_dot = dot3(to_target, fwd); // 1.0 = perfect aim + + int i = 0; + // Player position (world frame - needed for bounds awareness) + env->observations[i++] = p->pos.x * INV_WORLD_HALF_X; + env->observations[i++] = p->pos.y * INV_WORLD_HALF_Y; + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; + + // Player velocity (body frame) + env->observations[i++] = vel_body.x * INV_MAX_SPEED; // Forward speed + env->observations[i++] = vel_body.y * INV_MAX_SPEED; // Sideslip + env->observations[i++] = vel_body.z * INV_MAX_SPEED; // Climb rate + + // Player orientation + env->observations[i++] = p->ori.w; + env->observations[i++] = p->ori.x; + env->observations[i++] = p->ori.y; + env->observations[i++] = p->ori.z; + + // Player up vector (world frame) + env->observations[i++] = up.x; + env->observations[i++] = up.y; + env->observations[i++] = up.z; + + // Relative position (body frame) - THE KEY CHANGE + env->observations[i++] = rel_pos_body.x * INV_WORLD_HALF_X; + env->observations[i++] = rel_pos_body.y * INV_WORLD_HALF_Y; + env->observations[i++] = rel_pos_body.z * INV_WORLD_MAX_Z; + + // Relative velocity (body frame) + env->observations[i++] = rel_vel_body.x * INV_MAX_SPEED; + env->observations[i++] = rel_vel_body.y * INV_MAX_SPEED; + env->observations[i++] = rel_vel_body.z * INV_MAX_SPEED; + + // Aiming helpers + env->observations[i++] = aim_dot; // -1 to 1, 1 = perfect aim + env->observations[i++] = dist / GUN_RANGE; // ~0-4, 1 = at gun range + + // OBS_SIZE = 21 for this scheme +} +``` + +--- + +## Code Sketch: Angles Implementation + +```c +void compute_observations_angles(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player Euler angles from quaternion + float pitch = asinf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + float yaw = atan2f(2.0f * (p->ori.w * p->ori.z + p->ori.x * p->ori.y), + 1.0f - 2.0f * (p->ori.y * p->ori.y + p->ori.z * p->ori.z)); + + // Target in body frame -> spherical + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float azimuth = 
atan2f(rel_pos_body.y, rel_pos_body.x); // -pi to pi + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float elevation = atan2f(rel_pos_body.z, r_horiz); // -pi/2 to pi/2 + + // Closing rate + Vec3 rel_vel = sub3(p->vel, o->vel); // Note: p - o for closing + float closing_rate = dot3(rel_vel, normalize3(rel_pos)); + + // Opponent heading relative to player + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 opp_fwd_body = quat_rotate(q_inv, opp_fwd); + float opp_heading_rel = atan2f(opp_fwd_body.y, opp_fwd_body.x); + + int i = 0; + // Player state + env->observations[i++] = p->pos.x * INV_WORLD_HALF_X; + env->observations[i++] = p->pos.y * INV_WORLD_HALF_Y; + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; + env->observations[i++] = norm3(p->vel) * INV_MAX_SPEED; // Speed scalar + env->observations[i++] = pitch / PI; // -0.5 to 0.5 + env->observations[i++] = roll / PI; // -1 to 1 + env->observations[i++] = yaw / PI; // -1 to 1 (or omit - world symmetric) + + // Target angles + env->observations[i++] = azimuth / PI; // -1 to 1 + env->observations[i++] = elevation / (PI * 0.5f); // -1 to 1 + env->observations[i++] = dist / (GUN_RANGE * 2.0f); // ~0-2 + env->observations[i++] = closing_rate * INV_MAX_SPEED; + + // Opponent info + env->observations[i++] = opp_heading_rel / PI; // -1 to 1 + + // OBS_SIZE = 12 for this scheme +} +``` + +--- + +## Notes + +- Angle normalization: divide by PI to get [-1, 1] range +- Discontinuity handling: atan2 handles ±180° gracefully, but crossing still causes jump +- Alternative: use sin/cos of angles instead of raw angles (no discontinuity) + - `sin(azimuth), cos(azimuth)` instead of `azimuth` + - Doubles the observation count but removes discontinuities + +--- + +## References + +- drone_race.h: Body-frame transform pattern (lines 79-85) +- TRAINING_IMPROVEMENTS.md: Original observation improvement ideas +- **REALISTIC_SCHEMES.md**: Proposed schemes 6-9 (MINIMAL, GUNSIGHT, ENERGY, PLUS) for Aces High III transfer - tests minimal observations needed for dogfighting +- aceshigh/DLL_SPEC.md: How observation choice affects DLL data requirements +- PufferLib sweep docs: (link to docs if available) diff --git a/pufferlib/ocean/dogfight/P51d_REFERENCE_DATA.md b/pufferlib/ocean/dogfight/P51d_REFERENCE_DATA.md new file mode 100644 index 000000000..d63910193 --- /dev/null +++ b/pufferlib/ocean/dogfight/P51d_REFERENCE_DATA.md @@ -0,0 +1,815 @@ +# P-51D Mustang Reference Data for RL Simulation Validation + +## Document Purpose + +This document provides authoritative reference data for the P-51D Mustang to enable validation of a simplified RL flight simulation. The goal is NOT perfect simulation fidelity, but rather reasonable agreement with historical performance data to ensure the RL environment conveys realistic WW2 fighter dynamics. + +**Key Philosophy**: Run automated test scripts that "hijack" the policy (e.g., "maintain level flight at 100% throttle") and compare simulated performance against these reference values. + +--- + +## 1. 
STANDARD TEST CONDITION + +For all validation tests, use this standardized configuration: + +| Parameter | Value | Notes | +|-----------|-------|-------| +| **Weight** | 9,000 lb (4,082 kg) | ~25% internal fuel (~45 gal remaining) | +| **Fuel Load** | 45 US gal | From 180 gal full internal | +| **Altitude** | Sea level (unless noted) | ISA conditions | +| **Configuration** | Clean | No external stores, gear up | +| **Power Setting** | As specified per test | | + +**Why 9,000 lb?** This represents a combat-ready fighter after burning fuel en route to engagement: +- Empty weight: 7,635 lb +- Pilot + equipment: ~200 lb +- Ammo (full): ~330 lb +- Oil: ~90 lb +- 45 gal fuel: ~270 lb +- Misc: ~475 lb +- **Total: ~9,000 lb** + +Historical reference: NAA report NA-46-130 uses 9,611 lb (full 180 gal internal fuel). + +--- + +## 2. PHYSICAL DIMENSIONS + +### 2.1 Overall Dimensions + +| Parameter | Imperial | Metric | +|-----------|----------|--------| +| Length | 32.25 ft | 9.83 m | +| Wingspan | 37.0 ft | 11.28 m | +| Height (tail down) | 13.67 ft | 4.17 m | +| Wing Area | 233 ft² | 21.65 m² | + +### 2.2 Wing Geometry + +| Parameter | Value | Notes | +|-----------|-------|-------| +| Aspect Ratio | 5.86 | AR = b²/S = 37²/233 | +| Mean Aerodynamic Chord (MAC) | 6.63 ft | 2.02 m | +| Root Chord | 8.48 ft | 2.58 m | +| Tip Chord | 3.87 ft | 1.18 m | +| Taper Ratio | 0.456 | λ = c_tip/c_root | +| **Wing Incidence** | **+1° to +2°** | Root chord to fuselage datum | +| Washout (twist) | 2-3° | Tip relative to root (reduces tip stall) | +| Dihedral | ~5° | | + +### 2.3 Control Surfaces + +| Surface | Span | Chord (% wing) | Area | +|---------|------|----------------|------| +| Aileron | 8.5 ft each | ~25% | ~9 ft² each | +| Flap | ~5.5 ft each | ~25% | ~12 ft² each | + +### 2.4 Tail Geometry + +| Parameter | Horizontal Stabilizer | Vertical Stabilizer | +|-----------|----------------------|---------------------| +| Area | 45.4 ft² | 14.8 ft² | +| Span | 13.1 ft | 4.7 ft | +| Root Chord | 4.6 ft | 4.7 ft | +| Tip Chord | 2.3 ft | 1.6 ft | + +--- + +## 3. 
AERODYNAMIC DATA + +### 3.1 Airfoil + +**Profile**: NAA/NACA 45-100 (laminar flow) +- Root: 15.1% thick, 1.6% camber at 50% chord +- Tip: 11.4% thick, 1.6% camber at 50% chord +- Max thickness at ~39% chord (laminar flow design) + +### 3.2 Lift Characteristics + +| Parameter | Value | Source/Notes | +|-----------|-------|--------------| +| **C_L_α (lift curve slope)** | 0.095-0.10 /deg | ~5.4-5.7 /rad; 3D wing | +| **α₀ (zero-lift angle)** | **-1.0° to -1.5°** | Due to 1.6% camber | +| **C_L_max (clean)** | 1.45 - 1.50 | Flaps up | +| **C_L_max (flaps 50°)** | 1.80 - 1.90 | Landing configuration | +| **α_stall (clean)** | 19.1° | From IL-2 data | +| **α_stall (flaps)** | 16.3° | Landing configuration | + +**Lift Equation**: +``` +C_L = C_L_α × (α - α₀) +C_L = 0.097 × (α + 1.2°) [deg input] +C_L = 5.56 × (α + 0.021) [rad input, α₀ in rad] +``` + +### 3.3 Drag Characteristics + +| Parameter | Value | Source/Notes | +|-----------|-------|--------------| +| **C_D0 (zero-lift drag)** | 0.0163 | Published Wikipedia/museum data | +| **Oswald Efficiency (e)** | 0.75 - 0.80 | Typical for tapered wing | +| **K (induced drag factor)** | 0.072 | K = 1/(π × AR × e) = 1/(π × 5.86 × 0.75) | + +**Drag Polar**: +``` +C_D = C_D0 + K × C_L² +C_D = 0.0163 + 0.072 × C_L² +``` + +**Drag Area**: 3.80 ft² (0.35 m²) + +### 3.4 Lift-to-Drag Ratio + +| Condition | L/D | Notes | +|-----------|-----|-------| +| Maximum L/D | 14.6 | At optimal C_L | +| Cruise (~350 mph) | ~12-13 | | +| Combat maneuvering | 6-10 | Higher C_L, more induced drag | + +**Optimal L/D occurs at**: +``` +C_L_opt = sqrt(C_D0 / K) = sqrt(0.0163 / 0.072) = 0.476 +L/D_max = 1 / (2 × sqrt(C_D0 × K)) = 1 / (2 × sqrt(0.0163 × 0.072)) = 14.6 +``` + +--- + +## 4. PROPULSION DATA + +### 4.1 Engine: Packard V-1650-7 (Merlin 66) + +| Parameter | Value | +|-----------|-------| +| Type | Liquid-cooled V-12, 2-stage 2-speed supercharger | +| Displacement | 1,647 in³ (27 L) | +| Bore × Stroke | 5.4" × 6.0" | +| Compression Ratio | 6.0:1 | +| Propeller Gear Ratio | 0.479:1 | + +### 4.2 Power Settings + +| Rating | MAP (in Hg) | RPM | BHP (SL) | Time Limit | +|--------|-------------|-----|----------|------------| +| **Military** | 61" | 3,000 | 1,490 hp | 15 min | +| **WEP (67")** | 67" | 3,000 | 1,650 hp | 5 min | +| **WEP (150 grade)** | 75" | 3,000 | 1,860 hp | 5 min | +| Cruise | 46" | 2,700 | ~1,100 hp | Unlimited | + +### 4.3 Power vs Altitude (Military Power, 61" Hg) + +| Altitude (ft) | BHP | Notes | +|---------------|-----|-------| +| Sea Level | 1,490 | Low blower | +| 2,600 | 1,580 | Peak low blower | +| 6,400 | 1,400 | High blower | +| 9,700 | ~1,400 | Near optimal high blower | +| 15,000 | ~1,350 | | +| 25,000 | ~1,100 | | +| 35,000 | ~750 | | + +**Simplified Altitude Correction** (for sim): +```python +def get_power(P_rated, altitude_ft, throttle=1.0): + """Simplified Merlin power model""" + sigma = get_density_ratio(altitude_ft) + + # Two-stage supercharger approximation + if altitude_ft < 4000: + # Low blower - slight power increase + P_available = P_rated * min(1.0, sigma * 1.05) + elif altitude_ft < 10000: + # Low blower critical altitude region + P_available = P_rated * 1.0 + elif altitude_ft < 25000: + # High blower + P_available = P_rated * (0.95 - 0.01 * (altitude_ft - 10000) / 1000) + else: + # Above critical altitude, power drops + P_available = P_rated * sigma * 1.2 + + return P_available * throttle +``` + +### 4.4 Propeller + +| Parameter | Value | +|-----------|-------| +| Type | Hamilton Standard 24D50 constant-speed | +| Diameter | 
11 ft 2 in (3.40 m) | +| Blades | 4 | +| Propeller RPM at 3000 engine RPM | 1,437 | +| **Propeller Efficiency (cruise)** | 0.80 - 0.85 | +| **Propeller Efficiency (climb)** | 0.75 - 0.80 | +| **Propeller Efficiency (static)** | 0.50 - 0.60 | + +**Thrust Calculation**: +```python +def get_thrust(power_hp, velocity_fps, eta_prop=0.80): + """ + T = η_p × P / V + + power_hp: shaft horsepower + velocity_fps: true airspeed in ft/s + eta_prop: propeller efficiency (0.50-0.85 depending on V) + returns: thrust in lbf + """ + power_ftlb_s = power_hp * 550 # Convert HP to ft⋅lb/s + + if velocity_fps < 50: # Low speed / static + # Use momentum theory approximation + eta_prop = 0.55 + velocity_fps = max(velocity_fps, 30) # Avoid division by zero + + thrust_lbf = eta_prop * power_ftlb_s / velocity_fps + return thrust_lbf +``` + +--- + +## 5. PERFORMANCE VALIDATION TARGETS + +### 5.1 Level Flight - Maximum Speed + +**Test**: Set throttle to specified power, maintain level flight (γ = 0), record stabilized TAS. + +| Condition | Power | Altitude | Target Speed | Tolerance | +|-----------|-------|----------|--------------|-----------| +| WEP | 67" Hg | Sea Level | 368 mph (592 km/h) | ±10 mph | +| Military | 61" Hg | Sea Level | 355 mph (571 km/h) | ±10 mph | +| WEP | 67" Hg | 11,300 ft | 414 mph (666 km/h) | ±10 mph | +| Military | 61" Hg | 13,300 ft | 412 mph (663 km/h) | ±10 mph | +| WEP | 67" Hg | 24,500 ft | 440 mph (708 km/h) | ±10 mph | +| Military | 61" Hg | 26,200 ft | 435 mph (700 km/h) | ±10 mph | + +**Test Script Logic**: +```python +def test_max_speed(sim, altitude_ft, power_setting): + """ + 1. Initialize at altitude, moderate speed + 2. Set throttle to power_setting + 3. Command pitch to maintain level (gamma = 0) + 4. Run until speed stabilizes (d|V|/dt < 0.1 ft/s²) + 5. Record stabilized TAS + """ + sim.reset(altitude=altitude_ft, speed_fps=400) + sim.set_throttle(power_setting) + + for step in range(10000): # Max 200 seconds at 50 Hz + # Simple level-flight controller + gamma = sim.get_flight_path_angle() + pitch_cmd = -gamma * 2.0 # P controller + sim.set_pitch_rate_cmd(pitch_cmd) + + sim.step() + + if abs(sim.get_acceleration()) < 0.1: + return sim.get_TAS_mph() + + return sim.get_TAS_mph() # Return final value +``` + +### 5.2 Stall Speed + +**Test**: At constant altitude, gradually reduce power while maintaining level flight until stall. + +| Weight (lb) | Configuration | Target Stall Speed | Notes | +|-------------|---------------|-------------------|-------| +| 9,071 | Clean | ~100 mph (161 km/h) IAS | | +| 9,071 | Flaps down | 95.4 mph (154 km/h) IAS | NAA data | +| 9,000 | Clean | 99 mph (159 km/h) IAS | Test weight | +| 10,000 | Clean | 104 mph (168 km/h) IAS | Heavier | + +**Stall Speed Formula**: +```python +def calculate_stall_speed(weight_lb, rho_slugft3, wing_area_ft2, CL_max): + """ + V_stall = sqrt(2W / (ρ × S × CL_max)) + + At sea level (ρ = 0.002377 slug/ft³), S = 233 ft², CL_max = 1.48: + V_stall = sqrt(2 × 9000 / (0.002377 × 233 × 1.48)) + V_stall = sqrt(18000 / 0.820) = sqrt(21951) = 148 ft/s = 101 mph + """ + V_stall_fps = math.sqrt(2 * weight_lb / (rho_slugft3 * wing_area_ft2 * CL_max)) + return V_stall_fps * 0.6818 # Convert to mph +``` + +### 5.3 Rate of Climb + +**Test**: Set throttle to specified power, maintain V_y (best climb speed ~165 mph IAS), record climb rate. 
+ +| Power | Altitude | Target ROC | Tolerance | +|-------|----------|------------|-----------| +| WEP (67") | Sea Level | 3,410 ft/min | ±200 ft/min | +| Military (61") | Sea Level | 3,030 ft/min | ±200 ft/min | +| WEP | 7,500 ft | 3,510 ft/min | ±200 ft/min | +| WEP | 21,200 ft | 2,680 ft/min | ±200 ft/min | +| Military | 9,700 ft | 3,170 ft/min | ±200 ft/min | +| Military | 23,200 ft | 2,300 ft/min | ±200 ft/min | + +**Rate of Climb Formula** (steady): +```python +def calculate_ROC(thrust_lb, drag_lb, velocity_fps, weight_lb): + """ + ROC = (T - D) × V / W = V × sin(γ) + + Or: ROC = (P_available × η_p - P_required) / W + """ + excess_thrust = thrust_lb - drag_lb + sin_gamma = excess_thrust / weight_lb + ROC_fps = velocity_fps * sin_gamma + return ROC_fps * 60 # Convert to ft/min +``` + +**Test Script**: +```python +def test_climb_rate(sim, altitude_ft, power_setting, climb_speed_mph=165): + """ + 1. Initialize at altitude, at Vy + 2. Set throttle, maintain constant IAS + 3. Record stabilized climb rate + """ + V_target_fps = climb_speed_mph * 1.467 + sim.reset(altitude=altitude_ft, speed_fps=V_target_fps) + sim.set_throttle(power_setting) + + for step in range(5000): + # Maintain constant airspeed in climb + V_error = V_target_fps - sim.get_TAS_fps() + pitch_cmd = -V_error * 0.1 # Speed-hold via pitch + sim.set_pitch_rate_cmd(pitch_cmd) + sim.step() + + return sim.get_climb_rate_fpm() +``` + +### 5.4 Level Turn Performance + +**Test**: Maintain altitude and constant speed in coordinated turn, measure turn rate and radius. + +| Speed (IAS) | Load Factor | Turn Rate | Turn Radius | Turn Time | +|-------------|-------------|-----------|-------------|-----------| +| 180 mph (290 km/h) | ~3.5g | 18°/s | ~800 ft | 20 sec | +| 250 mph | ~2.5g | 10°/s | ~1,500 ft | 36 sec | +| 350 mph | ~2.0g | 5°/s | ~3,500 ft | 72 sec | + +**Turn Physics**: +```python +def calculate_turn(velocity_fps, load_factor, g=32.174): + """ + In a level turn: + L = n × W (lift = load factor × weight) + L_vertical = W (to maintain altitude) + L_horizontal = W × sqrt(n² - 1) (centripetal force) + + Turn radius: R = V² / (g × sqrt(n² - 1)) + Turn rate: ω = g × sqrt(n² - 1) / V [rad/s] + """ + sqrt_term = math.sqrt(load_factor**2 - 1) + + radius_ft = velocity_fps**2 / (g * sqrt_term) + turn_rate_rad_s = g * sqrt_term / velocity_fps + turn_rate_deg_s = math.degrees(turn_rate_rad_s) + turn_time_sec = 360 / turn_rate_deg_s + + return { + 'radius_ft': radius_ft, + 'turn_rate_deg_s': turn_rate_deg_s, + 'turn_time_sec': turn_time_sec + } + +# Example: 290 km/h (180 mph = 264 ft/s) at 3.5g +# sqrt(3.5² - 1) = sqrt(11.25) = 3.35 +# R = 264² / (32.17 × 3.35) = 69696 / 107.8 = 647 ft +# ω = 32.17 × 3.35 / 264 = 0.408 rad/s = 23.4°/s +``` + +### 5.5 Level Flight at Zero AoA + +**Critical Test**: What speed maintains level flight at α = 0°? + +With wing incidence of ~+1.5°, the wing is at α_wing = +1.5° when fuselage is level. 
+At this AoA, considering α₀ ≈ -1.2°: +``` +C_L = C_L_α × (α_wing - α₀) +C_L = 0.097 × (1.5 - (-1.2)) = 0.097 × 2.7° = 0.262 +``` + +**Speed for Level Flight at 0° Fuselage Pitch (α_wing = +1.5°)**: +```python +# L = W (level flight) +# q × S × C_L = W +# 0.5 × ρ × V² × S × C_L = W +# V = sqrt(2W / (ρ × S × C_L)) + +W = 9000 # lb +rho = 0.002377 # slug/ft³ at sea level +S = 233 # ft² +C_L = 0.262 + +V = math.sqrt(2 * W / (rho * S * C_L)) +# V = sqrt(18000 / 0.145) = sqrt(124138) = 352 ft/s = 240 mph +``` + +**At true 0° wing AoA** (fuselage pitched down 1.5°): +``` +C_L = 0.097 × (0 - (-1.2)) = 0.097 × 1.2° = 0.116 + +V = sqrt(2 × 9000 / (0.002377 × 233 × 0.116)) +V = sqrt(18000 / 0.064) = sqrt(281250) = 530 ft/s = 361 mph +``` + +**Summary Table - Level Flight AoA vs Speed**: + +| Fuselage Pitch | Wing α | C_L | Speed (mph) | Speed (ft/s) | +|----------------|--------|-----|-------------|--------------| +| -1.5° | 0° | 0.116 | 361 | 530 | +| 0° | 1.5° | 0.262 | 240 | 352 | +| +2° | 3.5° | 0.456 | 182 | 267 | +| +5° | 6.5° | 0.747 | 142 | 208 | +| +10° | 11.5° | 1.233 | 111 | 163 | +| +15° | 16.5° (near stall) | 1.48 | 101 | 148 | + +--- + +## 6. FLIGHT ENVELOPE LIMITS + +### 6.1 Speed Limits + +| Limit | Speed | Notes | +|-------|-------|-------| +| V_NE (never exceed) | 505 mph IAS | 812 km/h | +| V_max dive | 525-550 mph | Pilots reported exceeding redline | +| M_crit | 0.75-0.80 | Onset of compressibility | +| Max Mach achieved | ~0.85 | With structural risk | + +### 6.2 G-Limits + +| Condition | Limit | Notes | +|-----------|-------|-------| +| Design limit (positive) | +8g | At 8,000 lb | +| Design limit (negative) | -4g | | +| With external stores | +6.5g | Reduced for safety | +| Ultimate load | +12g | Structural failure | + +### 6.3 Altitude Limits + +| Limit | Value | Notes | +|-------|-------|-------| +| Service ceiling | 41,900 ft | 100 ft/min ROC | +| Absolute ceiling | ~44,000 ft | | +| Combat ceiling | 36,900 ft | At 3000 RPM | + +--- + +## 7. 
IMPLEMENTATION CONSTANTS + +Copy-paste ready Python constants: + +```python +# ============================================ +# P-51D MUSTANG SIMULATION CONSTANTS +# Reference Weight: 9000 lb (combat weight) +# ============================================ + +# Physical Dimensions +WINGSPAN_FT = 37.0 +WINGSPAN_M = 11.28 +WING_AREA_FT2 = 233.0 +WING_AREA_M2 = 21.65 +MAC_FT = 6.63 +ASPECT_RATIO = 5.86 + +# Mass Properties (test condition) +WEIGHT_LB = 9000.0 +WEIGHT_KG = 4082.0 +MASS_SLUG = WEIGHT_LB / 32.174 # 279.8 slug +MASS_KG = WEIGHT_KG + +# Wing Geometry +WING_INCIDENCE_DEG = 1.5 # Root chord to fuselage +WING_INCIDENCE_RAD = 0.0262 + +# Aerodynamic Coefficients +CL_ALPHA_PER_DEG = 0.097 # 3D wing lift curve slope +CL_ALPHA_PER_RAD = 5.56 +ALPHA_ZERO_LIFT_DEG = -1.2 # Zero-lift angle (cambered airfoil) +ALPHA_ZERO_LIFT_RAD = -0.021 +CL_MAX_CLEAN = 1.48 +CL_MAX_FLAPS = 1.85 +CD0 = 0.0163 # Zero-lift drag coefficient +OSWALD_E = 0.75 # Oswald efficiency factor +K_INDUCED = 1.0 / (3.14159 * ASPECT_RATIO * OSWALD_E) # 0.072 + +# Stall +ALPHA_STALL_CLEAN_DEG = 19.1 +ALPHA_STALL_FLAPS_DEG = 16.3 + +# Propulsion +ENGINE_POWER_WEP_HP = 1650 # 67" Hg, 3000 RPM +ENGINE_POWER_MIL_HP = 1490 # 61" Hg, 3000 RPM +ENGINE_POWER_CRUISE_HP = 1100 # 46" Hg, 2700 RPM +PROP_DIAMETER_FT = 11.167 # 11 ft 2 in +PROP_EFFICIENCY_CRUISE = 0.82 +PROP_EFFICIENCY_CLIMB = 0.78 +PROP_EFFICIENCY_STATIC = 0.55 + +# Limits +VNE_MPH = 505 +VNE_FPS = 740 +G_LIMIT_POS = 8.0 +G_LIMIT_NEG = -4.0 +SERVICE_CEILING_FT = 41900 + +# Atmosphere (sea level ISA) +RHO_SL_SLUGFT3 = 0.002377 +RHO_SL_KGM3 = 1.225 +TEMP_SL_K = 288.15 +PRESSURE_SL_PA = 101325 +LAPSE_RATE_K_PER_M = 0.0065 + +# Unit Conversions +FPS_TO_MPH = 0.6818 +MPH_TO_FPS = 1.467 +FPS_TO_KTS = 0.5925 +KTS_TO_FPS = 1.688 +FT_TO_M = 0.3048 +M_TO_FT = 3.281 +HP_TO_WATTS = 745.7 +LBF_TO_N = 4.448 +``` + +--- + +## 8. 
VALIDATION TEST SUITE + +### 8.1 Test Categories + +| Test | Priority | Tolerance | Notes | +|------|----------|-----------|-------| +| Stall speed | HIGH | ±5 mph | Fundamental lift validation | +| Max speed (SL) | HIGH | ±10 mph | Drag + power validation | +| Max speed (altitude) | MEDIUM | ±15 mph | Supercharger model | +| ROC (SL) | HIGH | ±200 ft/min | Excess power validation | +| ROC (altitude) | MEDIUM | ±300 ft/min | | +| Level turn time | MEDIUM | ±2 sec | Turn physics | +| Level flight speed @ α=0 | HIGH | ±15 mph | Incidence angle validation | + +### 8.2 Example Test Script + +```python +import numpy as np + +class P51DValidationSuite: + """Validation tests for P-51D flight model""" + + def __init__(self, sim_env): + self.sim = sim_env + self.results = {} + + def test_stall_speed(self): + """Test: Gradually reduce speed until stall""" + # Initialize at safe speed, level flight + self.sim.reset(altitude_ft=5000, speed_fps=250, gamma_deg=0) + self.sim.set_weight(9000) + + # Gradually reduce throttle while maintaining level + for throttle in np.linspace(1.0, 0.0, 100): + self.sim.set_throttle(throttle) + + # Run for 5 seconds to stabilize + for _ in range(250): # 50 Hz × 5 sec + # Level flight controller + gamma = self.sim.get_gamma() + self.sim.command_pitch_rate(-gamma * 2.0) + self.sim.step() + + # Check for stall + if self.sim.get_alpha() > 18 or self.sim.is_stalled(): + stall_speed_mph = self.sim.get_speed_fps() * 0.6818 + break + + expected = 100 # mph + actual = stall_speed_mph + passed = abs(actual - expected) < 5 + + self.results['stall_speed'] = { + 'expected': expected, + 'actual': actual, + 'passed': passed, + 'tolerance': 5 + } + return passed + + def test_max_speed_sea_level(self): + """Test: Maximum speed at sea level with WEP""" + self.sim.reset(altitude_ft=0, speed_fps=400, gamma_deg=0) + self.sim.set_weight(9000) + self.sim.set_throttle(1.0) # WEP + + # Accelerate until stable + prev_speed = 0 + for step in range(10000): + # Maintain level flight + gamma = self.sim.get_gamma() + self.sim.command_pitch_rate(-gamma * 2.0) + self.sim.step() + + # Check for stabilization + current_speed = self.sim.get_speed_fps() + if abs(current_speed - prev_speed) < 0.01: + break + prev_speed = current_speed + + max_speed_mph = self.sim.get_speed_fps() * 0.6818 + + expected = 368 # mph at WEP, SL + actual = max_speed_mph + passed = abs(actual - expected) < 10 + + self.results['max_speed_sl'] = { + 'expected': expected, + 'actual': actual, + 'passed': passed, + 'tolerance': 10 + } + return passed + + def test_climb_rate_sea_level(self): + """Test: Rate of climb at sea level with WEP""" + self.sim.reset(altitude_ft=1000, speed_fps=242, gamma_deg=10) # ~165 mph + self.sim.set_weight(9000) + self.sim.set_throttle(1.0) + + target_speed_fps = 242 # 165 mph Vy + + # Climb at constant airspeed + for step in range(5000): + V_error = target_speed_fps - self.sim.get_speed_fps() + self.sim.command_pitch_rate(-V_error * 0.05) + self.sim.step() + + roc_fpm = self.sim.get_vertical_speed_fps() * 60 + + expected = 3410 # ft/min at WEP, SL + actual = roc_fpm + passed = abs(actual - expected) < 200 + + self.results['climb_rate_sl'] = { + 'expected': expected, + 'actual': actual, + 'passed': passed, + 'tolerance': 200 + } + return passed + + def test_turn_time(self): + """Test: 360° turn time at 290 km/h (180 mph)""" + V_fps = 264 # 180 mph + self.sim.reset(altitude_ft=5000, speed_fps=V_fps, gamma_deg=0) + self.sim.set_weight(9000) + self.sim.set_throttle(1.0) + + # Bank to ~70° for ~3.5g turn + 
bank_target_rad = np.radians(70) + initial_heading = self.sim.get_heading() + + time_elapsed = 0 + while True: + # Maintain bank angle + bank_error = bank_target_rad - self.sim.get_bank() + self.sim.command_roll_rate(bank_error * 2.0) + + # Maintain altitude + # ... (pull back to compensate) + + self.sim.step() + time_elapsed += self.sim.dt + + # Check for 360° turn completion + heading_change = self.sim.get_heading() - initial_heading + if heading_change >= 2 * np.pi: + break + + if time_elapsed > 60: # Timeout + break + + expected = 20 # seconds + actual = time_elapsed + passed = abs(actual - expected) < 2 + + self.results['turn_time'] = { + 'expected': expected, + 'actual': actual, + 'passed': passed, + 'tolerance': 2 + } + return passed + + def run_all_tests(self): + """Run complete validation suite""" + tests = [ + ('Stall Speed', self.test_stall_speed), + ('Max Speed (SL)', self.test_max_speed_sea_level), + ('Climb Rate (SL)', self.test_climb_rate_sea_level), + ('Turn Time', self.test_turn_time), + ] + + print("=" * 60) + print("P-51D MUSTANG FLIGHT MODEL VALIDATION") + print("=" * 60) + + all_passed = True + for name, test_func in tests: + try: + passed = test_func() + result = self.results.get(name.lower().replace(' ', '_').replace('(', '').replace(')', ''), {}) + status = "✓ PASS" if passed else "✗ FAIL" + print(f"{name:20s}: {status}") + print(f" Expected: {result.get('expected', 'N/A')}") + print(f" Actual: {result.get('actual', 'N/A'):.1f}") + print(f" Tolerance: ±{result.get('tolerance', 'N/A')}") + all_passed = all_passed and passed + except Exception as e: + print(f"{name:20s}: ✗ ERROR - {e}") + all_passed = False + + print("=" * 60) + print(f"OVERALL: {'ALL TESTS PASSED' if all_passed else 'SOME TESTS FAILED'}") + print("=" * 60) + + return all_passed +``` + +--- + +## 9. QUICK REFERENCE CARD + +### For Simulator Implementation + +**Level Flight Speed by AoA (9000 lb, sea level)**: +| Wing AoA | Speed | +|----------|-------| +| 0° | 361 mph | +| 2° | 263 mph | +| 5° | 166 mph | +| 10° | 117 mph | +| 15° | 105 mph (near stall) | + +**Key Performance Numbers (Military Power, 9000 lb)**: +- Max Speed (SL): 355 mph +- Max Speed (25,000 ft): 435 mph +- Stall Speed (clean): 100 mph +- ROC (SL): 3,000 ft/min +- Best Climb Speed: 165 mph IAS + +**Equations to Implement**: +``` +C_L = 5.56 × (α - (-0.021)) [rad] +C_D = 0.0163 + 0.072 × C_L² +L = 0.5 × ρ × V² × 233 × C_L [lb] +D = 0.5 × ρ × V² × 233 × C_D [lb] +T = η_p × (P × 550) / V [lb] +``` + +--- + +## 10. SOURCES AND REFERENCES + +1. **NAA Report NA-46-130**: Performance Calculations for P-51D Airplane + - Source: wwiiaircraftperformance.org + - Primary source for validated performance data + +2. **IL-2 Great Battles**: P-51D-15 Specifications + - Source: aergistal.github.io/il2/planes/p51d15.html + - Extensively validated against historical records + +3. **Virginia Tech Aerospace Archive**: P-51D Mustang Student Report + - Source: archive.aoe.vt.edu/mason/Mason_f/P51DMustang.pdf + - Aerodynamic data and wing geometry + +4. **Mid America Flight Museum**: P-51 Mustang Specifications + - Source: midamericaflightmuseum.com + - CD0, drag area, L/D data + +5. **WW2Aircraft.net Forums**: Technical discussions + - CL_max, wing incidence, airfoil data + - Model builder and pilot experiences + +6. **Packard V-1650 Merlin Wikipedia**: Engine specifications + - Power curves, supercharger data + +7. 
**NACA Airfoil Theory**: Thin airfoil theory for α₀ + - Zero-lift angle calculations + +--- + +## Document Version + +- **Version**: 1.0 +- **Date**: January 2026 +- **Purpose**: RL Environment Validation Reference +- **Author**: Claude (Anthropic) + Research + +--- + +*Note: This document is intended for simulation purposes. Actual aircraft performance varies with exact configuration, atmospheric conditions, and pilot technique. Tolerances are provided to account for these variations and simplifications inherent in the simulation model.* diff --git a/pufferlib/ocean/dogfight/PLAN.md b/pufferlib/ocean/dogfight/PLAN.md new file mode 100644 index 000000000..0907a6f53 --- /dev/null +++ b/pufferlib/ocean/dogfight/PLAN.md @@ -0,0 +1,161 @@ +# Dogfight Implementation Plan + +**Note: This plan is a living document and may be adjusted as development progresses.** + +First checkbox: initial implementation complete +Second checkbox: audited and verified + +--- + +## Phase 0: Scaffolding +- [x] [ ] 0.1 Create pufferlib/ocean/dogfight/ folder +- [x] [ ] 0.2 Create dogfight.h with basic Dogfight struct (observations, actions, rewards, terminals, Log) +- [x] [ ] 0.2b Define Log struct with ONLY float fields (env_binding.h iterates as floats): + episode_return, episode_length, score, kills, deaths, shots_fired, shots_hit, n +- [x] [ ] 0.3 Create binding.c: implement my_init() (unpack kwargs, call init()) and my_log() (map Log fields to dict) +- [x] [ ] 0.4 Create dogfight.py following drone_race.py pattern: + - Box(5) continuous actions [-1, 1] + - self.actions = self.actions.astype(np.float32) # REQUIRED for continuous +- [x] [ ] 0.5 Create pufferlib/config/ocean/dogfight.ini: + [base] package=ocean, env_name=puffer_dogfight + [vec] num_envs=8 + [env] num_envs=128, max_steps=3000 + [train] hyperparameters +- [x] [ ] 0.6 Verify setup.py compiles binding (already configured at line 192) +- [x] [ ] 0.7 Verify env can be instantiated: `from pufferlib.ocean.dogfight.dogfight import Dogfight` + +## Phase 1: Minimal Viable Environment +- [x] [ ] 1.1 Implement init(), c_close() → test_init() +- [x] [ ] 1.2 Define Plane struct (pos, vel, ori quat) → test_reset_plane() +- [x] [ ] 1.3 Implement c_reset(): spawn plane random pos → test_c_reset() +- [x] [ ] 1.4 Implement c_step(): plane moves forward → test_c_step_moves_forward() +- [x] [ ] 1.5 Implement compute_observations(): pos/vel/ori/up normalized → test_compute_observations() +- [x] [ ] 1.6 Wire up actions array (float* for 5 floats) - read but ignore for now +- [x] [ ] 1.7 Episode terminates: OOB → test_oob_terminates(), max_steps → test_max_steps_terminates() +- [x] [ ] 1.8 Python integration: env.reset() and env.step() work + +## Phase 2: Target Plane (Scripted Opponent) +- [x] [ ] 2.1 Add second Plane struct for opponent → test_opponent_spawns() +- [x] [ ] 2.2 Opponent spawns ahead of player, flies straight → test_opponent_spawns() +- [x] [ ] 2.3 Add relative position to opponent in observations → test_relative_observations() +- [x] [ ] 2.4 Add relative velocity to opponent in observations → test_relative_observations() +- [x] [ ] 2.5 Define observation space size in Python: OBS_SIZE=19 +- [x] [ ] 2.6 Basic reward: negative distance to opponent → test_pursuit_reward() +- [x] [ ] 2.7 Python integration: obs shape (19,), negative reward working + +## Phase 3: Flight Physics (Controls + Aerodynamics merged - correct order) +- [x] [ ] 3.1 Add aircraft parameters → test_aircraft_params() +- [x] [ ] 3.2 Quaternion orientation → done in Phase 1 +- [x] [ ] 3.3 
Map throttle action [0] to engine power → test_throttle_accelerates() +- [x] [ ] 3.4 Map elevator action [1] to pitch rate → test_controls_affect_orientation() +- [x] [ ] 3.5 Map ailerons action [2] to roll rate → test_controls_affect_orientation() +- [x] [ ] 3.6 Map rudder action [3] to yaw rate → step_plane_with_physics() +- [x] [ ] 3.7 Add rate limits → MAX_PITCH_RATE, MAX_ROLL_RATE, MAX_YAW_RATE +- [x] [ ] 3.8 Integrate orientation: q_dot = 0.5 * q * omega_quat → test_controls_affect_orientation() +- [x] [ ] 3.9 Compute angle of attack → step_plane_with_physics() +- [x] [ ] 3.10 Compute C_L clamped to C_L_max → test_stall_clamps_lift() +- [x] [ ] 3.11 Implement dynamic pressure → test_dynamic_pressure() +- [x] [ ] 3.12 Compute lift magnitude → test_lift_opposes_gravity() +- [x] [ ] 3.13 Compute drag magnitude → test_drag_slows_plane() +- [x] [ ] 3.14 Velocity-dependent propeller thrust → test_throttle_accelerates() +- [x] [ ] 3.15 Compute weight → test_plane_falls_without_lift() +- [x] [ ] 3.16 Transform forces to world frame → step_plane_with_physics() +- [x] [ ] 3.17 Sum forces → test_forces_sum_correctly() +- [x] [ ] 3.18 Integrate: a = F/m, v += a*dt, pos += v*dt → test_integration_updates_state() +- [x] [ ] 3.19 Enforce C_L ≤ C_L_max (stall) → test_stall_clamps_lift() +- [x] [ ] 3.20 Enforce n ≤ 8 (g-limit) → test_glimit_clamps_acceleration() +- [x] [ ] 3.21 Test: all Phase 3 tests pass (23 total) + +## Phase 3.5: Reward Shaping +Current pursuit reward (-dist/10000 per step) is too weak for effective learning. + +- [x] [ ] 3.5.1 Add closing velocity reward: +bonus when distance decreasing → test_closing_velocity_reward() +- [x] [ ] 3.5.2 Add tail position reward: +bonus when behind opponent (angle from opponent's forward) → test_tail_position_reward() +- [x] [ ] 3.5.3 Add altitude maintenance: small penalty for z < 200m or z > 2500m → test_altitude_penalty() +- [x] [ ] 3.5.4 Add speed maintenance: small penalty for V < 50 m/s (stall risk) → test_speed_penalty() +- [x] [ ] 3.5.5 Scale rewards appropriately (total episode reward ~10-100 for good policy) +- [x] [ ] 3.5.6 Test: training shows faster convergence with new rewards (2/3 runs positive) + +## Phase 4: Rendering +**Moved before Combat** - Can't debug combat without seeing planes. + +**Implementation Guide**: See `RENDERING.md` for code patterns, templates, and Raylib reference. 
+ +Camera and visibility: +- [x] [ ] 4.1 Fix camera: chase cam behind player, ~80m back → test_chase_camera_behind_player() +- [x] [ ] 4.2 Camera follows player position and orientation → test_chase_camera_behind_player() +- [x] [ ] 4.3 Add mouse controls for camera orbit (like drone_race) → test_camera_orbit_updates() + +Drawing planes: +- [x] [ ] 4.4 Draw player plane: green sphere + forward line → dogfight.h:469-478 +- [x] [ ] 4.5 Draw opponent plane: red sphere + forward line → dogfight.h:480-490 +- [ ] [ ] 4.6 Draw velocity vectors for debugging (optional, toggle with key) + +Environment: +- [x] [ ] 4.7 Draw ground plane at z=0 → dogfight.h:462-463 +- [ ] [ ] 4.8 Draw sky gradient or horizon reference +- [x] [ ] 4.9 Draw world bounds (wireframe box) → dogfight.h:465-467 + +HUD: +- [x] [ ] 4.10 Display: speed (m/s), altitude (m), throttle (%) → dogfight.h:498-500 +- [x] [ ] 4.11 Display: distance to opponent, episode tick → dogfight.h:501-502 +- [x] [ ] 4.12 Display: episode return → dogfight.h:503 + +## Phase 5: Combat Mechanics +**Struct additions:** +- Add to Plane: `int fire_cooldown`, `bool alive` (or `float health`) + +**Constants:** +- `GUN_RANGE` = 500.0f (meters) +- `GUN_CONE_ANGLE` = 0.087f (5 degrees in radians) +- `FIRE_COOLDOWN` = 10 (ticks = 0.2 seconds) + +**Implementation:** +- [x] [ ] 5.1 Add fire_cooldown field to Plane struct → dogfight.h:96 +- [x] [ ] 5.2 Add combat constants → dogfight.h:35-38, test_combat_constants() +- [x] [ ] 5.3 Map trigger action [4] to fire → test_trigger_fires(), test_fire_cooldown() +- [x] [ ] 5.4 Implement cone check hit detection → test_cone_hit_detection() +- [x] [ ] 5.5 Track shots_fired in Log when trigger pulled → test_trigger_fires() +- [x] [ ] 5.6 Track shots_hit in Log when hit detected → test_hit_reward() +- [x] [ ] 5.7 Reward for hit: +1.0 → test_hit_reward() +- [x] [ ] 5.8 On kill: respawn opponent, +10.0 reward → test_kill_respawns_opponent() +- [x] [ ] 5.9 Episode does NOT terminate on kill → test_kill_respawns_opponent() +- [x] [ ] 5.10 All combat tests pass (6 tests) → 36 total tests PASS + +## Phase 6: Opponent AI +**Physics fix:** Both planes must use same physics model. 
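+
+The intended shape of items 6.1-6.3 below (the `compute_opponent_ai()` signature is an assumption; the other names come straight from the checklist):
+
+```c
+// Sketch: drive the opponent through the same integrator as the player
+float opponent_actions[5] = {0};                               // throttle, elevator, ailerons, rudder, trigger
+compute_opponent_ai(env, opponent_actions);                    // AI fills its control inputs (assumed signature)
+step_plane_with_physics(&env->opponent, opponent_actions, DT);
+```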
+ +- [ ] [ ] 6.1 Add `float opponent_actions[5]` array (computed by AI each step) +- [ ] [ ] 6.2 Call `step_plane_with_physics(&env->opponent, opponent_actions, DT)` instead of `step_plane()` +- [ ] [ ] 6.3 Remove old `step_plane()` function (no longer needed) + +**AI behaviors (compute_opponent_ai function):** +- [ ] [ ] 6.4 Pure pursuit: turn toward player → test_opponent_pursues() +- [ ] [ ] 6.5 Lead pursuit: aim ahead of player based on closure rate +- [ ] [ ] 6.6 Fire when player in gun cone → test_opponent_fires() +- [ ] [ ] 6.7 Throttle management: speed up when far, maintain when close +- [ ] [ ] 6.8 Basic evasion: break turn when player behind + +**Difficulty scaling:** +- [ ] [ ] 6.9 Add `float ai_skill` parameter (0.0 = random, 1.0 = perfect) +- [ ] [ ] 6.10 Scale AI accuracy/reaction time with skill level +- [ ] [ ] 6.11 Test: opponent provides meaningful challenge at skill=0.5 + +## Phase 7: Tuning & Polish +- [ ] [ ] 7.1 Tune aircraft parameters to match WW2 fighter specs: + - Max level speed: ~180-200 m/s (400-450 mph) + - Climb rate: ~15-20 m/s (3000-4000 ft/min) + - Sustained turn: 4-5g at combat speed + - Corner velocity: ~130-150 m/s (260-300 knots) +- [ ] [ ] 7.2 Verify add_log() populates all fields (score, kills, deaths, shots) +- [ ] [ ] 7.3 Performance profiling +- [ ] [ ] 7.4 Optimize hot paths for 1M+ steps/sec +- [ ] [ ] 7.5 Verify no memory leaks or allocations per step + +## Phase 8: Validation & Audit +- [ ] [ ] 8.1 Full test suite passes +- [ ] [ ] 8.2 Performance benchmark: confirm 1M+ steps/sec +- [ ] [ ] 8.3 Flight model validation: verify corner velocity, sustained turn rate, stall behavior +- [ ] [ ] 8.4 Training run: agent learns to pursue and shoot +- [ ] [ ] 8.5 Code review: all phases second checkbox +- [ ] [ ] 8.6 Documentation complete diff --git a/pufferlib/ocean/dogfight/RENDERING.md b/pufferlib/ocean/dogfight/RENDERING.md new file mode 100644 index 000000000..5a951bf23 --- /dev/null +++ b/pufferlib/ocean/dogfight/RENDERING.md @@ -0,0 +1,208 @@ +# Dogfight Rendering Guide + +Reference patterns extracted from PufferLib ocean environments for Phase 4 implementation. 
+
+## Current State
+- `dogfight.h` lines 375-406: basic `c_render()` skeleton with placeholder camera
+- Need: chase camera, plane drawing, ground, bounds, HUD
+
+## Client Struct
+
+Update the existing Client struct (~line 104) to support camera controls:
+
+```c
+typedef struct Client {
+    Camera3D camera;
+    float width;
+    float height;
+    // Camera orbit state (for mouse control)
+    float cam_distance;
+    float cam_azimuth;
+    float cam_elevation;
+    bool is_dragging;
+    float last_mouse_x, last_mouse_y;
+} Client;
+```
+
+## Chase Camera
+
+Calculate the camera position behind and above the player using the quaternion orientation:
+
+```c
+// Get player forward vector from quaternion
+Vec3 fwd = quat_rotate(player->ori, vec3(1, 0, 0));
+
+// Camera position: behind and above player
+float dist = 80.0f, height = 30.0f;
+Vector3 cam_pos = {
+    player->pos.x - fwd.x * dist,
+    player->pos.y - fwd.y * dist,
+    player->pos.z + height
+};
+
+// Look at player
+Vector3 cam_target = {player->pos.x, player->pos.y, player->pos.z};
+```
+
+## Raylib Quick Reference
+
+| Task | Code |
+|------|------|
+| Init window | `InitWindow(1280, 720, "Dogfight"); SetTargetFPS(60);` |
+| Camera setup | `camera.up = (Vector3){0, 0, 1}; camera.fovy = 45; camera.projection = CAMERA_PERSPECTIVE;` |
+| Draw sphere | `DrawSphere((Vector3){x, y, z}, radius, color);` |
+| Draw line | `DrawLine3D(start, end, color);` |
+| Draw ground | `DrawPlane((Vector3){0, 0, 0}, (Vector2){4000, 4000}, DARKGREEN);` |
+| Draw bounds | `DrawCubeWires((Vector3){0, 0, 1500}, 4000, 4000, 3000, WHITE);` |
+| HUD text | `DrawText(TextFormat("Speed: %.0f", speed), 10, 10, 20, WHITE);` |
+
+## Mouse Orbit Controls
+
+Pattern from `drone_race.h` - lets the user orbit the camera with a mouse drag:
+
+```c
+void handle_camera_controls(Client *c) {
+    Vector2 mouse = GetMousePosition();
+
+    if (IsMouseButtonPressed(MOUSE_BUTTON_LEFT)) {
+        c->is_dragging = true;
+        c->last_mouse_x = mouse.x;
+        c->last_mouse_y = mouse.y;
+    }
+    if (IsMouseButtonReleased(MOUSE_BUTTON_LEFT)) {
+        c->is_dragging = false;
+    }
+
+    if (c->is_dragging) {
+        float sensitivity = 0.005f;
+        c->cam_azimuth -= (mouse.x - c->last_mouse_x) * sensitivity;
+        c->cam_elevation += (mouse.y - c->last_mouse_y) * sensitivity;
+        c->cam_elevation = clampf(c->cam_elevation, -1.4f, 1.4f); // clamp so the camera can't flip over the poles
+        c->last_mouse_x = mouse.x;
+        c->last_mouse_y = mouse.y;
+    }
+
+    // Mouse wheel zoom
+    float wheel = GetMouseWheelMove();
+    if (wheel != 0) {
+        c->cam_distance = clampf(c->cam_distance - wheel * 5.0f, 30.0f, 200.0f);
+    }
+}
+```
+
+## Complete c_render() Template
+
+```c
+void c_render(Dogfight *env) {
+    // 1. Lazy init
+    if (env->client == NULL) {
+        env->client = (Client *)calloc(1, sizeof(Client));
+        env->client->width = 1280;
+        env->client->height = 720;
+        env->client->cam_distance = 80.0f;
+        env->client->cam_azimuth = 0.0f;
+        env->client->cam_elevation = 0.3f;
+
+        InitWindow(1280, 720, "Dogfight");
+        SetTargetFPS(60);
+
+        env->client->camera.up = (Vector3){0.0f, 0.0f, 1.0f};
+        env->client->camera.fovy = 45.0f;
+        env->client->camera.projection = CAMERA_PERSPECTIVE;
+    }
+
+    // 2. Handle window close
+    if (WindowShouldClose() || IsKeyDown(KEY_ESCAPE)) {
+        c_close(env);
+        exit(0);
+    }
+
+    // 3.
Update chase camera + Plane *p = &env->player; + Vec3 fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + float dist = env->client->cam_distance; + + env->client->camera.position = (Vector3){ + p->pos.x - fwd.x * dist, + p->pos.y - fwd.y * dist, + p->pos.z + dist * 0.4f + }; + env->client->camera.target = (Vector3){p->pos.x, p->pos.y, p->pos.z}; + + // 4. Optional: handle mouse orbit + // handle_camera_controls(env->client); + + // 5. Draw + BeginDrawing(); + ClearBackground((Color){6, 24, 24, 255}); + + BeginMode3D(env->client->camera); + + // Ground plane at z=0 + DrawPlane((Vector3){0, 0, 0}, (Vector2){4000, 4000}, (Color){20, 60, 20, 255}); + + // World bounds wireframe + DrawCubeWires((Vector3){0, 0, 1500}, 4000, 4000, 3000, (Color){100, 100, 100, 255}); + + // Player plane (green) + Vector3 player_pos = {p->pos.x, p->pos.y, p->pos.z}; + DrawSphere(player_pos, 5.0f, GREEN); + // Forward direction indicator + Vector3 player_fwd = {p->pos.x + fwd.x * 30, p->pos.y + fwd.y * 30, p->pos.z + fwd.z * 30}; + DrawLine3D(player_pos, player_fwd, GREEN); + + // Opponent plane (red) + Plane *o = &env->opponent; + Vector3 opp_pos = {o->pos.x, o->pos.y, o->pos.z}; + DrawSphere(opp_pos, 5.0f, RED); + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vector3 opp_fwd_end = {o->pos.x + opp_fwd.x * 30, o->pos.y + opp_fwd.y * 30, o->pos.z + opp_fwd.z * 30}; + DrawLine3D(opp_pos, opp_fwd_end, RED); + + EndMode3D(); + + // HUD + float speed = norm3(p->vel); + float dist_to_opp = norm3(sub3(o->pos, p->pos)); + + DrawText(TextFormat("Speed: %.0f m/s", speed), 10, 10, 20, WHITE); + DrawText(TextFormat("Alt: %.0f m", p->pos.z), 10, 40, 20, WHITE); + DrawText(TextFormat("Throttle: %.0f%%", p->throttle * 100.0f), 10, 70, 20, WHITE); + DrawText(TextFormat("Distance: %.0f m", dist_to_opp), 10, 100, 20, WHITE); + DrawText(TextFormat("Tick: %d", env->tick), 10, 130, 20, WHITE); + DrawText(TextFormat("Return: %.2f", env->episode_return), 10, 160, 20, WHITE); + + // Camera controls hint + DrawText("ESC: Exit", 10, env->client->height - 30, 16, GRAY); + + EndDrawing(); +} +``` + +## Coordinate System + +- Dogfight uses: **X=forward, Y=right, Z=up** +- Set `camera.up = {0, 0, 1}` to match +- World bounds: ±2000 X/Y, 0-3000 Z (from `dogfight.h` defines) + +## Reference Environments + +| File | Key Patterns | +|------|--------------| +| `drone_race/drone_race.h` | Spherical orbit camera, mouse controls, trail effects | +| `drive/drive.h` | FPV chase camera using heading angle | +| `battle/battle.h` | Quaternion orientation, `CAMERA_THIRD_PERSON`, 3D models | +| `impulse_wars/render.h` | Smooth camera lerp, bloom effects, sophisticated UI | + +## Build & Test + +```bash +# Build +python setup.py build_ext --inplace --force + +# Run with rendering +python -m pufferlib.pufferl train puffer_dogfight --render + +# Run tests +gcc -I raylib-5.5_linux_amd64/include -o pufferlib/ocean/dogfight/dogfight_test pufferlib/ocean/dogfight/dogfight_test.c raylib-5.5_linux_amd64/lib/libraylib.a -lm -lpthread -ldl && ./pufferlib/ocean/dogfight/dogfight_test +``` diff --git a/pufferlib/ocean/dogfight/SPEC.md b/pufferlib/ocean/dogfight/SPEC.md new file mode 100644 index 000000000..93bdebec8 --- /dev/null +++ b/pufferlib/ocean/dogfight/SPEC.md @@ -0,0 +1,51 @@ +Objective: Implement a high-performance simulation of world war 2 dogfighting as an RL environment into PufferLib. + +Requirements: +1. 1M+ steps per second simulation on a single CPU core. 
This is easily attainable by following the practices of other PufferLib environments: no memory allocations after initialization, and pass all structs by reference
+2. Single-file, header-only C implementation. A small, separate .c file will be used for testing.
+3. Match the PufferLib C API for RL environments
+4. TDD (Test-Driven Development)
+
+Environment details:
+- Must model: managing throttle, aileron, rudder, elevator, and trigger to win dogfights in a physics simulation with realistic approximations of the drag polar, aerodynamic stall, etc.
+- Action space: Box(5) continuous [-1, 1]: throttle, elevator, ailerons, rudder, trigger (fire if > 0.5)
+- Optional future: flaps
+- Not modeling: full CFD, air turbulence, structural damage
+
+- Reasonably accurate physics: thrust, lift, drag, kinetic and potential energy, conservation of momentum and energy, lift varying with angle of attack, etc.
+- Try to approximately match the performance of real World War 2 aircraft
+- Agents should learn to manage energy and air combat maneuvers to win dogfights
+
+Physics (3DOF point-mass, metric units):
+- ρ = 1.225 kg/m³ (fixed sea level)
+- q = 0.5 * ρ * V² (dynamic pressure, Pa)
+- L = C_L * q * S (lift, N)
+- D = (C_D0 + K * C_L²) * q * S (drag, N)
+- T = min(P·η/V, 0.3·P) where P = ENGINE_POWER × throttle, η = 0.80 (propeller thrust, N)
+- W = m * g (weight, N)
+
+Constraints:
+- C_L ≤ 1.4 (stall)
+- n = L/W ≤ 8 (structural g-limit)
+
+Approximations (valid for WW2, Mach < 0.6):
+- Incompressible flow, flat earth, ignore prop torque/weather
+
+Instructions:
+- Read pufferlib/ocean/[target, snake] for simple examples of API compatibility and code standards
+- Read pufferlib/ocean/nmmo3/nmmo3.h for a much more complex environment with the same game tick system as the desired dogfight environment
+- The implementation will live in pufferlib/ocean/dogfight, with dogfight.h being the source and dogfight.c being a tiny main file.
+- Build with: python setup.py build_ext --inplace --force
+- Use pufferlib/ocean/drone_race/ as a template
+- The opponent will be generated programmatically - very simple at first (e.g., just flying straight), with maneuvers added later
+
+References:
+Links in CLAUDE.md
+PufferAI docs: https://puffer.ai/docs.html
+Reference environments: pufferlib/ocean. Source code is in .h files. Ignore .pyx. The .py files only contain bindings.
+
+Code style and optimization:
+- Use the environments "squared," "target," and "template" as API references. You must implement c_step, c_reset, and c_render
+- The only dependency is raylib, which is used for rendering only
+- Match the code style of "snake" and "nmmo3" closely: procedural C with minimal abstraction, with functions split out mainly to avoid duplicating code.
+- No memory allocations after initialization. Pass all structs by reference.
diff --git a/pufferlib/ocean/dogfight/TRAINING_IMPROVEMENTS.md b/pufferlib/ocean/dogfight/TRAINING_IMPROVEMENTS.md
new file mode 100644
index 000000000..3c410a70b
--- /dev/null
+++ b/pufferlib/ocean/dogfight/TRAINING_IMPROVEMENTS.md
@@ -0,0 +1,274 @@
+# Dogfight Training Improvements
+
+Analysis and recommendations for improving agent training before implementing opponent AI.
+ +**Date**: 2026-01-13 +**Current Performance**: Phase 5 baseline - +23.44 mean return, 0.19 kills/episode, ~1.6% accuracy + +--- + +## Problem Analysis + +### Current Target Behavior + +From `step_plane()` (dogfight.h:317-324): +```c +void step_plane(Plane *p, float dt) { + Vec3 forward = quat_rotate(p->ori, vec3(1, 0, 0)); + float speed = norm3(p->vel); + if (speed < 1.0f) speed = 80.0f; + p->vel = mul3(forward, speed); + p->pos = add3(p->pos, mul3(p->vel, dt)); +} +``` + +From `respawn_opponent()` (dogfight.h:340-352): +```c +Vec3 vel = vec3(80, 0, 0); // Always flies +X direction +reset_plane(&env->opponent, opp_pos, vel); +``` + +**Result**: Target flies straight at 80 m/s, always in +X direction, forever. Never turns, never changes altitude. Orientation quaternion stays at identity. + +### Why Training Is Hard + +| Issue | Impact | +|-------|--------| +| Target always flies +X | If player spawns heading different direction, target flies away | +| No orientation variation | Target always faces +X regardless of spawn position | +| Constant speed = easy overshoot | Player accelerates to catch up, overshoots, target keeps going | +| 5° gun cone at 200m = ~17m radius | Very precise aiming required | +| No aiming reward | Agent gets no feedback until actual hit | + +### Current Results Breakdown + +- **0.19 kills/episode** - less than 1 kill per 5 episodes +- **~12 shots/episode** - agent learned to fire +- **1.6% accuracy** - agent did NOT learn to aim + +The agent learned pursuit and firing, but not aiming. + +--- + +## Improvement Recommendations + +### Priority 1: Fix Target Spawn Direction + +**Problem**: Target always flies +X regardless of where player is facing. + +**Solution A - Match player direction**: +```c +void respawn_opponent(Dogfight *env) { + Plane *p = &env->player; + Vec3 fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + + Vec3 opp_pos = vec3( + p->pos.x + fwd.x * rndf(300, 600) + rndf(-100, 100), + p->pos.y + fwd.y * rndf(300, 600) + rndf(-100, 100), + clampf(p->pos.z + rndf(-100, 100), 200, 2500) + ); + + // KEY CHANGE: Opponent flies same direction as player + Vec3 vel = mul3(fwd, 80.0f); + reset_plane(&env->opponent, opp_pos, vel); + env->opponent.ori = p->ori; // Match orientation too +} +``` + +### Priority 2: Add Aiming Reward + +**Problem**: No feedback on aim quality until actual hit. 
+
+**Solution**: Reward for having the target near the gun cone:
+```c
+// In c_step(), after pursuit rewards:
+Vec3 to_opp = sub3(o->pos, p->pos);
+float dist = norm3(to_opp);
+Vec3 to_opp_norm = normalize3(to_opp);
+Vec3 player_fwd = quat_rotate(p->ori, vec3(1, 0, 0));
+float aim_dot = dot3(to_opp_norm, player_fwd); // 1.0 = perfect aim
+
+// Reward for tracking (within 2x gun cone and in range)
+if (aim_dot > cosf(GUN_CONE_ANGLE * 2) && dist < GUN_RANGE) {
+    reward += 0.05f; // Small continuous reward for good tracking
+}
+
+// Bonus for very close aim (within gun cone, whether or not the agent fires)
+if (aim_dot > cosf(GUN_CONE_ANGLE) && dist < GUN_RANGE) {
+    reward += 0.1f; // Stronger reward for a firing solution
+}
+```
+
+### Priority 3: Wider Gun Cone (Training Wheels)
+
+**Current**: 5° (0.087 rad) - realistic but hard
+**Proposed**: Start with 10-15° for initial training
+
+```c
+// Option: Make gun cone a parameter instead of constant
+// In Dogfight struct:
+float gun_cone_angle; // Set via config
+
+// Or just widen temporarily:
+#define GUN_CONE_ANGLE 0.175f // ~10 degrees
+```
+
+### Priority 4: Target Behavior Modes
+
+Add different target behaviors for curriculum:
+
+```c
+typedef enum {
+    TARGET_STRAIGHT = 0, // Current: flies straight
+    TARGET_CIRCLE = 1,   // Constant gentle turn
+    TARGET_WEAVE = 2,    // Sinusoidal lateral movement
+    TARGET_RANDOM = 3    // Occasional random turns
+} TargetMode;
+
+void step_plane(Plane *p, float dt, TargetMode mode) {
+    switch (mode) {
+        case TARGET_STRAIGHT:
+            // Current behavior
+            break;
+
+        case TARGET_CIRCLE: {
+            // Constant turn rate (braced so the declarations are legal C)
+            float turn_rate = 0.3f; // rad/s, ~17 deg/s
+            Quat turn = quat_from_axis_angle(vec3(0, 0, 1), turn_rate * dt);
+            p->ori = quat_mul(turn, p->ori);
+            quat_normalize(&p->ori);
+            break;
+        }
+
+        case TARGET_WEAVE: {
+            // Sinusoidal yaw
+            // NOTE: static state is shared by every plane/env; in practice store
+            // the phase on the plane (as autopilot.h later does with ap->phase)
+            static float phase = 0;
+            phase += dt;
+            float yaw_rate = 0.5f * sinf(phase * 0.5f);
+            Quat weave = quat_from_axis_angle(vec3(0, 0, 1), yaw_rate * dt);
+            p->ori = quat_mul(weave, p->ori);
+            quat_normalize(&p->ori);
+            break;
+        }
+    }
+
+    Vec3 forward = quat_rotate(p->ori, vec3(1, 0, 0));
+    p->vel = mul3(forward, 80.0f);
+    p->pos = add3(p->pos, mul3(p->vel, dt));
+}
+```
+
+### Priority 5: Better Observations for Aiming
+
+Current observations (19 total):
+- Player state: pos(3), vel(3), ori(4), up(3) = 13
+- Relative: pos(3), vel(3) = 6
+
+**Missing**: Direct aim information
+
+**Add**:
+```c
+// After existing observations:
+
+// Aim dot product (1.0 = perfect aim, -1.0 = facing away)
+Vec3 to_opp_norm = normalize3(sub3(o->pos, p->pos));
+Vec3 player_fwd = quat_rotate(p->ori, vec3(1, 0, 0));
+float aim_dot = dot3(to_opp_norm, player_fwd);
+env->observations[i++] = aim_dot;
+
+// Distance normalized by gun range (0-1 = in range, >1 = out of range)
+float dist = norm3(sub3(o->pos, p->pos));
+env->observations[i++] = dist / GUN_RANGE;
+
+// Update OBS_SIZE from 19 to 21
+```
+
+---
+
+## Curriculum Learning Plan
+
+| Level | Target Behavior | Speed | Gun Cone | Spawn Distance |
+|-------|----------------|-------|----------|----------------|
+| 1 | Stationary | 0 m/s | 15° | 200-300m |
+| 2 | Slow straight | 40 m/s | 12° | 200-400m |
+| 3 | Medium straight | 80 m/s | 10° | 300-500m |
+| 4 | Gentle circles | 80 m/s | 7° | 300-600m |
+| 5 | Variable | 80 m/s | 5° | 300-600m |
+
+---
+
+## Implementation Order
+
+1.
**Quick wins** (do first): + - [x] Fix opponent spawn to match player direction → **FAILED, REVERTED** (made things worse) + - [x] Add aiming reward → **SUCCESS** (+58% return, +89% kills, +125% accuracy) + - [x] Run benchmark to compare + +2. **If still struggling**: + - [ ] Widen gun cone temporarily + - [x] Add aim_dot and distance observations → **TESTED** in obs_scheme sweep (schemes 1-5 include aim helpers, but WORLD_FRAME scheme 0 still best - see OBSERVATION_EXPERIMENTS.md) + - [x] Run benchmark + +3. **For polish**: + - [x] Add target behavior modes → **DONE** autopilot system with 7 modes + - [x] Implement curriculum → **DONE** mode_weights for curriculum learning + +--- + +## Debug Tools + +### DEBUG flag in dogfight.h +Set `#define DEBUG 1` at the top of dogfight.h to enable verbose per-step logging: +- Actions (throttle, elevator, ailerons, rudder, trigger) +- Physics (speed, AoA, lift, drag, thrust, g-force) +- Target state (speed, position, direction) +- Reward breakdown (each component) +- Combat (aim angle, distance, in_cone, in_range) + +### Python sanity tests +Run `python pufferlib/ocean/dogfight/test_flight.py` to verify physics: +- Full throttle straight flight → ~150 m/s max +- Stall speed → ~50 m/s +- Climb rate → ~16 m/s +- Glide L/D → ~14.7 +- Turn tests → 30° and 60° bank with PID control + +--- + +## Test Ideas + +```c +void test_opponent_spawns_same_direction() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Player and opponent should be flying roughly same direction + Vec3 player_fwd = quat_rotate(env.player.ori, vec3(1, 0, 0)); + Vec3 opp_fwd = quat_rotate(env.opponent.ori, vec3(1, 0, 0)); + float alignment = dot3(player_fwd, opp_fwd); + + assert(alignment > 0.9f); // Should be nearly parallel +} + +void test_aiming_reward() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place player aimed directly at opponent + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(200, 0, 500); // Directly ahead + + c_step(&env); + float reward_aimed = env.rewards[0]; + + // Place player aimed away + c_reset(&env); + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat_from_axis_angle(vec3(0, 0, 1), PI / 2); // 90° off + env.opponent.pos = vec3(200, 0, 500); + + c_step(&env); + float reward_not_aimed = env.rewards[0]; + + assert(reward_aimed > reward_not_aimed); +} +``` diff --git a/pufferlib/ocean/dogfight/autopilot.h b/pufferlib/ocean/dogfight/autopilot.h new file mode 100644 index 000000000..281b50fca --- /dev/null +++ b/pufferlib/ocean/dogfight/autopilot.h @@ -0,0 +1,346 @@ +/** + * autopilot.h - Target aircraft flight maneuvers + * + * Provides autopilot modes for opponent aircraft during training. + * Can be set randomly at reset or forced via API for curriculum learning. 
+ */ + +#ifndef AUTOPILOT_H +#define AUTOPILOT_H + +#include "flightlib.h" +#include + +// Autopilot mode enumeration +typedef enum { + AP_STRAIGHT = 0, // Fly straight (current/default behavior) + AP_LEVEL, // Level flight with PD on vz + AP_TURN_LEFT, // Coordinated left turn + AP_TURN_RIGHT, // Coordinated right turn + AP_CLIMB, // Constant climb rate + AP_DESCEND, // Constant descent rate + AP_HARD_TURN_LEFT, // Aggressive 70° left turn + AP_HARD_TURN_RIGHT, // Aggressive 70° right turn + AP_WEAVE, // Sine wave jinking (S-turns) + AP_EVASIVE, // Break turn when threat behind + AP_RANDOM, // Random mode selection at reset + AP_COUNT +} AutopilotMode; + +// PID gains (from test_flight.py) +#define AP_LEVEL_KP 0.001f +#define AP_LEVEL_KD 0.001f +#define AP_TURN_ELEV_KP -0.05f +#define AP_TURN_ELEV_KD 0.005f +#define AP_TURN_ROLL_KP -2.0f +#define AP_TURN_ROLL_KD -0.1f + +// Default parameters +#define AP_DEFAULT_THROTTLE 1.0f +#define AP_DEFAULT_BANK_DEG 30.0f // Base gentle turns +#define AP_DEFAULT_CLIMB_RATE 5.0f + +// Stage-specific bank angles (curriculum progression) +#define AP_STAGE4_BANK_DEG 30.0f // MANEUVERING - gentle 30° turns +#define AP_STAGE5_BANK_DEG 45.0f // FULL_RANDOM - medium 45° turns +#define AP_STAGE6_BANK_DEG 60.0f // HARD_MANEUVERING - steep 60° turns +#define AP_HARD_BANK_DEG 70.0f // EVASIVE - aggressive 70° turns +#define AP_WEAVE_AMPLITUDE 0.6f // ~35° bank amplitude (radians) +#define AP_WEAVE_PERIOD 3.0f // 3 second full cycle + +// Autopilot state for a plane +typedef struct { + AutopilotMode mode; + int randomize_on_reset; // If true, pick random mode each reset + float throttle; // Target throttle [0,1] + float target_bank; // Target bank angle (radians) + float target_vz; // Target vertical velocity (m/s) + + // Curriculum: mode selection weights (sum to 1.0) + float mode_weights[AP_COUNT]; + + // Own RNG state (not affected by srand() calls) + unsigned int rng_state; + + // PID gains + float pitch_kp, pitch_kd; + float roll_kp, roll_kd; + + // PID state (for derivative terms) + float prev_vz; + float prev_bank_error; + + // AP_WEAVE state + float phase; // Sine wave phase for weave oscillation + + // AP_EVASIVE state (set by caller each step) + Vec3 threat_pos; // Position of threat to evade +} AutopilotState; + +// Simple LCG random for autopilot (not affected by srand) +static inline float ap_rand(AutopilotState* ap) { + ap->rng_state = ap->rng_state * 1103515245 + 12345; + return (float)((ap->rng_state >> 16) & 0x7FFF) / 32767.0f; +} + +// Initialize autopilot with defaults +static inline void autopilot_init(AutopilotState* ap) { + ap->mode = AP_STRAIGHT; + ap->randomize_on_reset = 0; + ap->throttle = AP_DEFAULT_THROTTLE; + ap->target_bank = AP_DEFAULT_BANK_DEG * (PI / 180.0f); + ap->target_vz = AP_DEFAULT_CLIMB_RATE; + + // Default: uniform weights for modes 1-5 (skip STRAIGHT and RANDOM) + for (int i = 0; i < AP_COUNT; i++) { + ap->mode_weights[i] = 0.0f; + } + float uniform = 1.0f / 5.0f; // 5 modes: LEVEL, TURN_L, TURN_R, CLIMB, DESCEND + ap->mode_weights[AP_LEVEL] = uniform; + ap->mode_weights[AP_TURN_LEFT] = uniform; + ap->mode_weights[AP_TURN_RIGHT] = uniform; + ap->mode_weights[AP_CLIMB] = uniform; + ap->mode_weights[AP_DESCEND] = uniform; + + // Seed autopilot RNG from system rand (called once at init, not affected by later srand) + ap->rng_state = (unsigned int)rand(); + + ap->pitch_kp = AP_LEVEL_KP; + ap->pitch_kd = AP_LEVEL_KD; + ap->roll_kp = AP_TURN_ROLL_KP; + ap->roll_kd = AP_TURN_ROLL_KD; + + ap->prev_vz = 0.0f; + ap->prev_bank_error 
= 0.0f; + + // New mode state + ap->phase = 0.0f; + ap->threat_pos = vec3(0, 0, 0); +} + +// Set autopilot mode with parameters +static inline void autopilot_set_mode(AutopilotState* ap, AutopilotMode mode, + float throttle, float bank_deg, float climb_rate) { + ap->mode = mode; + ap->randomize_on_reset = (mode == AP_RANDOM) ? 1 : 0; + ap->throttle = throttle; + ap->target_bank = bank_deg * (PI / 180.0f); + ap->target_vz = climb_rate; + + // Reset PID state on mode change + ap->prev_vz = 0.0f; + ap->prev_bank_error = 0.0f; + + // Set appropriate gains based on mode + if (mode == AP_LEVEL || mode == AP_CLIMB || mode == AP_DESCEND) { + ap->pitch_kp = AP_LEVEL_KP; + ap->pitch_kd = AP_LEVEL_KD; + } else if (mode == AP_TURN_LEFT || mode == AP_TURN_RIGHT) { + ap->pitch_kp = AP_TURN_ELEV_KP; + ap->pitch_kd = AP_TURN_ELEV_KD; + ap->roll_kp = AP_TURN_ROLL_KP; + ap->roll_kd = AP_TURN_ROLL_KD; + } +} + +// Randomize autopilot mode using weighted selection (for AP_RANDOM at reset) +static inline void autopilot_randomize(AutopilotState* ap) { + float r = ap_rand(ap); // Use own RNG, not affected by srand() + float cumsum = 0.0f; + AutopilotMode selected = AP_LEVEL; // Default fallback + + for (int i = 1; i < AP_COUNT - 1; i++) { // Skip STRAIGHT(0) and RANDOM(6) + cumsum += ap->mode_weights[i]; + if (r <= cumsum) { + selected = (AutopilotMode)i; + break; + } + } + + // Save randomize flag (autopilot_set_mode would clear it) + int save_randomize = ap->randomize_on_reset; + autopilot_set_mode(ap, selected, AP_DEFAULT_THROTTLE, + AP_DEFAULT_BANK_DEG, AP_DEFAULT_CLIMB_RATE); + ap->randomize_on_reset = save_randomize; +} + +// Get bank angle from plane orientation +// Returns positive for right bank, negative for left bank +static inline float ap_get_bank_angle(Plane* p) { + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + float bank = acosf(fminf(fmaxf(up.z, -1.0f), 1.0f)); + if (up.y < 0) bank = -bank; + return bank; +} + +// Get vertical velocity from plane +static inline float ap_get_vz(Plane* p) { + return p->vel.z; +} + +// Clamp value to range +static inline float ap_clamp(float v, float lo, float hi) { + return fminf(fmaxf(v, lo), hi); +} + +// Main autopilot step function +// Computes actions[5] = [throttle, elevator, ailerons, rudder, trigger] +static inline void autopilot_step(AutopilotState* ap, Plane* p, float* actions, float dt) { + // Initialize all actions to zero + actions[0] = 0.0f; // throttle (will be set below) + actions[1] = 0.0f; // elevator + actions[2] = 0.0f; // ailerons + actions[3] = 0.0f; // rudder + actions[4] = -1.0f; // trigger (never fire) + + // Set throttle (convert from [0,1] to [-1,1] action space) + actions[0] = ap->throttle * 2.0f - 1.0f; + + float vz = ap_get_vz(p); + float bank = ap_get_bank_angle(p); + + switch (ap->mode) { + case AP_STRAIGHT: + // Do nothing - just fly straight with throttle + break; + + case AP_LEVEL: { + // PD control on vz to maintain level flight + float vz_error = -vz; // Target vz = 0 + float vz_deriv = (vz - ap->prev_vz) / dt; + float elevator = ap->pitch_kp * vz_error + ap->pitch_kd * vz_deriv; + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + break; + } + + case AP_TURN_LEFT: + case AP_TURN_RIGHT: { + // Dual PID: roll to target bank, pitch to maintain altitude + float target_bank = ap->target_bank; + if (ap->mode == AP_TURN_LEFT) target_bank = -target_bank; + + // Elevator PID (maintain vz = 0) + float vz_error = -vz; + float vz_deriv = (vz - ap->prev_vz) / dt; + float elevator = ap->pitch_kp * vz_error + ap->pitch_kd * 
vz_deriv; + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + + // Aileron PID (achieve target bank) + float bank_error = target_bank - bank; + float bank_deriv = (bank_error - ap->prev_bank_error) / dt; + float aileron = ap->roll_kp * bank_error + ap->roll_kd * bank_deriv; + actions[2] = ap_clamp(aileron, -1.0f, 1.0f); + ap->prev_bank_error = bank_error; + break; + } + + case AP_CLIMB: { + // PD control to maintain target climb rate + float vz_error = ap->target_vz - vz; + float vz_deriv = (vz - ap->prev_vz) / dt; + // Negative because nose-up pitch (negative elevator) increases climb + float elevator = -ap->pitch_kp * vz_error + ap->pitch_kd * vz_deriv; + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + break; + } + + case AP_DESCEND: { + // PD control to maintain target descent rate + float vz_error = -ap->target_vz - vz; // Target is negative vz + float vz_deriv = (vz - ap->prev_vz) / dt; + float elevator = -ap->pitch_kp * vz_error + ap->pitch_kd * vz_deriv; + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + break; + } + + case AP_HARD_TURN_LEFT: + case AP_HARD_TURN_RIGHT: { + // Aggressive turn with high bank angle (70°) + float target_bank = AP_HARD_BANK_DEG * (PI / 180.0f); + if (ap->mode == AP_HARD_TURN_LEFT) target_bank = -target_bank; + + // Hard pull to maintain altitude in steep bank + float vz_error = -vz; + float elevator = -0.5f + ap->pitch_kp * vz_error; // Base pull + PD + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + + // Aggressive aileron to achieve bank (50% more aggressive) + float bank_error = target_bank - bank; + float aileron = ap->roll_kp * bank_error * 1.5f; + actions[2] = ap_clamp(aileron, -1.0f, 1.0f); + break; + } + + case AP_WEAVE: { + // Sine wave banking - oscillates left/right, hard to lead + ap->phase += dt * (2.0f * PI / AP_WEAVE_PERIOD); + if (ap->phase > 2.0f * PI) ap->phase -= 2.0f * PI; + + float target_bank = AP_WEAVE_AMPLITUDE * sinf(ap->phase); + + // Elevator PID for level flight (maintain vz = 0) + float vz_error = -vz; + float vz_deriv = (vz - ap->prev_vz) / dt; + float elevator = ap->pitch_kp * vz_error + ap->pitch_kd * vz_deriv; + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + + // Aileron PID to track oscillating bank + float bank_error = target_bank - bank; + float bank_deriv = (bank_error - ap->prev_bank_error) / dt; + float aileron = ap->roll_kp * bank_error + ap->roll_kd * bank_deriv; + actions[2] = ap_clamp(aileron, -1.0f, 1.0f); + ap->prev_bank_error = bank_error; + break; + } + + case AP_EVASIVE: { + // Break turn away from threat when close and behind + Vec3 to_threat = sub3(ap->threat_pos, p->pos); + float dist = norm3(to_threat); + Vec3 fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + float dot_fwd = dot3(normalize3(to_threat), fwd); + + float target_bank = 0.0f; + float base_elevator = 0.0f; + + // Check if threat is close (<600m) and not in front (behind or side) + if (dist < 600.0f && dot_fwd < 0.3f) { + // Threat close and behind - BREAK TURN! + // Determine which side threat is on + Vec3 right = quat_rotate(p->ori, vec3(0, -1, 0)); + float dot_right = dot3(normalize3(to_threat), right); + + // Turn away from threat (opposite side) + target_bank = (dot_right > 0) ? 
-1.2f : 1.2f; // ~70° break + base_elevator = -0.6f; // Pull hard + } + + // Elevator: base pull + PD for altitude + float vz_error = -vz; + float elevator = base_elevator + ap->pitch_kp * vz_error; + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + + // Aileron to achieve break bank (aggressive) + float bank_error = target_bank - bank; + float aileron = ap->roll_kp * bank_error * 1.5f; + actions[2] = ap_clamp(aileron, -1.0f, 1.0f); + break; + } + + case AP_RANDOM: + // Should have been randomized at reset, fall through to straight + break; + + default: + break; + } +} + +#endif // AUTOPILOT_H diff --git a/pufferlib/ocean/dogfight/baselines/BASELINE_SUMMARY.md b/pufferlib/ocean/dogfight/baselines/BASELINE_SUMMARY.md new file mode 100644 index 000000000..e32c51af2 --- /dev/null +++ b/pufferlib/ocean/dogfight/baselines/BASELINE_SUMMARY.md @@ -0,0 +1,449 @@ +# Baseline Training Results + +Date: 2026-01-12 +Training: 100M steps each + +--- + +## Pre-Reward Shaping (baseline) +Reward: `-dist/10000` per step (pursuit only) + +| Run | Episode Return | Episode Length | +|-----|----------------|----------------| +| 1 | -31.78 | 1111 | +| 2 | -46.42 | 1247 | +| 3 | -73.12 | 1371 | +| **Mean** | **-50.44** | **1243** | + +Observations: +- High variance (-31 to -73) +- All returns negative +- Weak reward signal (~-0.03 per step) + +--- + +## Post-Reward Shaping (Phase 3.5) +Reward: base pursuit + closing velocity + tail position + altitude/speed penalties + +| Run | Episode Return | Episode Length | +|-----|----------------|----------------| +| 1 | -66.82 | 1140 | +| 2 | +5.32 | 1063 | +| 3 | +16.13 | 1050 | +| **Mean** | **-15.12** | **1084** | + +Observations: +- **2 of 3 runs achieved positive returns** (significant improvement) +- Shorter episodes (more decisive behavior) +- Still high variance (need more tuning) +- Closing velocity and tail position rewards working + +--- + +## Phase 5: Combat Mechanics +Reward: pursuit shaping + hit (+1.0) + kill (+10.0) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +29.54 | 1047 | 0.24 | 0.24/12.0 | +| 2 | +12.46 | 1081 | 0.16 | 0.16/11.8 | +| 3 | +28.31 | 1061 | 0.18 | 0.18/11.7 | +| **Mean** | **+23.44** | **1063** | **0.19** | **0.19/11.8** | + +Observations: +- **All 3 runs positive** (major improvement from Phase 3.5 mean of -15.12) +- Agent learned to shoot (~12 shots/episode, ~1.6% accuracy) +- ~0.19 kills per episode on average +- Combat rewards providing clear learning signal + +--- + +## Spawn Direction Fix (FAILED) +Date: 2026-01-13 +Change: Make respawned opponent fly same direction as player instead of always +X + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | -6.91 | 1107 | 0.133 | 0.133/11.0 | +| 2 | -97.46 | 1118 | 0.06 | 0.06/10.8 | +| 3 | -34.51 | 1075 | 0.061 | 0.06/12.4 | +| **Mean** | **-46.29** | **1100** | **0.085** | **0.08/11.4** | + +Observations: +- **Significantly worse than baseline** (-46.29 vs +23.44) +- Predictable +X direction was actually easier to learn +- **REVERTED** - keeping opponent always flies +X + +--- + +## Aiming Reward (SUCCESS) +Date: 2026-01-13 +Change: Add continuous reward for gun cone alignment (tracking bonus + firing solution bonus) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +63.08 | 1067 | 
0.51 | 0.51/10.2 | +| 2 | +12.64 | 1127 | 0.21 | 0.21/10.2 | +| 3 | +35.41 | 1113 | 0.37 | 0.37/9.9 | +| **Mean** | **+37.04** | **1102** | **0.36** | **0.36/10.1** | + +Observations: +- **+58% improvement in return** (+23.44 → +37.04) +- **+89% improvement in kills** (0.19 → 0.36) +- **+125% improvement in accuracy** (1.6% → 3.6%) +- Aiming reward provides gradient for learning to aim, not just fire + +--- + +## Physics Refactor (3582d2d4) - Pre-Quaternion Fix +Date: 2026-01-13 +Commit: 3582d2d4 "Physics in Own File - Test Flights" +Change: Moved physics to flightlib.h, added test_flight.py validation tests + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +45.32 | 1139 | 0.42 | 0.42/10.2 | +| 2 | +15.30 | 1136 | 0.19 | 0.19/10.2 | +| 3 | +51.87 | 1133 | 0.46 | 0.46/10.0 | +| **Mean** | **+37.50** | **1136** | **0.36** | **0.36/10.1** | + +Observations: +- Performance consistent with Aiming Reward baseline (+37.04 → +37.50) +- Physics refactor did not affect training +- test_flight.py shows climb_rate test failing (-29.6 vs +15.4 expected) +- Quaternion sign issue identified in test setup (not affecting training) + +--- + +## Coordinated Turn Tests (1c30c546) +Date: 2026-01-14 +Commit: 1c30c546 "Coordinated Turn Tests" +Change: Fixed quaternion signs in tests, added 60° coordinated turn test with PID validation (97% efficiency) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +26.17 | 1151 | 0.29 | 0.29/10.5 | +| 2 | +55.99 | 1148 | 0.47 | 0.47/10.6 | +| 3 | +10.82 | 1151 | 0.20 | 0.20/9.6 | +| **Mean** | **+30.99** | **1150** | **0.32** | **0.32/10.2** | + +Observations: +- Performance consistent with previous baseline (+37.50 → +30.99, within variance) +- Test fixes did not affect training (physics unchanged) +- All tests now passing: max_speed, stall, climb, glide, turn_30, turn_60, pitch, roll + +--- + +## Performance Optimizations (374871df) +Date: 2026-01-14 +Commit: 374871df +Change: Replace divisions with multiplications by inverse constants; precompute gun cone cosf() per episode + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +44.39 | 1128 | 0.40 | 0.40/10.1 | +| 2 | +37.43 | 1139 | 0.34 | 0.34/9.9 | +| 3 | +45.54 | 1128 | 0.40 | 0.40/11.2 | +| **Mean** | **+42.45** | **1132** | **0.38** | **0.38/10.4** | + +Observations: +- **+37% improvement over previous baseline** (+30.99 → +42.45) +- 21 divisions replaced with multiplications (2.3x faster per op) +- Gun cone trig precomputed per episode (curriculum-ready) +- SPS: 1.2-1.3M + +--- + +## Autopilot Infrastructure (85980679) +Date: 2026-01-14 +Commit: 85980679 +Change: Add opponent autopilot system for curriculum learning (not enabled by default) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +36.85 | 1140 | 0.33 | 0.33/9.1 | +| 2 | +55.26 | 1140 | 0.51 | 0.51/11.3 | +| 3 | +12.78 | 1150 | 0.25 | 0.25/10.9 | +| **Mean** | **+34.97** | **1143** | **0.36** | **0.36/10.4** | + +Changes: +- NEW: autopilot.h - 7 autopilot modes (STRAIGHT, LEVEL, TURN_LEFT/RIGHT, CLIMB, DESCEND, RANDOM) +- NEW: set_autopilot() Python API for curriculum learning +- Default: AP_STRAIGHT (identical to previous behavior) +- PID gains from test_flight.py validation + 
+Observations: +- Performance consistent with baseline (+42.45 → +34.97, within variance) +- **No regression** - autopilot infrastructure has negligible overhead +- Autopilot disabled by default (AP_STRAIGHT = old behavior) +- Ready for curriculum: call `env.set_autopilot(mode=AutopilotMode.RANDOM)` to enable + +--- + +## Vectorized set_autopilot (80bcf31e) +Date: 2026-01-14 +Commit: 80bcf31e +Change: Add vec_set_autopilot() C binding; set_autopilot(env_idx=None) sets all envs in one call + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +45.37 | 1153 | 0.47 | 0.47/10.6 | +| 2 | +51.04 | 1140 | 0.46 | 0.46/11.2 | +| 3 | +37.00 | 1110 | 0.35 | 0.35/10.8 | +| **Mean** | **+44.47** | **1134** | **0.43** | **0.43/10.9** | + +Changes: +- binding.c: Added vec_set_autopilot() for batch autopilot configuration +- dogfight.py: set_autopilot(env_idx=None) now sets all envs in one C call + +Observations: +- Performance consistent with baseline (+34.97 → +44.47, within variance) +- **No regression** - vectorized API adds no overhead during training +- Unblocks multi-env curriculum learning (no more N Python->C calls) + +--- + +## Mode Weights for Curriculum (0a1c2e6d) +Date: 2026-01-14 +Commit: 0a1c2e6d +Change: Add weighted random mode selection for curriculum learning + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +41.15 | 1139 | 0.36 | 0.36/11.0 | +| 2 | +53.82 | 1149 | 0.46 | 0.46/7.8 | +| 3 | +52.25 | 1133 | 0.45 | 0.45/10.2 | +| **Mean** | **+49.07** | **1140** | **0.42** | **0.42/9.7** | + +Changes: +- autopilot.h: Added mode_weights[AP_COUNT] array, weighted random selection in autopilot_randomize() +- autopilot.h: Added separate LCG RNG (rng_state) to avoid srand() interference from vec_reset +- binding.c: Added vec_set_mode_weights(), env_get_autopilot_mode() bindings +- dogfight.py: Added set_mode_weights(), get_autopilot_mode() methods +- test_flight.py: Added test_mode_weights() unit test + +Observations: +- Performance consistent with baseline (+44.47 → +49.07, within variance) +- **No regression** - mode weights infrastructure has negligible overhead +- Fixed RNG bug: autopilot now uses own LCG instead of shared rand() (was always selecting same mode) +- Ready for curriculum: `env.set_mode_weights(level=0.8, turn_left=0.1, turn_right=0.1)` to bias easy modes + +--- + +## Observation Scheme Sweep + +### Scheme 0: WORLD_FRAME (Baseline) +Date: 2026-01-14 +Config: obs_scheme = 0 +Observations: 19 (player pos/vel/ori/up + world-frame rel_pos/vel) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +18.22 | 1128 | 0.20 | 0.20/10.1 | +| 2 | +19.71 | 1135 | 0.23 | 0.23/10.5 | +| 3 | +52.98 | 1139 | 0.46 | 0.46/8.0 | +| **Mean** | **+30.30** | **1134** | **0.30** | **0.30/9.5** | + +Observations: +- High variance between runs (18-53 return) +- Baseline for comparison with body-frame and angles schemes + +--- + +### Scheme 1: BODY_FRAME +Date: 2026-01-14 +Config: obs_scheme = 1 +Observations: 21 (body-frame rel_pos/vel + aim_dot + dist_norm) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +50.12 | 1205 | 0.14 | 0.14/1.2 | +| 2 | +21.95 | 1292 | 0.02 | 0.02/0.5 | +| 3 | -5.26 | 1258 | 0.19 | 0.19/8.4 | +| 
**Mean** | **+22.27** | **1252** | **0.12** | **0.12/3.4** | + +Observations: +- **Worse than WORLD_FRAME** (+22.27 vs +30.30) +- Agent fires much less often (3.4 shots vs 9.5) +- Fewer kills despite aim helpers (0.12 vs 0.30) +- Higher variance - body-frame transform may confuse learning + +--- + +### Scheme 2: ANGLES +Date: 2026-01-14 +Config: obs_scheme = 2 +Observations: 12 (pos + speed + euler angles + azimuth/elevation/dist + closing_rate + opp_heading) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +163.78 | 1198 | 0.01 | 0.01/0.02 | +| 2 | +71.56 | 1298 | 0.31 | 0.31/4.9 | +| 3 | +151.36 | 1263 | 0.01 | 0.01/0.06 | +| **Mean** | **+128.90** | **1253** | **0.11** | **0.11/1.7** | + +Observations: +- **Highest return** but misleading - agent exploits pursuit shaping without shooting +- 2 of 3 runs learned to not fire at all (0.02 and 0.06 shots) +- Only run 2 learned combat (0.31 kills) +- Smaller obs space (12) may lack info needed to learn trigger timing + +--- + +### Observation Scheme Summary + +| Scheme | Obs Size | Mean Return | Mean Kills | Shots/Ep | Notes | +|--------|----------|-------------|------------|----------|-------| +| 0: WORLD_FRAME | 19 | +30.30 | 0.30 | 9.5 | **Best combat learning** | +| 1: BODY_FRAME | 21 | +22.27 | 0.12 | 3.4 | Worse than baseline | +| 2: ANGLES | 12 | +128.90 | 0.11 | 1.7 | Exploits pursuit reward | + +--- + +### Scheme 3: CONTROL_ERROR +Date: 2026-01-14 +Config: obs_scheme = 3 +Observations: 17 (player state + pitch/yaw/roll errors to target + closing_rate + opp_heading) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +165.98 | 1233 | 0.00 | 0.00/0.00 | +| 2 | +167.76 | 1238 | 0.00 | 0.00/0.00 | +| 3 | +165.45 | 1245 | 0.00 | 0.00/0.01 | +| **Mean** | **+166.40** | **1239** | **0.00** | **0.00/0.00** | + +Observations: +- **Highest return** but completely exploits pursuit reward +- Agent learned to not fire at all (0 shots across all runs) +- Control error obs may be too "solved" - agent just follows target + +--- + +### Scheme 4: REALISTIC +Date: 2026-01-14 +Config: obs_scheme = 4 +Observations: 10 (airspeed/altitude/pitch/roll + gunsight az/el/size + aspect/horizon/dist) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +174.53 | 1159 | 0.00 | 0.00/0.01 | +| 2 | +171.96 | 1174 | 0.00 | 0.00/0.01 | +| 3 | +158.68 | 1252 | 0.00 | 0.00/0.01 | +| **Mean** | **+168.39** | **1195** | **0.00** | **0.00/0.01** | + +Observations: +- **Very high return** but no combat at all +- Smallest network (2.2K params) but same exploitation pattern +- Missing world position may prevent learning proper pursuit + +--- + +### Scheme 5: MAXIMALIST +Date: 2026-01-14 +Config: obs_scheme = 5 +Observations: 43 (everything: world+body velocities, quaternion+euler, world+body rel_pos/vel, angles, etc.) 
+ +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +90.95 | 1279 | 0.04 | 0.04/0.10 | +| 2 | +66.94 | 1167 | 0.31 | 0.31/2.1 | +| 3 | +92.29 | 1219 | 0.04 | 0.04/0.11 | +| **Mean** | **+83.39** | **1222** | **0.13** | **0.13/0.8** | + +Observations: +- Run 2 learned combat (0.31 kills) - only non-WORLD_FRAME scheme to do so reliably +- Lower return than pursuit-exploiting schemes but more combat +- Largest network (6.4K params) - may need more training time + +--- + +### Final Observation Scheme Summary + +| Scheme | Obs Size | Params | Mean Return | Mean Kills | Shots/Ep | Combat? | +|--------|----------|--------|-------------|------------|----------|---------| +| 0: WORLD_FRAME | 19 | 3.3K | +30.30 | **0.30** | 9.5 | **YES** | +| 1: BODY_FRAME | 21 | 3.6K | +22.27 | 0.12 | 3.4 | Weak | +| 2: ANGLES | 12 | 2.4K | +128.90 | 0.11 | 1.7 | No | +| 3: CONTROL_ERROR | 17 | 3.1K | +166.40 | 0.00 | 0.0 | No | +| 4: REALISTIC | 10 | 2.2K | +168.39 | 0.00 | 0.0 | No | +| 5: MAXIMALIST | 43 | 6.4K | +83.39 | 0.13 | 0.8 | 1/3 runs | + +**Conclusion:** WORLD_FRAME (scheme 0) is the best observation representation for learning combat: +- Only scheme where all 3 runs learned to fire consistently +- Best kill rate (0.30 kills/episode) +- The "engineered" schemes (ANGLES, CONTROL_ERROR, REALISTIC) all exploit pursuit reward without learning to shoot +- MAXIMALIST occasionally learns combat but inconsistently + +**Insight:** The pursuit reward shaping is too strong relative to kill rewards. Agents can achieve high return just by chasing without ever firing. The world-frame observations may make it harder to exploit this pattern because the agent can't "solve" pursuit as cleanly. + +--- + +## Sweepable Rewards v2 (04dd0167) +Date: 2026-01-14 +Commit: 04dd0167 +Change: Reward system overhaul - kill reward 10.0→1.0, all rewards sweepable via INI, perf metric for kill rate + +| Run | Episode Return | Episode Length | Kills | Perf | Accuracy | +|-----|----------------|----------------|-------|------|----------| +| 1 | -0.28 | 1155 | 1.18 | 0.706 | 1.3% | +| 2 | +43.03 | 1159 | 3.87 | 0.963 | 5.2% | +| 3 | +43.91 | 1152 | 5.57 | 0.988 | 6.3% | +| **Mean** | **+28.89** | **1155** | **3.54** | **0.886** | **4.3%** | + +Changes: +- Kill reward: 10.0 → 1.0 (fixed, not swept) +- Hit reward: 1.0 → 0.5 (sweepable) +- All shaping rewards now configurable via INI +- NEW: `perf` metric = fraction of episodes with kills (0.0-1.0) +- Episode-level kill/shot tracking + +**Comparison with Previous Best (Phase 5 Combat):** + +| Metric | Old (kill=10) | New (kill=1) | Change | +|--------|---------------|--------------|--------| +| Kills/ep | 0.19 | 3.54 | **+1763%** | +| Accuracy | 1.6% | 4.3% | **+169%** | +| Return | +23.44 | +28.89 | +23% | + +**Observations:** +- **Massive improvement in kills** - 18x more kills per episode +- 2/3 runs learned strong combat (perf > 0.96) +- Run 1 weaker but still learned some shooting +- Lower kill reward (1.0 vs 10.0) paradoxically improved learning +- Simpler reward signal easier to optimize + +--- + +## Terminal on Kill (a31d1dc7) +Date: 2026-01-15 +Commit: a31d1dc7 + +### Major Changes +1. **Terminal on kill**: Episode ends immediately when player kills opponent (was: respawn and continue) +2. **Binary perf metric**: `perf = env->kill ? 1.0 : 0.0` per episode (was: cumulative kills / episodes) +3. 
**Simplified Log struct**: Removed `kills`, `deaths`, `shots_hit` (redundant with terminal-on-kill) +4. **Kill flag on env**: Added `env->kill` to track success per episode +5. **Score = terminal reward**: `score = rewards[0]` (1.0 on kill, 0.0 on failure) + +### Results + +| Run | Episode Return | Episode Length | Perf | Shots/Ep | +|-----|----------------|----------------|------|----------| +| 1 | +120.65 | 1102 | 0.002 | 0.01 | +| 2 | +121.18 | 1264 | 0.002 | 0.01 | +| 3 | +76.90 | 1232 | 0.009 | 0.04 | +| **Mean** | **+106.24** | **1199** | **0.004** | **0.02** | + +### Comparison with Previous (Sweepable Rewards v2) + +| Metric | Before | After | Change | +|--------|--------|-------|--------| +| Perf | 0.886 | 0.004 | -99.5% | +| Shots/ep | 74.4 | 0.02 | -99.97% | +| Episode Return | +28.89 | +106.24 | +268% | + +### Observations +- Agent learned to NOT fire - perf dropped from 88.6% to 0.4% +- Very few shots fired (0.01-0.04 per episode vs 74 before) +- High return comes from pursuit shaping, not combat +- Needs investigation: DEBUG printf, check miss penalty, verify shots_fired tracking diff --git a/pufferlib/ocean/dogfight/binding.c b/pufferlib/ocean/dogfight/binding.c new file mode 100644 index 000000000..f6abfb0e6 --- /dev/null +++ b/pufferlib/ocean/dogfight/binding.c @@ -0,0 +1,328 @@ +#include "dogfight.h" + +#define Env Dogfight + +// We need Python.h for the forward declaration, but env_binding.h includes it +// So we'll put the forward decl and MY_METHODS after including env_binding.h +// but we need MY_METHODS defined before... Let's restructure. + +// Include Python first to get PyObject type +#include + +// Forward declare our custom methods +static PyObject* env_force_state(PyObject* self, PyObject* args, PyObject* kwargs); +static PyObject* env_set_autopilot(PyObject* self, PyObject* args, PyObject* kwargs); +static PyObject* vec_set_autopilot(PyObject* self, PyObject* args, PyObject* kwargs); +static PyObject* vec_set_mode_weights(PyObject* self, PyObject* args, PyObject* kwargs); +static PyObject* env_get_autopilot_mode(PyObject* self, PyObject* args); +static PyObject* env_get_state(PyObject* self, PyObject* args); +static PyObject* env_set_obs_highlight(PyObject* self, PyObject* args); + +// Register custom methods before including the template +#define MY_METHODS \ + {"env_force_state", (PyCFunction)env_force_state, METH_VARARGS | METH_KEYWORDS, "Force environment state"}, \ + {"env_set_autopilot", (PyCFunction)env_set_autopilot, METH_VARARGS | METH_KEYWORDS, "Set opponent autopilot mode"}, \ + {"vec_set_autopilot", (PyCFunction)vec_set_autopilot, METH_VARARGS | METH_KEYWORDS, "Set autopilot for all envs"}, \ + {"vec_set_mode_weights", (PyCFunction)vec_set_mode_weights, METH_VARARGS | METH_KEYWORDS, "Set mode weights for all envs"}, \ + {"env_get_autopilot_mode", (PyCFunction)env_get_autopilot_mode, METH_VARARGS, "Get current autopilot mode"}, \ + {"env_get_state", (PyCFunction)env_get_state, METH_VARARGS, "Get raw player state"}, \ + {"env_set_obs_highlight", (PyCFunction)env_set_obs_highlight, METH_VARARGS, "Set observation indices to highlight with red arrows"} + +// Helper to get float from kwargs with default (before env_binding.h since my_init uses it) +static float get_float(PyObject *kwargs, const char *key, float default_val) { + if (!kwargs) return default_val; + PyObject *val = PyDict_GetItemString(kwargs, key); + if (!val) return default_val; + if (PyFloat_Check(val)) return (float)PyFloat_AsDouble(val); + if (PyLong_Check(val)) return 
(float)PyLong_AsLong(val); + return default_val; +} + +// Helper to get int from kwargs with default +static int get_int(PyObject *kwargs, const char *key, int default_val) { + if (!kwargs) return default_val; + PyObject *val = PyDict_GetItemString(kwargs, key); + if (!val) return default_val; + if (PyLong_Check(val)) return (int)PyLong_AsLong(val); + if (PyFloat_Check(val)) return (int)PyFloat_AsDouble(val); + return default_val; +} + +#include "../env_binding.h" + +static int my_init(Env *env, PyObject *args, PyObject *kwargs) { + env->max_steps = unpack(kwargs, "max_steps"); + int obs_scheme = get_int(kwargs, "obs_scheme", 0); + + RewardConfig rcfg = { + .aim_scale = get_float(kwargs, "reward_aim_scale", 0.05f), + .closing_scale = get_float(kwargs, "reward_closing_scale", 0.003f), + .neg_g = get_float(kwargs, "penalty_neg_g", 0.02f), + .speed_min = get_float(kwargs, "speed_min", 50.0f), + }; + + int curriculum_enabled = get_int(kwargs, "curriculum_enabled", 0); + int curriculum_randomize = get_int(kwargs, "curriculum_randomize", 0); + + float advance_threshold = get_float(kwargs, "advance_threshold", 0.7f); + + int env_num = get_int(kwargs, "env_num", 0); + + init(env, obs_scheme, &rcfg, curriculum_enabled, curriculum_randomize, advance_threshold, env_num); + return 0; +} + +static int my_log(PyObject *dict, Log *log) { + assign_to_dict(dict, "episode_return", log->episode_return); + assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "perf", log->perf); + assign_to_dict(dict, "shots_fired", log->shots_fired); + assign_to_dict(dict, "accuracy", log->accuracy); + assign_to_dict(dict, "stage", log->stage); + assign_to_dict(dict, "total_stage_weight", log->total_stage_weight); + assign_to_dict(dict, "avg_stage_weight", log->avg_stage_weight); + assign_to_dict(dict, "avg_abs_bias", log->avg_abs_bias); + assign_to_dict(dict, "ultimate", log->ultimate); + assign_to_dict(dict, "n", log->n); + return 0; +} + +// Force state wrapper - unpacks kwargs and calls C function +static PyObject* env_force_state(PyObject* self, PyObject* args, PyObject* kwargs) { + // First arg is env handle + if (PyTuple_Size(args) != 1) { + PyErr_SetString(PyExc_TypeError, "env_force_state requires 1 positional arg (env handle)"); + return NULL; + } + + Env* env = unpack_env(args); + if (!env) return NULL; + + // Extract all parameters with defaults + // Player position + float p_px = get_float(kwargs, "p_px", 0.0f); + float p_py = get_float(kwargs, "p_py", 0.0f); + float p_pz = get_float(kwargs, "p_pz", 1000.0f); + + // Player velocity + float p_vx = get_float(kwargs, "p_vx", 150.0f); + float p_vy = get_float(kwargs, "p_vy", 0.0f); + float p_vz = get_float(kwargs, "p_vz", 0.0f); + + // Player orientation (identity quat = wings level, flying +X) + float p_ow = get_float(kwargs, "p_ow", 1.0f); + float p_ox = get_float(kwargs, "p_ox", 0.0f); + float p_oy = get_float(kwargs, "p_oy", 0.0f); + float p_oz = get_float(kwargs, "p_oz", 0.0f); + + // Player throttle + float p_throttle = get_float(kwargs, "p_throttle", 1.0f); + + // Opponent position (-9999 = auto: 400m ahead) + float o_px = get_float(kwargs, "o_px", -9999.0f); + float o_py = get_float(kwargs, "o_py", -9999.0f); + float o_pz = get_float(kwargs, "o_pz", -9999.0f); + + // Opponent velocity (-9999 = auto: match player) + float o_vx = get_float(kwargs, "o_vx", -9999.0f); + float o_vy = get_float(kwargs, "o_vy", -9999.0f); + float o_vz = get_float(kwargs, "o_vz", -9999.0f); + + // Opponent 
orientation (-9999 = auto: match player) + float o_ow = get_float(kwargs, "o_ow", -9999.0f); + float o_ox = get_float(kwargs, "o_ox", -9999.0f); + float o_oy = get_float(kwargs, "o_oy", -9999.0f); + float o_oz = get_float(kwargs, "o_oz", -9999.0f); + + // Environment tick + int tick = get_int(kwargs, "tick", 0); + + // Call the C function + force_state(env, + p_px, p_py, p_pz, + p_vx, p_vy, p_vz, + p_ow, p_ox, p_oy, p_oz, + p_throttle, + o_px, o_py, o_pz, + o_vx, o_vy, o_vz, + o_ow, o_ox, o_oy, o_oz, + tick + ); + + Py_RETURN_NONE; +} + +// Set autopilot mode for opponent aircraft +static PyObject* env_set_autopilot(PyObject* self, PyObject* args, PyObject* kwargs) { + if (PyTuple_Size(args) != 1) { + PyErr_SetString(PyExc_TypeError, "env_set_autopilot requires 1 positional arg (env handle)"); + return NULL; + } + + Env* env = unpack_env(args); + if (!env) return NULL; + + // Get autopilot parameters + int mode = get_int(kwargs, "mode", AP_STRAIGHT); + float throttle = get_float(kwargs, "throttle", AP_DEFAULT_THROTTLE); + float bank_deg = get_float(kwargs, "bank_deg", AP_DEFAULT_BANK_DEG); + float climb_rate = get_float(kwargs, "climb_rate", AP_DEFAULT_CLIMB_RATE); + + // Set the autopilot mode + autopilot_set_mode(&env->opponent_ap, (AutopilotMode)mode, throttle, bank_deg, climb_rate); + + Py_RETURN_NONE; +} + +// Set autopilot mode for all environments (vectorized) +static PyObject* vec_set_autopilot(PyObject* self, PyObject* args, PyObject* kwargs) { + if (PyTuple_Size(args) != 1) { + PyErr_SetString(PyExc_TypeError, "vec_set_autopilot requires 1 positional arg (vec handle)"); + return NULL; + } + + VecEnv* vec = unpack_vecenv(args); + if (!vec) return NULL; + + // Get autopilot parameters + int mode = get_int(kwargs, "mode", AP_STRAIGHT); + float throttle = get_float(kwargs, "throttle", AP_DEFAULT_THROTTLE); + float bank_deg = get_float(kwargs, "bank_deg", AP_DEFAULT_BANK_DEG); + float climb_rate = get_float(kwargs, "climb_rate", AP_DEFAULT_CLIMB_RATE); + + // Set autopilot for all environments + for (int i = 0; i < vec->num_envs; i++) { + autopilot_set_mode(&vec->envs[i]->opponent_ap, (AutopilotMode)mode, + throttle, bank_deg, climb_rate); + } + + Py_RETURN_NONE; +} + +// Set mode weights for curriculum learning (vectorized) +static PyObject* vec_set_mode_weights(PyObject* self, PyObject* args, PyObject* kwargs) { + if (PyTuple_Size(args) != 1) { + PyErr_SetString(PyExc_TypeError, "vec_set_mode_weights requires 1 positional arg (vec handle)"); + return NULL; + } + + VecEnv* vec = unpack_vecenv(args); + if (!vec) return NULL; + + // Get weights for each mode (default 0.2 each for modes 1-5) + float w_level = get_float(kwargs, "level", 0.2f); + float w_turn_left = get_float(kwargs, "turn_left", 0.2f); + float w_turn_right = get_float(kwargs, "turn_right", 0.2f); + float w_climb = get_float(kwargs, "climb", 0.2f); + float w_descend = get_float(kwargs, "descend", 0.2f); + + // Set weights for all environments + for (int i = 0; i < vec->num_envs; i++) { + AutopilotState* ap = &vec->envs[i]->opponent_ap; + ap->mode_weights[AP_LEVEL] = w_level; + ap->mode_weights[AP_TURN_LEFT] = w_turn_left; + ap->mode_weights[AP_TURN_RIGHT] = w_turn_right; + ap->mode_weights[AP_CLIMB] = w_climb; + ap->mode_weights[AP_DESCEND] = w_descend; + } + + Py_RETURN_NONE; +} + +// Get current autopilot mode (for testing/debugging) +static PyObject* env_get_autopilot_mode(PyObject* self, PyObject* args) { + Env* env = unpack_env(args); + if (!env) return NULL; + + return PyLong_FromLong((long)env->opponent_ap.mode); 
+} + +// Get raw player state (for physics tests - independent of obs_scheme) +static PyObject* env_get_state(PyObject* self, PyObject* args) { + Env* env = unpack_env(args); + if (!env) return NULL; + + Plane* p = &env->player; + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + Vec3 fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + + PyObject* dict = PyDict_New(); + if (!dict) return NULL; + + // Position + PyDict_SetItemString(dict, "px", PyFloat_FromDouble(p->pos.x)); + PyDict_SetItemString(dict, "py", PyFloat_FromDouble(p->pos.y)); + PyDict_SetItemString(dict, "pz", PyFloat_FromDouble(p->pos.z)); + + // Velocity + PyDict_SetItemString(dict, "vx", PyFloat_FromDouble(p->vel.x)); + PyDict_SetItemString(dict, "vy", PyFloat_FromDouble(p->vel.y)); + PyDict_SetItemString(dict, "vz", PyFloat_FromDouble(p->vel.z)); + + // Orientation quaternion + PyDict_SetItemString(dict, "ow", PyFloat_FromDouble(p->ori.w)); + PyDict_SetItemString(dict, "ox", PyFloat_FromDouble(p->ori.x)); + PyDict_SetItemString(dict, "oy", PyFloat_FromDouble(p->ori.y)); + PyDict_SetItemString(dict, "oz", PyFloat_FromDouble(p->ori.z)); + + // Up vector (derived) + PyDict_SetItemString(dict, "up_x", PyFloat_FromDouble(up.x)); + PyDict_SetItemString(dict, "up_y", PyFloat_FromDouble(up.y)); + PyDict_SetItemString(dict, "up_z", PyFloat_FromDouble(up.z)); + + // Forward vector (derived) + PyDict_SetItemString(dict, "fwd_x", PyFloat_FromDouble(fwd.x)); + PyDict_SetItemString(dict, "fwd_y", PyFloat_FromDouble(fwd.y)); + PyDict_SetItemString(dict, "fwd_z", PyFloat_FromDouble(fwd.z)); + + // Throttle + PyDict_SetItemString(dict, "throttle", PyFloat_FromDouble(p->throttle)); + + // G-force (current G-loading) + PyDict_SetItemString(dict, "g_force", PyFloat_FromDouble(p->g_force)); + + return dict; +} + +// Set which observation indices to highlight with red arrows +// Args: env_handle, list of indices (e.g., [4, 5, 6] for pitch, roll, yaw in scheme 0) +static PyObject* env_set_obs_highlight(PyObject* self, PyObject* args) { + PyObject* env_arg; + PyObject* indices_list; + + if (!PyArg_ParseTuple(args, "OO", &env_arg, &indices_list)) { + return NULL; + } + + // Get env from handle + Env* env = (Env*)PyLong_AsVoidPtr(env_arg); + if (!env) { + PyErr_SetString(PyExc_TypeError, "Invalid env handle"); + return NULL; + } + + // Clear existing highlights + memset(env->obs_highlight, 0, sizeof(env->obs_highlight)); + + // Parse list of indices + if (!PyList_Check(indices_list)) { + PyErr_SetString(PyExc_TypeError, "Second argument must be a list of indices"); + return NULL; + } + + Py_ssize_t n = PyList_Size(indices_list); + for (Py_ssize_t i = 0; i < n; i++) { + PyObject* item = PyList_GetItem(indices_list, i); + if (!PyLong_Check(item)) { + PyErr_SetString(PyExc_TypeError, "Indices must be integers"); + return NULL; + } + int idx = (int)PyLong_AsLong(item); + if (idx >= 0 && idx < 16) { + env->obs_highlight[idx] = 1; + } + } + + Py_RETURN_NONE; +} diff --git a/pufferlib/ocean/dogfight/dogfight.h b/pufferlib/ocean/dogfight/dogfight.h new file mode 100644 index 000000000..76f4656b6 --- /dev/null +++ b/pufferlib/ocean/dogfight/dogfight.h @@ -0,0 +1,936 @@ +// dogfight.h - WW2 aerial combat environment +// Uses flightlib.h for flight physics + +#include +#include +#include +#include +#include + +#include "raylib.h" +#include "rlgl.h" // For rlSetClipPlanes() + +#define DEBUG 0 +#define DEMOTE_THRESHOLD 0.3f +#define EVAL_WINDOW 50 +#define PENALTY_STALL 0.002f +#define PENALTY_RUDDER 0.001f + +#include "flightlib.h" +#include "autopilot.h" + +typedef 
enum { + OBS_ANGLES = 0, // Spherical coordinates (12 obs) + OBS_PURSUIT = 1, // Energy-aware pursuit observations (13 obs) + OBS_REALISTIC = 2, // Cockpit instruments only (10 obs) + OBS_REALISTIC_RANGE = 3, // REALISTIC with explicit range (10 obs) + OBS_REALISTIC_ENEMY_STATE = 4, // + enemy pitch/roll/heading (13 obs) + OBS_REALISTIC_FULL = 5, // + turn rate + G-loading (15 obs) + OBS_SCHEME_COUNT +} ObsScheme; + +static const int OBS_SIZES[OBS_SCHEME_COUNT] = {12, 13, 10, 10, 13, 15}; + +typedef enum { + CURRICULUM_TAIL_CHASE = 0, // Easiest: opponent ahead, same heading + CURRICULUM_HEAD_ON, // Opponent coming toward us + CURRICULUM_VERTICAL, // Above or below player (was stage 3) + CURRICULUM_MANEUVERING, // Opponent does turns (was stage 4) + CURRICULUM_FULL_RANDOM, // Mix of all basic modes (was stage 5) + CURRICULUM_HARD_MANEUVERING, // Hard turns + weave patterns (was stage 6) + CURRICULUM_CROSSING, // 45 degree deflection shots (was stage 2, reduced from 90°) + CURRICULUM_EVASIVE, // Reactive evasion (hardest) + CURRICULUM_COUNT +} CurriculumStage; + +// Stage difficulty weights for composite metric (higher = harder = more valuable) +// Used to compute difficulty_weighted_perf = perf * avg_stage_weight +// Reordered 2026-01-18 to match new enum order (see CURRICULUM_PLANS.md) +static const float STAGE_WEIGHTS[CURRICULUM_COUNT] = { + 0.2f, // TAIL_CHASE - trivial + 0.3f, // HEAD_ON - easy + 0.4f, // VERTICAL - medium (was stage 3) + 0.5f, // MANEUVERING - medium (was stage 4) + 0.65f, // FULL_RANDOM - medium-hard (was stage 5) + 0.8f, // HARD_MANEUVERING - hard (was stage 6) + 0.9f, // CROSSING - hard, 45° deflection (was stage 2) + 1.0f // EVASIVE - hardest +}; + +#define DT 0.02f + +#define WORLD_HALF_X 2000.0f +#define WORLD_HALF_Y 2000.0f +#define WORLD_MAX_Z 3000.0f +#define MAX_SPEED 250.0f + +#define INV_WORLD_HALF_X 0.0005f // 1/2000 +#define INV_WORLD_HALF_Y 0.0005f // 1/2000 +#define INV_WORLD_MAX_Z 0.000333333f // 1/3000 +#define INV_MAX_SPEED 0.004f // 1/250 +#define INV_PI 0.31830988618f // 1/PI +#define INV_HALF_PI 0.63661977236f // 2/PI (i.e., 1/(PI*0.5)) + +#define GUN_RANGE 500.0f // meters +#define INV_GUN_RANGE 0.002f // 1/500 +#define GUN_CONE_ANGLE 0.087f // ~5 degrees in radians +#define FIRE_COOLDOWN 10 // ticks (0.2 seconds at 50Hz) + +typedef struct Log { + float episode_return; + float episode_length; + float score; // 1.0 on kill, 0.0 on failure + float perf; + float shots_fired; + float accuracy; + float stage; // current curriculum stage (for monitoring) + // Curriculum-weighted metrics (Phase 1) + float total_stage_weight; // Sum of stage weights across all episodes + float avg_stage_weight; // total_stage_weight / n + float total_abs_bias; // Sum of |aileron_bias| at episode end + float avg_abs_bias; // total_abs_bias / n + float ultimate; // Main sweep metric: kill_rate * avg_stage_weight / (1 + avg_abs_bias * 0.01) + float n; +} Log; + +typedef enum DeathReason { + DEATH_NONE = 0, // Episode still running + DEATH_KILL = 1, // Player scored a kill (success) + DEATH_OOB = 2, // Out of bounds + DEATH_TIMEOUT = 3, // Max steps reached + DEATH_SUPERSONIC = 4 // Physics blowup +} DeathReason; + +typedef struct RewardConfig { + // Positive shaping + float aim_scale; // Continuous aiming reward (default 0.05) + float closing_scale; // +N per m/s closing (default 0.003) + // Penalties + float neg_g; // -N per unit G below 0.5 (default 0.02) - enforces "pull to turn" + // Thresholds + float speed_min; // Stall threshold (default 50.0) +} RewardConfig; + 
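+// Illustrative sketch, not part of the environment's API: one way to build a
+// RewardConfig with the documented defaults above. In the actual setup these
+// values come from the Python wrapper (dogfight.py); the helper name here is hypothetical.
+static inline RewardConfig reward_config_defaults(void) {
+    RewardConfig cfg;
+    cfg.aim_scale = 0.05f;      // continuous aiming reward
+    cfg.closing_scale = 0.003f; // per m/s of closing rate
+    cfg.neg_g = 0.02f;          // penalty per unit G below 0.5
+    cfg.speed_min = 50.0f;      // stall threshold (m/s)
+    return cfg;
+}
+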
+typedef struct Client { + Camera3D camera; + float width; + float height; + // Camera orbit state (for mouse control) + float cam_distance; + float cam_azimuth; + float cam_elevation; + bool is_dragging; + float last_mouse_x; + float last_mouse_y; +} Client; + +typedef struct Dogfight { + float *observations; + float *actions; + float *rewards; + unsigned char *terminals; + Log log; + Client *client; + int tick; + int max_steps; + float episode_return; + Plane player; + Plane opponent; + // Per-episode precomputed values (for curriculum learning) + float gun_cone_angle; // Hit detection cone (radians) - FIXED at 5° + float cos_gun_cone; // cosf(gun_cone_angle) - for hit detection + // Opponent autopilot + AutopilotState opponent_ap; + // Observation scheme + int obs_scheme; + int obs_size; + // Reward configuration (sweepable) + RewardConfig rcfg; + // Episode-level tracking (reset each episode) + int kill; // 1 if killed this episode, 0 otherwise + float episode_shots_fired; // For accuracy tracking + // Curriculum learning + int curriculum_enabled; // 0 = off (legacy spawning), 1 = on + int curriculum_randomize; // 0 = progressive (training), 1 = random stage each episode (eval) + int total_episodes; // Cumulative episodes (persists across resets) + CurriculumStage stage; // Current difficulty stage + int is_initialized; // Flag to preserve curriculum state across re-init (for Multiprocessing) + // Performance-based curriculum + float recent_kills; // Kills in current evaluation window + float recent_episodes; // Episodes in current evaluation window + float advance_threshold; // Kill rate to advance (default 0.7) + // Anti-spinning + float total_aileron_usage; // Accumulated |aileron| input (for spin death) + float aileron_bias; // Cumulative signed aileron (for directional penalty) + // Episode reward accumulators (for DEBUG summaries) + float sum_r_closing; + float sum_r_speed; // Stall penalty + float sum_r_neg_g; + float sum_r_rudder; + float sum_r_aim; + // Aiming diagnostics (reset each episode, for DEBUG output) + float best_aim_angle; // Best (smallest) aim angle achieved (radians) + int ticks_in_cone; // Ticks where aim_dot > cos_gun_cone + float closest_dist; // Closest approach to target (meters) + // Flight envelope diagnostics (reset each episode, for DEBUG output) + float max_g, min_g; // Peak G-forces experienced + float max_bank; // Peak bank angle (abs, radians) + float max_pitch; // Peak pitch angle (abs, radians) + float min_speed, max_speed; // Speed envelope (m/s) + float min_alt, max_alt; // Altitude envelope (m) + float sum_throttle; // For computing mean throttle + int trigger_pulls; // Times trigger was pulled (>0.5) + int prev_trigger; // For edge detection + DeathReason death_reason; + // Debug + int env_num; // Environment index (for filtering debug output) + // Observation highlighting (for visual debugging) + unsigned char obs_highlight[16]; // 1 = highlight this observation with red arrow +} Dogfight; + +#include "dogfight_observations.h" + +void init(Dogfight *env, int obs_scheme, RewardConfig *rcfg, int curriculum_enabled, int curriculum_randomize, float advance_threshold, int env_num) { + env->log = (Log){0}; + env->tick = 0; + env->env_num = env_num; + env->episode_return = 0.0f; + env->client = NULL; + // Observation scheme + env->obs_scheme = (obs_scheme >= 0 && obs_scheme < OBS_SCHEME_COUNT) ? 
obs_scheme : 0; + env->obs_size = OBS_SIZES[env->obs_scheme]; + // Gun cone for HIT DETECTION - fixed at 5° + env->gun_cone_angle = GUN_CONE_ANGLE; + env->cos_gun_cone = cosf(env->gun_cone_angle); + // Initialize opponent autopilot + autopilot_init(&env->opponent_ap); + // Reward configuration (copy from provided config) + env->rcfg = *rcfg; + // Episode tracking + env->kill = 0; + env->episode_shots_fired = 0.0f; + + env->curriculum_enabled = curriculum_enabled; + env->curriculum_randomize = curriculum_randomize; + if (!env->is_initialized) { + env->total_episodes = 0; + env->stage = CURRICULUM_TAIL_CHASE; + + env->recent_kills = 0.0f; + env->recent_episodes = 0.0f; + env->advance_threshold = advance_threshold > 0.0f ? advance_threshold : 0.7f; + if (DEBUG >= 1) { + fprintf(stderr, "[INIT] FIRST init ptr=%p env_num=%d - setting total_episodes=0, stage=0\n", (void*)env, env_num); + } + } else { + if (DEBUG >= 1) { + fprintf(stderr, "[INIT] RE-init ptr=%p env_num=%d - preserving total_episodes=%d, stage=%d\n", + (void*)env, env_num, env->total_episodes, env->stage); + } + } + env->is_initialized = 1; + env->total_aileron_usage = 0.0f; + + memset(env->obs_highlight, 0, sizeof(env->obs_highlight)); +} + +void set_obs_highlight(Dogfight *env, int *indices, int count) { + memset(env->obs_highlight, 0, sizeof(env->obs_highlight)); + for (int i = 0; i < count && i < 16; i++) { + if (indices[i] >= 0 && indices[i] < 16) { + env->obs_highlight[indices[i]] = 1; + } + } +} + +void add_log(Dogfight *env) { + // Level 1: Episode summary (one line, easy to grep) + if (DEBUG >= 1 && env->env_num == 0) { + const char* death_names[] = {"NONE", "KILL", "OOB", "TIMEOUT", "SUPERSONIC"}; + float mean_ail = env->total_aileron_usage / fmaxf((float)env->tick, 1.0f); + printf("EP tick=%d ret=%.2f death=%s kill=%d stage=%d total_eps=%d mean_ail=%.2f bias=%.1f\n", + env->tick, env->episode_return, death_names[env->death_reason], + env->kill, env->stage, env->total_episodes, mean_ail, env->aileron_bias); + } + + // Level 2: Reward breakdown (which components dominated?) + if (DEBUG >= 2 && env->env_num == 0) { + printf(" SHAPING: closing=%+.2f aim=%+.2f\n", env->sum_r_closing, env->sum_r_aim); + printf(" PENALTY: stall=%.2f neg_g=%.2f rudder=%.2f\n", + env->sum_r_speed, env->sum_r_neg_g, env->sum_r_rudder); + printf(" AIM: best=%.1f° in_cone=%d/%d (%.0f%%) closest=%.0fm\n", + env->best_aim_angle * RAD_TO_DEG, + env->ticks_in_cone, env->tick, + 100.0f * env->ticks_in_cone / fmaxf((float)env->tick, 1.0f), + env->closest_dist); + } + + // Level 3: Flight envelope and control statistics + if (DEBUG >= 3 && env->env_num == 0) { + float mean_throttle = env->sum_throttle / fmaxf((float)env->tick, 1.0f); + printf(" FLIGHT: G=[%+.1f,%+.1f] bank=%.0f° pitch=%.0f° speed=[%.0f,%.0f] alt=[%.0f,%.0f]\n", + env->min_g, env->max_g, + env->max_bank * RAD_TO_DEG, env->max_pitch * RAD_TO_DEG, + env->min_speed, env->max_speed, + env->min_alt, env->max_alt); + printf(" CONTROL: mean_throttle=%.0f%% trigger_pulls=%d shots=%d\n", + mean_throttle * 100.0f, env->trigger_pulls, (int)env->episode_shots_fired); + } + + if (DEBUG >= 10) printf("=== ADD_LOG ===\n"); + if (DEBUG >= 10) printf(" kill=%d, episode_return=%.2f, tick=%d\n", env->kill, env->episode_return, env->tick); + if (DEBUG >= 10) printf(" episode_shots_fired=%.0f, reward=%.2f\n", env->episode_shots_fired, env->rewards[0]); + env->log.episode_return += env->episode_return; + env->log.episode_length += (float)env->tick; + env->log.perf += env->kill ? 
1.0f : 0.0f; + env->log.score += env->rewards[0]; + env->log.shots_fired += env->episode_shots_fired; + env->log.accuracy = (env->log.shots_fired > 0.0f) ? (env->log.perf / env->log.shots_fired * 100.0f) : 0.0f; + env->log.stage = (float)env->stage; // Track curriculum stage + + // Curriculum-weighted metrics (Phase 1) + // Track difficulty faced and compute composite metric + env->log.total_stage_weight += STAGE_WEIGHTS[env->stage]; + env->log.total_abs_bias += fabsf(env->aileron_bias); // Track bias at episode end + env->log.n += 1.0f; + env->log.avg_stage_weight = env->log.total_stage_weight / env->log.n; + env->log.avg_abs_bias = env->log.total_abs_bias / env->log.n; + + // ultimate = kill_rate * stage_weight / (1 + avg_abs_bias * 0.01) + // Rewards killing hard opponents, penalizes degenerate aileron bias + float kill_rate = env->log.perf / env->log.n; + float difficulty_weighted = kill_rate * env->log.avg_stage_weight; + float bias_divisor = 1.0f + env->log.avg_abs_bias * 0.1f; // min 1.0, safe + env->log.ultimate = difficulty_weighted / bias_divisor; + + if (DEBUG >= 10) printf(" log.perf=%.2f, log.shots_fired=%.0f, log.n=%.0f\n", env->log.perf, env->log.shots_fired, env->log.n); + + if (env->curriculum_enabled && !env->curriculum_randomize) { + env->recent_episodes += 1.0f; + env->recent_kills += env->kill ? 1.0f : 0.0f; + + // Evaluate every eval_window episodes + if (env->recent_episodes >= (float)EVAL_WINDOW) { + float recent_rate = env->recent_kills / env->recent_episodes; + + if (recent_rate > env->advance_threshold && env->stage < CURRICULUM_COUNT - 1) { + env->stage++; + if (DEBUG >= 1) { + fprintf(stderr, "[ADVANCE] env=%d stage->%d (rate=%.2f, window=%d)\n", + env->env_num, env->stage, recent_rate, EVAL_WINDOW); + } + } else if (recent_rate < DEMOTE_THRESHOLD && env->stage > 0) { + env->stage--; + if (DEBUG >= 1) { + fprintf(stderr, "[DEMOTE] env=%d stage->%d (rate=%.2f, window=%d)\n", + env->env_num, env->stage, recent_rate, EVAL_WINDOW); + } + } + + env->recent_kills = 0.0f; + env->recent_episodes = 0.0f; + } + } +} + +// ============================================================================ +// Curriculum Learning: Stage-specific spawn functions +// ============================================================================ + +// Get current curriculum stage - now performance-based (df10) +// Stage advancement/demotion handled in add_log() based on recent kill rate +CurriculumStage get_curriculum_stage(Dogfight *env) { + if (!env->curriculum_enabled) return CURRICULUM_FULL_RANDOM; + if (env->curriculum_randomize) { + // Random stage for eval mode - tests all difficulties + return (CurriculumStage)(rand() % CURRICULUM_COUNT); + } + // Stage is managed by add_log() based on performance + return env->stage; +} + +// Stage 0: TAIL_CHASE - Opponent ahead, same heading (easiest) +void spawn_tail_chase(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Opponent 200-400m ahead with offset giving ~15-25% chance of aligned spawn + // At 300m, 5° gun cone = ~26m radius for hits + // ±40/±30 gives avg offset ~25m = borderline hits, provides learning signal + Vec3 opp_pos = vec3( + player_pos.x + rndf(200, 400), + player_pos.y + rndf(-40, 40), + player_pos.z + rndf(-30, 30) + ); + reset_plane(&env->opponent, opp_pos, player_vel); + env->opponent_ap.mode = AP_STRAIGHT; +} + +// Stage 1: HEAD_ON - Opponent coming toward us +void spawn_head_on(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Opponent 400-600m ahead, facing us (opposite velocity) + Vec3 opp_pos = 
vec3( + player_pos.x + rndf(400, 600), + player_pos.y + rndf(-50, 50), + player_pos.z + rndf(-30, 30) + ); + Vec3 opp_vel = vec3(-player_vel.x, -player_vel.y, player_vel.z); + reset_plane(&env->opponent, opp_pos, opp_vel); + env->opponent_ap.mode = AP_STRAIGHT; +} + +// Stage 6: CROSSING - 45 degree deflection shots (reduced from 90° - see CURRICULUM_PLANS.md) +// 90° deflection is historically nearly impossible; 45° is achievable with proper lead +void spawn_crossing(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Opponent 300-500m to the side, flying at 45° angle (not perpendicular) + float side = rndf(0, 1) > 0.5f ? 1.0f : -1.0f; + Vec3 opp_pos = vec3( + player_pos.x + rndf(100, 200), + player_pos.y + side * rndf(300, 500), + player_pos.z + rndf(-50, 50) + ); + // 45° crossing velocity: opponent flies at 45° angle across player's path + // cos(45°) ≈ 0.707, sin(45°) ≈ 0.707 + float speed = norm3(player_vel); + float cos45 = 0.7071f; + float sin45 = 0.7071f; + // side=+1 (right): fly toward (-45°) = (cos, -sin) to cross leftward + // side=-1 (left): fly toward (+45°) = (cos, +sin) to cross rightward + Vec3 opp_vel = vec3(speed * cos45, -side * speed * sin45, 0); + reset_plane(&env->opponent, opp_pos, opp_vel); + env->opponent_ap.mode = AP_STRAIGHT; +} + +// Stage 3: VERTICAL - Above or below player +void spawn_vertical(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Opponent 200-400m ahead, 200-400m above OR below + float vert = rndf(0, 1) > 0.5f ? 1.0f : -1.0f; + float alt_offset = vert * rndf(200, 400); + Vec3 opp_pos = vec3( + player_pos.x + rndf(200, 400), + player_pos.y + rndf(-50, 50), + clampf(player_pos.z + alt_offset, 300, 2500) + ); + reset_plane(&env->opponent, opp_pos, player_vel); + env->opponent_ap.mode = AP_LEVEL; // Maintain altitude +} + +// Stage 4: MANEUVERING - Opponent does gentle turns (30°) +void spawn_maneuvering(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Random spawn position (similar to original) + Vec3 opp_pos = vec3( + player_pos.x + rndf(200, 500), + player_pos.y + rndf(-100, 100), + player_pos.z + rndf(-50, 50) + ); + reset_plane(&env->opponent, opp_pos, player_vel); + // Randomly choose turn direction - gentle 30° bank + env->opponent_ap.mode = rndf(0, 1) > 0.5f ? 
AP_TURN_LEFT : AP_TURN_RIGHT; + env->opponent_ap.target_bank = AP_STAGE4_BANK_DEG * (M_PI / 180.0f); // 30° +} + +// Stage 5: FULL_RANDOM - Medium difficulty (360° spawn + random heading, 45° turns) +void spawn_full_random(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Random direction in 3D sphere (300-600m from player) + float dist = rndf(300, 600); + float theta = rndf(0, 2.0f * M_PI); // Azimuth: 0-360° + float phi = rndf(-0.3f, 0.3f); // Elevation: ±17° (keep near level) + + Vec3 opp_pos = vec3( + player_pos.x + dist * cosf(theta) * cosf(phi), + player_pos.y + dist * sinf(theta) * cosf(phi), + clampf(player_pos.z + dist * sinf(phi), 300, 2500) + ); + + // Random velocity direction (not necessarily toward/away from player) + float vel_theta = rndf(0, 2.0f * M_PI); + float speed = norm3(player_vel); + Vec3 opp_vel = vec3(speed * cosf(vel_theta), speed * sinf(vel_theta), 0); + + reset_plane(&env->opponent, opp_pos, opp_vel); + + // Set orientation to match velocity direction (yaw rotation around Z) + env->opponent.ori = quat_from_axis_angle(vec3(0, 0, 1), vel_theta); + + // Use autopilot randomization (if configured) + if (env->opponent_ap.randomize_on_reset) { + autopilot_randomize(&env->opponent_ap); + } else { + // Default: uniform random mode with 45° turns + float r = rndf(0, 1); + if (r < 0.2f) env->opponent_ap.mode = AP_STRAIGHT; + else if (r < 0.4f) env->opponent_ap.mode = AP_LEVEL; + else if (r < 0.6f) env->opponent_ap.mode = AP_TURN_LEFT; + else if (r < 0.8f) env->opponent_ap.mode = AP_TURN_RIGHT; + else env->opponent_ap.mode = AP_CLIMB; + } + // Set 45° bank for stage 5 turns + env->opponent_ap.target_bank = AP_STAGE5_BANK_DEG * (M_PI / 180.0f); +} + +// Stage 6: HARD_MANEUVERING - Hard turns and weave patterns +void spawn_hard_maneuvering(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + Vec3 opp_pos = vec3( + player_pos.x + rndf(200, 400), + player_pos.y + rndf(-100, 100), + player_pos.z + rndf(-50, 50) + ); + reset_plane(&env->opponent, opp_pos, player_vel); + + // Pick from hard maneuver modes + float r = rndf(0, 1); + if (r < 0.3f) { + env->opponent_ap.mode = AP_HARD_TURN_LEFT; + } else if (r < 0.6f) { + env->opponent_ap.mode = AP_HARD_TURN_RIGHT; + } else { + env->opponent_ap.mode = AP_WEAVE; + env->opponent_ap.phase = rndf(0, 2.0f * M_PI); // Random start phase + } +} + +// Stage 7: EVASIVE - Opponent reacts to player position +void spawn_evasive(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Spawn in various positions (like FULL_RANDOM) + float dist = rndf(300, 500); + float theta = rndf(0, 2.0f * M_PI); + float phi = rndf(-0.3f, 0.3f); + + Vec3 opp_pos = vec3( + player_pos.x + dist * cosf(theta) * cosf(phi), + player_pos.y + dist * sinf(theta) * cosf(phi), + clampf(player_pos.z + dist * sinf(phi), 300, 2500) + ); + + float vel_theta = rndf(0, 2.0f * M_PI); + float speed = norm3(player_vel); + Vec3 opp_vel = vec3(speed * cosf(vel_theta), speed * sinf(vel_theta), 0); + + reset_plane(&env->opponent, opp_pos, opp_vel); + env->opponent.ori = quat_from_axis_angle(vec3(0, 0, 1), vel_theta); + + // Mix of hard modes with AP_EVASIVE dominant + float r = rndf(0, 1); + if (r < 0.4f) { + env->opponent_ap.mode = AP_EVASIVE; + } else if (r < 0.55f) { + env->opponent_ap.mode = AP_HARD_TURN_LEFT; + } else if (r < 0.7f) { + env->opponent_ap.mode = AP_HARD_TURN_RIGHT; + } else if (r < 0.85f) { + env->opponent_ap.mode = AP_WEAVE; + env->opponent_ap.phase = rndf(0, 2.0f * M_PI); + } else { + // 15% chance of regular turn modes (still steep 60°) + 
env->opponent_ap.mode = rndf(0,1) > 0.5f ? AP_TURN_LEFT : AP_TURN_RIGHT; + env->opponent_ap.target_bank = AP_STAGE6_BANK_DEG * (M_PI / 180.0f); // 60° + } +} + +// Master spawn function: dispatches to stage-specific spawner +void spawn_by_curriculum(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + CurriculumStage new_stage = get_curriculum_stage(env); + + // Log stage transitions + if (new_stage != env->stage) { + if (DEBUG >= 1) { + fprintf(stderr, "[STAGE_CHANGE] ptr=%p env=%d eps=%d: stage %d -> %d\n", + (void*)env, env->env_num, env->total_episodes, env->stage, new_stage); + fflush(stderr); + } + env->stage = new_stage; + } + + switch (env->stage) { + case CURRICULUM_TAIL_CHASE: spawn_tail_chase(env, player_pos, player_vel); break; + case CURRICULUM_HEAD_ON: spawn_head_on(env, player_pos, player_vel); break; + case CURRICULUM_CROSSING: spawn_crossing(env, player_pos, player_vel); break; + case CURRICULUM_VERTICAL: spawn_vertical(env, player_pos, player_vel); break; + case CURRICULUM_MANEUVERING: spawn_maneuvering(env, player_pos, player_vel); break; + case CURRICULUM_FULL_RANDOM: spawn_full_random(env, player_pos, player_vel); break; + case CURRICULUM_HARD_MANEUVERING: spawn_hard_maneuvering(env, player_pos, player_vel); break; + case CURRICULUM_EVASIVE: + default: spawn_evasive(env, player_pos, player_vel); break; + } + + // Reset autopilot PID state after spawning + env->opponent_ap.prev_vz = 0.0f; + env->opponent_ap.prev_bank_error = 0.0f; +} + +// Legacy spawn (for curriculum_enabled=0) +void spawn_legacy(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + Vec3 opp_pos = vec3( + player_pos.x + rndf(200, 500), + player_pos.y + rndf(-100, 100), + player_pos.z + rndf(-50, 50) + ); + reset_plane(&env->opponent, opp_pos, player_vel); + + // Handle autopilot: randomize if configured, reset PID state + if (env->opponent_ap.randomize_on_reset) { + autopilot_randomize(&env->opponent_ap); + } + env->opponent_ap.prev_vz = 0.0f; + env->opponent_ap.prev_bank_error = 0.0f; +} + +// ============================================================================ + +void c_reset(Dogfight *env) { + // Increment total episodes BEFORE determining stage (so first episode is 0) + env->total_episodes++; + + env->tick = 0; + env->episode_return = 0.0f; + + // Clear episode tracking + env->kill = 0; + env->episode_shots_fired = 0.0f; + env->total_aileron_usage = 0.0f; + env->aileron_bias = 0.0f; + + // Reset reward accumulators + env->sum_r_closing = 0.0f; + env->sum_r_speed = 0.0f; + env->sum_r_neg_g = 0.0f; + env->sum_r_rudder = 0.0f; + env->sum_r_aim = 0.0f; + env->death_reason = DEATH_NONE; + + // Reset aiming diagnostics + env->best_aim_angle = M_PI; // Start at worst (180°) + env->ticks_in_cone = 0; + env->closest_dist = 10000.0f; // Start at max + + // Reset flight envelope diagnostics + env->max_g = 1.0f; // Start at 1G (level flight) + env->min_g = 1.0f; + env->max_bank = 0.0f; + env->max_pitch = 0.0f; + env->min_speed = 10000.0f; // Start at max + env->max_speed = 0.0f; + env->min_alt = 10000.0f; // Start at max + env->max_alt = 0.0f; + env->sum_throttle = 0.0f; + env->trigger_pulls = 0; + env->prev_trigger = 0; + + // Gun cone for hit detection - stays fixed at 5° + env->cos_gun_cone = cosf(env->gun_cone_angle); + + // Spawn player at random position + Vec3 pos = vec3(rndf(-500, 500), rndf(-500, 500), rndf(500, 1500)); + Vec3 vel = vec3(80, 0, 0); + reset_plane(&env->player, pos, vel); + + // Spawn opponent based on curriculum stage (or legacy if disabled) + if (env->curriculum_enabled) { + 
spawn_by_curriculum(env, pos, vel); + } else { + spawn_legacy(env, pos, vel); + } + + if (DEBUG >= 10) printf("=== RESET ===\n"); + if (DEBUG >= 10) printf("kill=%d, episode_shots_fired=%.0f (now cleared)\n", env->kill, env->episode_shots_fired); + if (DEBUG >= 10) printf("player_pos=(%.1f, %.1f, %.1f)\n", pos.x, pos.y, pos.z); + if (DEBUG >= 10) printf("player_vel=(%.1f, %.1f, %.1f) speed=%.1f\n", vel.x, vel.y, vel.z, norm3(vel)); + if (DEBUG >= 10) printf("opponent_pos=(%.1f, %.1f, %.1f)\n", env->opponent.pos.x, env->opponent.pos.y, env->opponent.pos.z); + if (DEBUG >= 10) printf("initial_dist=%.1f m, stage=%d\n", norm3(sub3(env->opponent.pos, pos)), env->stage); + + compute_observations(env); +#if DEBUG >= 5 + print_observations(env); +#endif +} + +// Check if shooter hits target (cone-based hit detection) +bool check_hit(Plane *shooter, Plane *target, float cos_gun_cone) { + Vec3 to_target = sub3(target->pos, shooter->pos); + float dist = norm3(to_target); + if (dist > GUN_RANGE) return false; + if (dist < 1.0f) return false; // Too close (avoid division issues) + + Vec3 forward = quat_rotate(shooter->ori, vec3(1, 0, 0)); + Vec3 to_target_norm = normalize3(to_target); + float cos_angle = dot3(to_target_norm, forward); + return cos_angle > cos_gun_cone; +} +void c_step(Dogfight *env) { + env->tick++; + env->rewards[0] = 0.0f; + env->terminals[0] = 0; + + if (DEBUG >= 10) printf("\n========== TICK %d ==========\n", env->tick); + if (DEBUG >= 10) printf("=== ACTIONS ===\n"); + if (DEBUG >= 10) printf("throttle_raw=%.3f -> throttle=%.3f\n", env->actions[0], (env->actions[0] + 1.0f) * 0.5f); + if (DEBUG >= 10) printf("elevator=%.3f -> pitch_rate=%.3f rad/s\n", env->actions[1], env->actions[1] * MAX_PITCH_RATE); + if (DEBUG >= 10) printf("ailerons=%.3f -> roll_rate=%.3f rad/s\n", env->actions[2], env->actions[2] * MAX_ROLL_RATE); + if (DEBUG >= 10) printf("rudder=%.3f -> yaw_rate=%.3f rad/s\n", env->actions[3], -env->actions[3] * MAX_YAW_RATE); + if (DEBUG >= 10) printf("trigger=%.3f (fires if >0.5)\n", env->actions[4]); + + // Player uses full physics with actions + step_plane_with_physics(&env->player, env->actions, DT); + + // Opponent uses autopilot (if not AP_STRAIGHT, uses full physics) + if (env->opponent_ap.mode != AP_STRAIGHT) { + float opp_actions[5]; + env->opponent_ap.threat_pos = env->player.pos; // For AP_EVASIVE mode + autopilot_step(&env->opponent_ap, &env->opponent, opp_actions, DT); + step_plane_with_physics(&env->opponent, opp_actions, DT); + } else { + step_plane(&env->opponent, DT); + } + + // Track aileron usage for monitoring (no death penalty - see BISECTION.md) + env->total_aileron_usage += fabsf(env->actions[2]); + env->aileron_bias += env->actions[2]; + +#if DEBUG >= 3 + // Track flight envelope diagnostics (only when debugging - expensive) + { + Plane *dbg_p = &env->player; + if (dbg_p->g_force > env->max_g) env->max_g = dbg_p->g_force; + if (dbg_p->g_force < env->min_g) env->min_g = dbg_p->g_force; + float speed = norm3(dbg_p->vel); + if (speed < env->min_speed) env->min_speed = speed; + if (speed > env->max_speed) env->max_speed = speed; + if (dbg_p->pos.z < env->min_alt) env->min_alt = dbg_p->pos.z; + if (dbg_p->pos.z > env->max_alt) env->max_alt = dbg_p->pos.z; + // Bank angle from quaternion + float bank = atan2f(2.0f * (dbg_p->ori.w * dbg_p->ori.x + dbg_p->ori.y * dbg_p->ori.z), + 1.0f - 2.0f * (dbg_p->ori.x * dbg_p->ori.x + dbg_p->ori.y * dbg_p->ori.y)); + if (fabsf(bank) > env->max_bank) env->max_bank = fabsf(bank); + // Pitch angle from quaternion + float 
pitch = asinf(clampf(2.0f * (dbg_p->ori.w * dbg_p->ori.y - dbg_p->ori.z * dbg_p->ori.x), -1.0f, 1.0f)); + if (fabsf(pitch) > env->max_pitch) env->max_pitch = fabsf(pitch); + // Throttle accumulator + env->sum_throttle += dbg_p->throttle; + // Trigger pull edge detection + int trigger_now = (env->actions[4] > 0.5f) ? 1 : 0; + if (trigger_now && !env->prev_trigger) env->trigger_pulls++; + env->prev_trigger = trigger_now; + } +#endif + + // === Combat (Phase 5) === + Plane *p = &env->player; + Plane *o = &env->opponent; + float reward = 0.0f; + + // Decrement fire cooldowns + if (p->fire_cooldown > 0) p->fire_cooldown--; + if (o->fire_cooldown > 0) o->fire_cooldown--; + + // Player fires: action[4] > 0.5 and cooldown ready + if (DEBUG >= 10) printf("trigger=%.3f, cooldown=%d\n", env->actions[4], p->fire_cooldown); + if (env->actions[4] > 0.5f && p->fire_cooldown == 0) { + p->fire_cooldown = FIRE_COOLDOWN; + env->episode_shots_fired += 1.0f; + if (DEBUG >= 10) printf("=== FIRED! episode_shots_fired=%.0f ===\n", env->episode_shots_fired); + + // Check if hit = kill = SUCCESS = terminal + if (check_hit(p, o, env->cos_gun_cone)) { + if (DEBUG >= 10) printf("*** KILL! ***\n"); + env->kill = 1; + env->death_reason = DEATH_KILL; + env->rewards[0] = 1.0f; + env->episode_return += 1.0f; + env->terminals[0] = 1; + add_log(env); + c_reset(env); + return; + } else { + if (DEBUG >= 10) printf("MISS (dist=%.1f, in_cone=%d)\n", norm3(sub3(o->pos, p->pos)), + check_hit(p, o, env->cos_gun_cone)); + } + } + + // === Reward Shaping (all values from rcfg, sweepable) === + Vec3 rel_pos = sub3(o->pos, p->pos); + float dist = norm3(rel_pos); + + // === df11 Simplified Rewards (6 terms: 3 positive, 3 penalties) === + + // 1. Closing velocity: approaching = good + Vec3 rel_vel = sub3(p->vel, o->vel); + Vec3 rel_pos_norm = normalize3(rel_pos); + float closing_rate = dot3(rel_vel, rel_pos_norm); + float r_closing = clampf(closing_rate * env->rcfg.closing_scale, -0.05f, 0.05f); + reward += r_closing; + + // 2. Aim quality: continuous feedback for gun alignment + Vec3 player_fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + float aim_dot = dot3(rel_pos_norm, player_fwd); // -1 to +1 + float aim_angle_deg = acosf(clampf(aim_dot, -1.0f, 1.0f)) * RAD_TO_DEG; + float r_aim = 0.0f; + if (dist < GUN_RANGE * 2.0f) { // Only in engagement envelope (~1000m) + float aim_quality = (aim_dot + 1.0f) * 0.5f; // Remap [-1,1] to [0,1] + r_aim = aim_quality * env->rcfg.aim_scale; + } + reward += r_aim; + + // 3. Negative G penalty: enforce "pull to turn" (realistic) + float g_threshold = 0.5f; + float g_deficit = fmaxf(0.0f, g_threshold - p->g_force); + float r_neg_g = -g_deficit * env->rcfg.neg_g; + reward += r_neg_g; + + // 4. Stall penalty: speed safety + float speed = norm3(p->vel); + float r_stall = 0.0f; + if (speed < env->rcfg.speed_min) { + r_stall = -(env->rcfg.speed_min - speed) * PENALTY_STALL; + } + reward += r_stall; + + // 5. 
Rudder penalty: prevent knife-edge climbing (small) + float r_rudder = -fabsf(env->actions[3]) * PENALTY_RUDDER; + reward += r_rudder; + +#if DEBUG >= 2 + // Track aiming diagnostics + { + float aim_angle_rad = acosf(clampf(aim_dot, -1.0f, 1.0f)); + if (aim_angle_rad < env->best_aim_angle) env->best_aim_angle = aim_angle_rad; + if (aim_dot > env->cos_gun_cone) env->ticks_in_cone++; + if (dist < env->closest_dist) env->closest_dist = dist; + } +#endif + + // Accumulate for episode summary + env->sum_r_closing += r_closing; + env->sum_r_aim += r_aim; + env->sum_r_neg_g += r_neg_g; + env->sum_r_speed += r_stall; + env->sum_r_rudder += r_rudder; + + if (DEBUG >= 4 && env->env_num == 0) printf("=== REWARD (df11) ===\n"); + if (DEBUG >= 4 && env->env_num == 0) printf("r_closing=%.4f (rate=%.1f m/s)\n", r_closing, closing_rate); + if (DEBUG >= 4 && env->env_num == 0) printf("r_aim=%.4f (aim_angle=%.1f deg, dist=%.1f)\n", r_aim, aim_angle_deg, dist); + if (DEBUG >= 4 && env->env_num == 0) printf("r_neg_g=%.5f (g=%.2f)\n", r_neg_g, p->g_force); + if (DEBUG >= 4 && env->env_num == 0) printf("r_stall=%.4f (speed=%.1f)\n", r_stall, speed); + if (DEBUG >= 4 && env->env_num == 0) printf("r_rudder=%.5f (rud=%.2f)\n", r_rudder, env->actions[3]); + if (DEBUG >= 4 && env->env_num == 0) printf("reward_total=%.4f\n", reward); + + if (DEBUG >= 10) printf("=== COMBAT ===\n"); + if (DEBUG >= 10) printf("aim_angle=%.1f deg (cone=5 deg)\n", aim_angle_deg); + if (DEBUG >= 10) printf("dist_to_target=%.1f m (gun_range=500)\n", dist); + if (DEBUG >= 10) printf("in_cone=%d, in_range=%d\n", aim_dot > env->cos_gun_cone, dist < GUN_RANGE); + + // Global reward clamping to prevent gradient explosion (restored for df8) + reward = fmaxf(-1.0f, fminf(1.0f, reward)); + + env->rewards[0] = reward; + env->episode_return += reward; + + // Check bounds (player only) + bool oob = fabsf(p->pos.x) > WORLD_HALF_X || + fabsf(p->pos.y) > WORLD_HALF_Y || + p->pos.z < 0 || p->pos.z > WORLD_MAX_Z; + + // Check for supersonic (physics blowup) - 340 m/s = Mach 1 + float player_speed = norm3(p->vel); + float opp_speed = norm3(o->vel); + bool supersonic = player_speed > 340.0f || opp_speed > 340.0f; + if (DEBUG && supersonic) { + printf("=== SUPERSONIC BLOWUP ===\n"); + printf("player_speed=%.1f, opp_speed=%.1f\n", player_speed, opp_speed); + printf("player_vel=(%.1f, %.1f, %.1f)\n", p->vel.x, p->vel.y, p->vel.z); + printf("opp_vel=(%.1f, %.1f, %.1f)\n", o->vel.x, o->vel.y, o->vel.z); + printf("opp_ap_mode=%d\n", env->opponent_ap.mode); + } + + if (oob || env->tick >= env->max_steps || supersonic) { + if (DEBUG >= 10) printf("=== TERMINAL (FAILURE) ===\n"); + if (DEBUG >= 10) printf("oob=%d, supersonic=%d, tick=%d/%d\n", oob, supersonic, env->tick, env->max_steps); + // Track death reason (priority: supersonic > oob > timeout) + if (supersonic) { + env->death_reason = DEATH_SUPERSONIC; + } else if (oob) { + env->death_reason = DEATH_OOB; + } else { + env->death_reason = DEATH_TIMEOUT; + } + env->rewards[0] = (supersonic || p->pos.z <= 0) ? -1.0f : 0.0f; + env->terminals[0] = 1; + add_log(env); + c_reset(env); + return; + } + + compute_observations(env); +#if DEBUG >= 5 + print_observations(env); +#endif +} + +void c_close(Dogfight *env); + +#include "dogfight_render.h" + +// Force exact game state for testing. Defaults shown in comments are applied in Python. 
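+// Opponent fields passed as -9999 are treated as "auto": position defaults to 400 m
+// directly ahead of the player; velocity and orientation default to matching the player.
+// Illustrative call (example values only) - player wings-level at 1000 m doing 150 m/s,
+// opponent left entirely on auto:
+//   force_state(env, 0,0,1000,  150,0,0,  1,0,0,0,  1.0f,
+//               -9999,-9999,-9999,  -9999,-9999,-9999,  -9999,-9999,-9999,-9999,  0);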
+void force_state( + Dogfight *env, + float p_px, // = 0.0f, player pos X + float p_py, // = 0.0f, player pos Y + float p_pz, // = 1000.0f, player pos Z + float p_vx, // = 150.0f, player vel X (m/s) + float p_vy, // = 0.0f, player vel Y + float p_vz, // = 0.0f, player vel Z + float p_ow, // = 1.0f, player orientation quat W + float p_ox, // = 0.0f, player orientation quat X + float p_oy, // = 0.0f, player orientation quat Y + float p_oz, // = 0.0f, player orientation quat Z + float p_throttle, // = 1.0f, player throttle [0,1] + float o_px, // = -9999.0f (auto: 400m ahead), opponent pos X + float o_py, // = -9999.0f (auto), opponent pos Y + float o_pz, // = -9999.0f (auto), opponent pos Z + float o_vx, // = -9999.0f (auto: match player), opponent vel X + float o_vy, // = -9999.0f (auto), opponent vel Y + float o_vz, // = -9999.0f (auto), opponent vel Z + float o_ow, // = -9999.0f (auto: match player), opponent ori W + float o_ox, // = -9999.0f (auto), opponent ori X + float o_oy, // = -9999.0f (auto), opponent ori Y + float o_oz, // = -9999.0f (auto), opponent ori Z + int tick // = 0, environment tick +) { + env->player.pos = vec3(p_px, p_py, p_pz); + env->player.vel = vec3(p_vx, p_vy, p_vz); + env->player.ori = quat(p_ow, p_ox, p_oy, p_oz); + quat_normalize(&env->player.ori); + env->player.throttle = p_throttle; + env->player.fire_cooldown = 0; + env->player.yaw_from_rudder = 0.0f; + + // Opponent position: auto = 400m ahead of player + if (o_px < -9000.0f) { + Vec3 fwd = quat_rotate(env->player.ori, vec3(1, 0, 0)); + env->opponent.pos = add3(env->player.pos, mul3(fwd, 400.0f)); + } else { + env->opponent.pos = vec3(o_px, o_py, o_pz); + } + + // Opponent velocity: auto = match player + if (o_vx < -9000.0f) { + env->opponent.vel = env->player.vel; + } else { + env->opponent.vel = vec3(o_vx, o_vy, o_vz); + } + + // Opponent orientation: auto = match player + if (o_ow < -9000.0f) { + env->opponent.ori = env->player.ori; + } else { + env->opponent.ori = quat(o_ow, o_ox, o_oy, o_oz); + quat_normalize(&env->opponent.ori); + } + env->opponent.fire_cooldown = 0; + env->opponent.yaw_from_rudder = 0.0f; + + // Environment state + env->tick = tick; + env->episode_return = 0.0f; + + compute_observations(env); +#if DEBUG >= 5 + print_observations(env); +#endif +} diff --git a/pufferlib/ocean/dogfight/dogfight.py b/pufferlib/ocean/dogfight/dogfight.py new file mode 100644 index 000000000..bf568794d --- /dev/null +++ b/pufferlib/ocean/dogfight/dogfight.py @@ -0,0 +1,309 @@ +import time +import numpy as np +import gymnasium + +import pufferlib +from pufferlib.ocean.dogfight import binding + + +# Autopilot mode constants (must match autopilot.h enum) +class AutopilotMode: + STRAIGHT = 0 # Fly straight (current/default behavior) + LEVEL = 1 # Level flight with PD on vz + TURN_LEFT = 2 # Coordinated left turn + TURN_RIGHT = 3 # Coordinated right turn + CLIMB = 4 # Constant climb rate + DESCEND = 5 # Constant descent rate + RANDOM = 6 # Random mode selection at reset + + +# Observation sizes by scheme (must match C OBS_SIZES in dogfight.h) +OBS_SIZES = { + 0: 12, # ANGLES: pos(3) + speed(1) + euler(3) + target_angles(4) + opp(1) + 1: 13, # PURSUIT: speed(1) + pot(1) + euler(2) + energy(1) + target(4) + tgt_state(3) + energy_adv(1) + 2: 10, # REALISTIC: instruments(4) + gunsight(3) + visual(3) + 3: 10, # REALISTIC_RANGE: instruments(4) + gunsight(3) + visual(3) w/ km range + 4: 13, # REALISTIC_ENEMY_STATE: + enemy pitch/roll/heading + 5: 15, # REALISTIC_FULL: + turn rate + G-loading +} + + +class 
Dogfight(pufferlib.PufferEnv): + def __init__( + self, + num_envs=16, + render_mode=None, + render_fps=None, # Target FPS when rendering (None=no delay, 50=real-time, 10=slow-mo) + report_interval=1, + buf=None, + seed=42, + max_steps=3000, + obs_scheme=0, + # Curriculum learning + curriculum_enabled=0, # 0=off (legacy), 1=on (progressive stages) + curriculum_randomize=0, # 0=progressive (training), 1=random stage each episode (eval) + advance_threshold=0.7, + # df11: Simplified rewards (6 terms) + reward_aim_scale=0.05, # Continuous aiming reward + reward_closing_scale=0.003, # Per m/s closing + penalty_neg_g=0.02, # Enforce "pull to turn" + speed_min=50.0, # Stall threshold + ): + # Observation size depends on scheme + obs_size = OBS_SIZES.get(obs_scheme, 19) + self.obs_scheme = obs_scheme + self.single_observation_space = gymnasium.spaces.Box( + low=-1, + high=1, + shape=(obs_size,), + dtype=np.float32, + ) + + # Action: Box(5) continuous [-1, 1] + # [0] throttle, [1] elevator, [2] ailerons, [3] rudder, [4] trigger + self.single_action_space = gymnasium.spaces.Box( + low=-1, high=1, shape=(5,), dtype=np.float32 + ) + + self.num_agents = num_envs + self.agents_per_batch = num_envs # For pufferl LSTM compatibility + self.render_mode = render_mode + self.render_fps = render_fps + self.report_interval = report_interval + self.tick = 0 + + super().__init__(buf) + self.actions = self.actions.astype(np.float32) # REQUIRED for continuous + + # Print hyperparameters at init (for sweep debugging) + print(f"=== DOGFIGHT ENV INIT ===") + print(f" obs_scheme={obs_scheme}, num_envs={num_envs}") + print(f" REWARDS: aim={reward_aim_scale:.4f} closing={reward_closing_scale:.4f}") + print(f" PENALTY: neg_g={penalty_neg_g:.4f}") + print(f" curriculum={curriculum_enabled}, advance={advance_threshold}") + + self._env_handles = [] + for env_num in range(num_envs): + handle = binding.env_init( + self.observations[env_num:(env_num+1)], + self.actions[env_num:(env_num+1)], + self.rewards[env_num:(env_num+1)], + self.terminals[env_num:(env_num+1)], + self.truncations[env_num:(env_num+1)], + env_num, + env_num=env_num, + report_interval=self.report_interval, + max_steps=max_steps, + obs_scheme=obs_scheme, + + curriculum_enabled=curriculum_enabled, + curriculum_randomize=curriculum_randomize, + advance_threshold=advance_threshold, + + reward_aim_scale=reward_aim_scale, + reward_closing_scale=reward_closing_scale, + penalty_neg_g=penalty_neg_g, + speed_min=speed_min, + ) + self._env_handles.append(handle) + + self.c_envs = binding.vectorize(*self._env_handles) + + def reset(self, seed=None): + self.tick = 0 + binding.vec_reset(self.c_envs, seed if seed else 0) + return self.observations, [] + + def step(self, actions): + self.actions[:] = actions + + self.tick += 1 + binding.vec_step(self.c_envs) + + # Auto-render if render_mode is 'human' (Gymnasium convention) + if self.render_mode == 'human': + self.render() + if self.render_fps: + time.sleep(1.0 / self.render_fps) + + info = [] + if self.tick % self.report_interval == 0: + log_data = binding.vec_log(self.c_envs) + if log_data: + info.append(log_data) + + return (self.observations, self.rewards, self.terminals, self.truncations, info) + + def render(self): + binding.vec_render(self.c_envs, 0) + + def close(self): + binding.vec_close(self.c_envs) + + def force_state( + self, + env_idx=0, + player_pos=None, # (x, y, z) tuple, default (0, 0, 1000) + player_vel=None, # (vx, vy, vz) tuple, default (150, 0, 0) + player_ori=None, # (w, x, y, z) quaternion, default (1, 
0, 0, 0) = wings level + player_throttle=1.0, # [0, 1], default full throttle + opponent_pos=None, # (x, y, z) or None for auto (400m ahead) + opponent_vel=None, # (vx, vy, vz) or None for auto (match player) + opponent_ori=None, # (w, x, y, z) or None for auto (match player) + tick=0, + ): + """ + Force exact game state for testing/debugging. + + Usage: + env.force_state(player_pos=(-1500, 0, 1000), player_vel=(150, 0, 0)) + env.force_state(player_vel=(80, 0, 0)) # Just change velocity + """ + # Build kwargs for C binding + kwargs = {'tick': tick, 'p_throttle': player_throttle} + + # Player position + if player_pos is not None: + kwargs['p_px'], kwargs['p_py'], kwargs['p_pz'] = player_pos + + # Player velocity + if player_vel is not None: + kwargs['p_vx'], kwargs['p_vy'], kwargs['p_vz'] = player_vel + + # Player orientation + if player_ori is not None: + kwargs['p_ow'], kwargs['p_ox'], kwargs['p_oy'], kwargs['p_oz'] = player_ori + + # Opponent position (None = auto) + if opponent_pos is not None: + kwargs['o_px'], kwargs['o_py'], kwargs['o_pz'] = opponent_pos + + # Opponent velocity (None = auto) + if opponent_vel is not None: + kwargs['o_vx'], kwargs['o_vy'], kwargs['o_vz'] = opponent_vel + + # Opponent orientation (None = auto) + if opponent_ori is not None: + kwargs['o_ow'], kwargs['o_ox'], kwargs['o_oy'], kwargs['o_oz'] = opponent_ori + + # Call C binding with the specific env handle + binding.env_force_state(self._env_handles[env_idx], **kwargs) + + def get_state(self, env_idx=0): + """ + Get raw player state (independent of observation scheme). + + Returns dict with keys: + px, py, pz: Position + vx, vy, vz: Velocity + ow, ox, oy, oz: Orientation quaternion + up_x, up_y, up_z: Up vector (derived from quaternion) + fwd_x, fwd_y, fwd_z: Forward vector (derived from quaternion) + throttle: Current throttle + + Useful for physics tests that need exact state regardless of obs_scheme. + """ + return binding.env_get_state(self._env_handles[env_idx]) + + def set_autopilot( + self, + env_idx=0, + mode=AutopilotMode.STRAIGHT, + throttle=1.0, + bank_deg=30.0, + climb_rate=5.0, + ): + """ + Set autopilot mode for opponent aircraft. + + Args: + env_idx: Environment index, or None for all environments + mode: AutopilotMode constant (STRAIGHT, LEVEL, TURN_LEFT, etc.) + throttle: Target throttle [0, 1] + bank_deg: Bank angle for turn modes (degrees) + climb_rate: Target vertical velocity for climb/descend (m/s) + + Usage: + env.set_autopilot(mode=AutopilotMode.LEVEL) # Level flight, env 0 + env.set_autopilot(mode=AutopilotMode.TURN_RIGHT, bank_deg=45) # 45° right turn + env.set_autopilot(mode=AutopilotMode.RANDOM) # Randomize each episode + env.set_autopilot(env_idx=None, mode=AutopilotMode.RANDOM) # All envs + """ + if env_idx is None: + # Vectorized: set all envs at once + binding.vec_set_autopilot( + self.c_envs, + mode=mode, + throttle=throttle, + bank_deg=bank_deg, + climb_rate=climb_rate, + ) + else: + # Single env + binding.env_set_autopilot( + self._env_handles[env_idx], + mode=mode, + throttle=throttle, + bank_deg=bank_deg, + climb_rate=climb_rate, + ) + + def set_mode_weights(self, level=0.2, turn_left=0.2, turn_right=0.2, + climb=0.2, descend=0.2): + """ + Set probability weights for AP_RANDOM mode selection. + + Weights should sum to 1.0. Used for curriculum learning to bias + toward easier modes (e.g., LEVEL, STRAIGHT turns) early in training. 
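+        For example, set_mode_weights(level=0.6, turn_left=0.1, turn_right=0.1,
+        climb=0.1, descend=0.1) biases selection heavily toward level flight
+        (illustrative values; weights sum to 1.0).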
+ + Args: + level: Weight for AP_LEVEL (maintain altitude) + turn_left: Weight for AP_TURN_LEFT + turn_right: Weight for AP_TURN_RIGHT + climb: Weight for AP_CLIMB + descend: Weight for AP_DESCEND + """ + binding.vec_set_mode_weights( + self.c_envs, + level=level, turn_left=turn_left, turn_right=turn_right, + climb=climb, descend=descend, + ) + + def get_autopilot_mode(self, env_idx=0): + """Get current autopilot mode for an environment (for testing/debugging).""" + return binding.env_get_autopilot_mode(self._env_handles[env_idx]) + + def set_obs_highlight(self, indices, env_idx=0): + """ + Set which observations to highlight with red arrows in the visual display. + + Args: + indices: List of observation indices to highlight (e.g., [4, 5, 6] for pitch, roll, yaw) + env_idx: Environment index + + Usage: + env.set_obs_highlight([4, 5, 6]) # Highlight pitch, roll, yaw in scheme 0 + env.set_obs_highlight([]) # Clear highlights + """ + binding.env_set_obs_highlight(self._env_handles[env_idx], list(indices)) + + +def test_performance(timeout=10, atn_cache=1024): + env = Dogfight(num_envs=1000) + env.reset() + tick = 0 + + actions = [env.action_space.sample() for _ in range(atn_cache)] + + import time + start = time.time() + while time.time() - start < timeout: + atn = actions[tick % atn_cache] + env.step(atn) + tick += 1 + + print(f"SPS: {env.num_agents * tick / (time.time() - start)}") + + +if __name__ == "__main__": + test_performance() diff --git a/pufferlib/ocean/dogfight/dogfight_observations.h b/pufferlib/ocean/dogfight/dogfight_observations.h new file mode 100644 index 000000000..ce40126b7 --- /dev/null +++ b/pufferlib/ocean/dogfight/dogfight_observations.h @@ -0,0 +1,528 @@ +// dogfight_observations.h - Observation computation for dogfight environment +// Extracted from dogfight.h to reduce file size +// +// Contains: +// - compute_obs_angles() - Scheme 0: Spherical coordinates +// - compute_obs_pursuit() - Scheme 1: Energy-aware pursuit +// - compute_obs_realistic() - Scheme 2: Cockpit instruments +// - compute_obs_realistic_range() - Scheme 3: With explicit range +// - compute_obs_realistic_enemy_state() - Scheme 4: + enemy state +// - compute_obs_realistic_full() - Scheme 5: Full instrumentation +// - compute_observations() - Dispatcher + +#ifndef DOGFIGHT_OBSERVATIONS_H +#define DOGFIGHT_OBSERVATIONS_H + +// Requires: flightlib.h (Vec3, Quat, math), Dogfight struct defined before include + +// Scheme 0: Angles observations (spherical coordinates) +void compute_obs_angles(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player Euler angles from quaternion + float pitch = asinf(clampf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x), -1.0f, 1.0f)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + float yaw = atan2f(2.0f * (p->ori.w * p->ori.z + p->ori.x * p->ori.y), + 1.0f - 2.0f * (p->ori.y * p->ori.y + p->ori.z * p->ori.z)); + + // Target in body frame -> spherical + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float azimuth = atan2f(rel_pos_body.y, rel_pos_body.x); // -pi to pi + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float elevation = atan2f(rel_pos_body.z, fmaxf(r_horiz, 1e-6f)); // -pi/2 to pi/2 + + // Closing rate + Vec3 rel_vel = sub3(p->vel, o->vel); + float 
closing_rate = dot3(rel_vel, normalize3(rel_pos)); + + // Opponent heading relative to player + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 opp_fwd_body = quat_rotate(q_inv, opp_fwd); + float opp_heading = atan2f(opp_fwd_body.y, opp_fwd_body.x); + + int i = 0; + // Player state (clamped to [-1,1] in case plane is near OOB) + env->observations[i++] = clampf(p->pos.x * INV_WORLD_HALF_X, -1.0f, 1.0f); + env->observations[i++] = clampf(p->pos.y * INV_WORLD_HALF_Y, -1.0f, 1.0f); + env->observations[i++] = clampf(p->pos.z * INV_WORLD_MAX_Z, 0.0f, 1.0f); + env->observations[i++] = clampf(norm3(p->vel) * INV_MAX_SPEED, 0.0f, 1.0f); // Speed scalar + env->observations[i++] = pitch * INV_PI; // -0.5 to 0.5 + env->observations[i++] = roll * INV_PI; // -1 to 1 + env->observations[i++] = yaw * INV_PI; // -1 to 1 + + // Target angles + env->observations[i++] = azimuth * INV_PI; // -1 to 1 + env->observations[i++] = elevation * INV_HALF_PI; // -1 to 1 + env->observations[i++] = clampf(dist * INV_GUN_RANGE, 0.0f, 2.0f) - 1.0f; // [-1,1] + env->observations[i++] = clampf(closing_rate * INV_MAX_SPEED, -1.0f, 1.0f); // Clamped to [-1,1] + + // Opponent info + env->observations[i++] = opp_heading * INV_PI; // -1 to 1 + // OBS_SIZE = 12 +} + +// Scheme 1: OBS_PURSUIT - Energy-aware pursuit observations (13 obs) +// Better than old OBS_CONTROL_ERROR: no spoon-feeding of control errors, +// instead provides body-frame target info and energy state for learning pursuit +void compute_obs_pursuit(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Own Euler angles + float pitch = asinf(clampf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x), -1.0f, 1.0f)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + + // Own energy state: (potential + kinetic) / 2, normalized to [0,1] + float speed = norm3(p->vel); + float alt = p->pos.z; + float potential = alt * INV_WORLD_MAX_Z; + float kinetic = (speed * speed) / (MAX_SPEED * MAX_SPEED); + float own_energy = (potential + kinetic) * 0.5f; + + // Target in body frame + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float target_az = atan2f(rel_pos_body.y, rel_pos_body.x); + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float target_el = atan2f(rel_pos_body.z, fmaxf(r_horiz, 1e-6f)); + + // Closure rate + Vec3 rel_vel = sub3(p->vel, o->vel); + float closure = dot3(rel_vel, normalize3(rel_pos)); + + // Target Euler angles + float target_pitch = asinf(clampf(2.0f * (o->ori.w * o->ori.y - o->ori.z * o->ori.x), -1.0f, 1.0f)); + float target_roll = atan2f(2.0f * (o->ori.w * o->ori.x + o->ori.y * o->ori.z), + 1.0f - 2.0f * (o->ori.x * o->ori.x + o->ori.y * o->ori.y)); + + // Target aspect (head-on vs tail) + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 to_player = normalize3(sub3(p->pos, o->pos)); + float target_aspect = dot3(opp_fwd, to_player); + + // Target energy + float opp_speed = norm3(o->vel); + float opp_alt = o->pos.z; + float opp_potential = opp_alt * INV_WORLD_MAX_Z; + float opp_kinetic = (opp_speed * opp_speed) / (MAX_SPEED * MAX_SPEED); + float opp_energy = (opp_potential + opp_kinetic) * 0.5f; + + // Energy advantage + float energy_advantage = clampf(own_energy - opp_energy, -1.0f, 1.0f); + + int i = 0; + // Own flight state (5 obs) + 
env->observations[i++] = clampf(speed * INV_MAX_SPEED, 0.0f, 1.0f); + env->observations[i++] = potential; + env->observations[i++] = pitch * INV_HALF_PI; + env->observations[i++] = roll * INV_PI; + env->observations[i++] = own_energy; + + // Target position in body frame (4 obs) + env->observations[i++] = target_az * INV_PI; + env->observations[i++] = target_el * INV_HALF_PI; + env->observations[i++] = clampf(dist * INV_GUN_RANGE, 0.0f, 2.0f) - 1.0f; + env->observations[i++] = clampf(closure * INV_MAX_SPEED, -1.0f, 1.0f); + + // Target state (3 obs) + env->observations[i++] = target_roll * INV_PI; + env->observations[i++] = target_pitch * INV_HALF_PI; + env->observations[i++] = target_aspect; + + // Energy comparison (1 obs) + env->observations[i++] = energy_advantage; + // OBS_SIZE = 13 +} + +// Scheme 2: Realistic cockpit instruments only +void compute_obs_realistic(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player Euler angles + float pitch = asinf(clampf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x), -1.0f, 1.0f)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + + // Target in body frame for gunsight + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float target_az = atan2f(rel_pos_body.y, rel_pos_body.x); + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float target_el = atan2f(rel_pos_body.z, fmaxf(r_horiz, 1e-6f)); + + // Target apparent size (larger when closer) + float target_size = 20.0f / fmaxf(dist, 10.0f); // ~wingspan/distance + + // Opponent aspect (are they facing toward/away from us?) + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 to_player = normalize3(sub3(p->pos, o->pos)); + float target_aspect = dot3(opp_fwd, to_player); // 1 = head-on, -1 = tail + + // Horizon visible (is up vector pointing up?) 
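+    // up.z = cos(pitch) * cos(roll) under the yaw-pitch-roll convention used above:
+    // +1 wings-level, 0 at 90° of bank or pitch, -1 inverted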
+ Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + float horizon_visible = up.z; // 1 = level, 0 = knife-edge, -1 = inverted + + int i = 0; + // Instruments (4 obs) + env->observations[i++] = clampf(norm3(p->vel) * INV_MAX_SPEED, 0.0f, 1.0f); // Airspeed + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; // Altitude + env->observations[i++] = pitch * INV_HALF_PI; // Pitch indicator + env->observations[i++] = roll * INV_PI; // Bank indicator + + // Gunsight (3 obs) + env->observations[i++] = target_az * INV_PI; // Target azimuth in sight + env->observations[i++] = target_el * INV_HALF_PI; // Target elevation in sight + env->observations[i++] = clampf(target_size, 0.0f, 2.0f) - 1.0f; // Target size + + // Visual cues (3 obs) + env->observations[i++] = target_aspect; // -1 to 1 + env->observations[i++] = horizon_visible; // -1 to 1 + env->observations[i++] = clampf(dist * INV_GUN_RANGE, 0.0f, 2.0f) - 1.0f; // Distance estimate + // OBS_SIZE = 10 +} + +// Scheme 3: REALISTIC with explicit range (10 obs) +// Like REALISTIC but with km range + closure rate instead of target_size + distance_estimate +void compute_obs_realistic_range(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player Euler angles + float pitch = asinf(clampf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x), -1.0f, 1.0f)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + + // Target in body frame for gunsight + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float target_az = atan2f(rel_pos_body.y, rel_pos_body.x); + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float target_el = atan2f(rel_pos_body.z, fmaxf(r_horiz, 1e-6f)); + + // Range in km (0 = point blank, 0.5 = 1km, 1.0 = 2km+) + float range_km = clampf(dist / 2000.0f, 0.0f, 1.0f); + + // Opponent aspect (are they facing toward/away from us?) + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 to_player = normalize3(sub3(p->pos, o->pos)); + float target_aspect = dot3(opp_fwd, to_player); // 1 = head-on, -1 = tail + + // Horizon visible (is up vector pointing up?) 
+ Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + float horizon_visible = up.z; // 1 = level, 0 = knife-edge, -1 = inverted + + // Closure rate (positive = closing) + Vec3 rel_vel = sub3(p->vel, o->vel); + float closure_rate = dot3(rel_vel, normalize3(rel_pos)); + + int i = 0; + // Instruments (4 obs) + env->observations[i++] = clampf(norm3(p->vel) * INV_MAX_SPEED, 0.0f, 1.0f); // Airspeed + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; // Altitude + env->observations[i++] = pitch * INV_HALF_PI; // Pitch indicator + env->observations[i++] = roll * INV_PI; // Bank indicator + + // Gunsight (3 obs) + env->observations[i++] = target_az * INV_PI; // Target azimuth in sight + env->observations[i++] = target_el * INV_HALF_PI; // Target elevation in sight + env->observations[i++] = range_km; // Range: 0=close, 1=2km+ + + // Visual cues (3 obs) + env->observations[i++] = target_aspect; // -1 to 1 + env->observations[i++] = horizon_visible; // -1 to 1 + env->observations[i++] = clampf(closure_rate * INV_MAX_SPEED, -1.0f, 1.0f); // Closure rate + // OBS_SIZE = 10 +} + +// Scheme 4: REALISTIC_ENEMY_STATE (13 obs) +// REALISTIC_RANGE + enemy pitch/roll/heading +void compute_obs_realistic_enemy_state(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player Euler angles + float pitch = asinf(clampf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x), -1.0f, 1.0f)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + + // Target in body frame for gunsight + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float target_az = atan2f(rel_pos_body.y, rel_pos_body.x); + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float target_el = atan2f(rel_pos_body.z, fmaxf(r_horiz, 1e-6f)); + + // Range in km + float range_km = clampf(dist / 2000.0f, 0.0f, 1.0f); + + // Opponent aspect + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 to_player = normalize3(sub3(p->pos, o->pos)); + float target_aspect = dot3(opp_fwd, to_player); + + // Horizon visible + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + float horizon_visible = up.z; + + // Closure rate + Vec3 rel_vel = sub3(p->vel, o->vel); + float closure_rate = dot3(rel_vel, normalize3(rel_pos)); + + // Enemy Euler angles (relative to horizon) + float enemy_pitch = asinf(clampf(2.0f * (o->ori.w * o->ori.y - o->ori.z * o->ori.x), -1.0f, 1.0f)); + float enemy_roll = atan2f(2.0f * (o->ori.w * o->ori.x + o->ori.y * o->ori.z), + 1.0f - 2.0f * (o->ori.x * o->ori.x + o->ori.y * o->ori.y)); + + // Enemy heading relative to player (+1 = pointing at player, -1 = pointing away) + float enemy_heading_rel = target_aspect; // Already computed as dot(opp_fwd, to_player) + + int i = 0; + // Instruments (4 obs) + env->observations[i++] = clampf(norm3(p->vel) * INV_MAX_SPEED, 0.0f, 1.0f); + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; + env->observations[i++] = pitch * INV_HALF_PI; + env->observations[i++] = roll * INV_PI; + + // Gunsight (3 obs) + env->observations[i++] = target_az * INV_PI; + env->observations[i++] = target_el * INV_HALF_PI; + env->observations[i++] = range_km; + + // Visual cues (3 obs) + env->observations[i++] = target_aspect; + env->observations[i++] = horizon_visible; + env->observations[i++] = clampf(closure_rate * INV_MAX_SPEED, -1.0f, 1.0f); + + // 
Enemy state (3 obs) - NEW + env->observations[i++] = enemy_pitch * INV_HALF_PI; // Enemy nose angle vs horizon + env->observations[i++] = enemy_roll * INV_PI; // Enemy bank angle vs horizon + env->observations[i++] = enemy_heading_rel; // Pointing toward/away + // OBS_SIZE = 13 +} + +// Scheme 5: REALISTIC_FULL (15 obs) +// REALISTIC_ENEMY_STATE + turn rate + G-loading +void compute_obs_realistic_full(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player Euler angles + float pitch = asinf(clampf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x), -1.0f, 1.0f)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + + // Target in body frame for gunsight + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float target_az = atan2f(rel_pos_body.y, rel_pos_body.x); + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float target_el = atan2f(rel_pos_body.z, fmaxf(r_horiz, 1e-6f)); + + // Range in km + float range_km = clampf(dist / 2000.0f, 0.0f, 1.0f); + + // Opponent aspect + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 to_player = normalize3(sub3(p->pos, o->pos)); + float target_aspect = dot3(opp_fwd, to_player); + + // Horizon visible + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + float horizon_visible = up.z; + + // Closure rate + Vec3 rel_vel = sub3(p->vel, o->vel); + float closure_rate = dot3(rel_vel, normalize3(rel_pos)); + + // Enemy Euler angles + float enemy_pitch = asinf(clampf(2.0f * (o->ori.w * o->ori.y - o->ori.z * o->ori.x), -1.0f, 1.0f)); + float enemy_roll = atan2f(2.0f * (o->ori.w * o->ori.x + o->ori.y * o->ori.z), + 1.0f - 2.0f * (o->ori.x * o->ori.x + o->ori.y * o->ori.y)); + float enemy_heading_rel = target_aspect; + + // Turn rate from velocity change + float speed = norm3(p->vel); + float turn_rate_actual = 0.0f; + if (speed > 10.0f) { + Vec3 accel = mul3(sub3(p->vel, p->prev_vel), 1.0f / DT); + Vec3 vel_dir = mul3(p->vel, 1.0f / speed); + float accel_forward = dot3(accel, vel_dir); + Vec3 accel_centripetal = sub3(accel, mul3(vel_dir, accel_forward)); + float centripetal_mag = norm3(accel_centripetal); + turn_rate_actual = centripetal_mag / speed; // omega = a/v + } + // Normalize turn rate: max ~0.5 rad/s (29 deg/s) for sustained turn + float turn_rate_norm = clampf(turn_rate_actual / 0.5f, -1.0f, 1.0f); + + // G-loading: use physics-accurate p->g_force (aerodynamic forces) + // Range: -1.5 to +6.0 G, normalize so 1G = 0, 6G = 1, -1.5G = -0.5 + float g_loading_norm = clampf((p->g_force - 1.0f) / 5.0f, -0.5f, 1.0f); + + int i = 0; + // Instruments (4 obs) + env->observations[i++] = clampf(speed * INV_MAX_SPEED, 0.0f, 1.0f); + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; + env->observations[i++] = pitch * INV_HALF_PI; + env->observations[i++] = roll * INV_PI; + + // Gunsight (3 obs) + env->observations[i++] = target_az * INV_PI; + env->observations[i++] = target_el * INV_HALF_PI; + env->observations[i++] = range_km; + + // Visual cues (3 obs) + env->observations[i++] = target_aspect; + env->observations[i++] = horizon_visible; + env->observations[i++] = clampf(closure_rate * INV_MAX_SPEED, -1.0f, 1.0f); + + // Enemy state (3 obs) + env->observations[i++] = enemy_pitch * INV_HALF_PI; + env->observations[i++] = enemy_roll * INV_PI; + env->observations[i++] = 
enemy_heading_rel; + + // Own state (2 obs) - NEW + env->observations[i++] = turn_rate_norm; // How fast am I turning? + env->observations[i++] = g_loading_norm; // How hard am I pulling? + // OBS_SIZE = 15 +} + +// Dispatcher function +void compute_observations(Dogfight *env) { + switch (env->obs_scheme) { + case OBS_ANGLES: compute_obs_angles(env); break; + case OBS_PURSUIT: compute_obs_pursuit(env); break; + case OBS_REALISTIC: compute_obs_realistic(env); break; + case OBS_REALISTIC_RANGE: compute_obs_realistic_range(env); break; + case OBS_REALISTIC_ENEMY_STATE: compute_obs_realistic_enemy_state(env); break; + case OBS_REALISTIC_FULL: compute_obs_realistic_full(env); break; + default: compute_obs_angles(env); break; + } +} + +// Print observations for DEBUG level 5 +// Output format: [idx] name = +0.640 [0,1] or [-1,1] +#if DEBUG >= 5 + +// Observation labels for DEBUG printing (same as dogfight_render.h for HUD) +// Scheme 0: OBS_ANGLES (12 obs) +static const char* DEBUG_OBS_LABELS_ANGLES[12] = { + "px", "py", "pz", "speed", "pitch", "roll", "yaw", + "tgt_az", "tgt_el", "dist", "closure", "opp_hdg" +}; + +// Scheme 1: OBS_PURSUIT (13 obs) +static const char* DEBUG_OBS_LABELS_PURSUIT[13] = { + "speed", "potential", "pitch", "roll", "energy", + "tgt_az", "tgt_el", "dist", "closure", + "tgt_roll", "tgt_pitch", "aspect", "E_adv" +}; + +// Scheme 2: OBS_REALISTIC (10 obs) +static const char* DEBUG_OBS_LABELS_REALISTIC[10] = { + "airspeed", "altitude", "pitch", "roll", + "tgt_az", "tgt_el", "tgt_size", + "aspect", "horizon", "dist" +}; + +// Scheme 3: OBS_REALISTIC_RANGE (10 obs) +static const char* DEBUG_OBS_LABELS_REALISTIC_RANGE[10] = { + "airspeed", "altitude", "pitch", "roll", + "tgt_az", "tgt_el", "range_km", + "aspect", "horizon", "closure" +}; + +// Scheme 4: OBS_REALISTIC_ENEMY_STATE (13 obs) +static const char* DEBUG_OBS_LABELS_REALISTIC_ENEMY_STATE[13] = { + "airspeed", "altitude", "pitch", "roll", + "tgt_az", "tgt_el", "range_km", + "aspect", "horizon", "closure", + "emy_pitch", "emy_roll", "emy_hdg" +}; + +// Scheme 5: OBS_REALISTIC_FULL (15 obs) +static const char* DEBUG_OBS_LABELS_REALISTIC_FULL[15] = { + "airspeed", "altitude", "pitch", "roll", + "tgt_az", "tgt_el", "range_km", + "aspect", "horizon", "closure", + "emy_pitch", "emy_roll", "emy_hdg", + "turn_rate", "g_load" +}; +void print_observations(Dogfight *env) { + const char** labels = NULL; + int num_obs = env->obs_size; + + // Select labels based on scheme + switch (env->obs_scheme) { + case OBS_ANGLES: labels = DEBUG_OBS_LABELS_ANGLES; break; + case OBS_PURSUIT: labels = DEBUG_OBS_LABELS_PURSUIT; break; + case OBS_REALISTIC: labels = DEBUG_OBS_LABELS_REALISTIC; break; + case OBS_REALISTIC_RANGE: labels = DEBUG_OBS_LABELS_REALISTIC_RANGE; break; + case OBS_REALISTIC_ENEMY_STATE: labels = DEBUG_OBS_LABELS_REALISTIC_ENEMY_STATE; break; + case OBS_REALISTIC_FULL: labels = DEBUG_OBS_LABELS_REALISTIC_FULL; break; + default: labels = DEBUG_OBS_LABELS_ANGLES; break; + } + + printf("=== OBS (scheme %d, %d obs) ===\n", env->obs_scheme, num_obs); + + for (int i = 0; i < num_obs; i++) { + float val = env->observations[i]; + + // Determine range based on scheme and index + // [0,1] range: speed, potential, energy, airspeed, altitude, range_km + // [-1,1] range: everything else + bool is_01 = false; + switch (env->obs_scheme) { + case OBS_ANGLES: + is_01 = (i == 3); // speed + break; + case OBS_PURSUIT: + is_01 = (i == 0 || i == 1 || i == 4); // speed, potential, energy + break; + case OBS_REALISTIC: + case OBS_REALISTIC_RANGE: + 
case OBS_REALISTIC_ENEMY_STATE: + case OBS_REALISTIC_FULL: + is_01 = (i == 0 || i == 1); // airspeed, altitude + // Also range_km (index 6) is [0,1] for schemes 3-5 + if (env->obs_scheme != OBS_REALISTIC && i == 6) is_01 = true; + break; + default: + break; + } + + const char* range_str = is_01 ? "[0,1]" : "[-1,1]"; + printf("[%2d] %-10s = %+.3f %s\n", i, labels[i], val, range_str); + } +} +#endif // DEBUG >= 5 + +#endif // DOGFIGHT_OBSERVATIONS_H diff --git a/pufferlib/ocean/dogfight/dogfight_render.h b/pufferlib/ocean/dogfight/dogfight_render.h new file mode 100644 index 000000000..72a93f526 --- /dev/null +++ b/pufferlib/ocean/dogfight/dogfight_render.h @@ -0,0 +1,399 @@ +// dogfight_render.h - Rendering functions for dogfight environment +// Extracted from dogfight.h to reduce file size +// +// Contains: +// - draw_plane_shape() - 3D wireframe airplane +// - handle_camera_controls() - Mouse orbit/zoom +// - draw_obs_bar() - Single observation bar +// - draw_obs_monitor() - Full observation HUD +// - c_render() - Main render loop +// - c_close() - Cleanup + +#ifndef DOGFIGHT_RENDER_H +#define DOGFIGHT_RENDER_H + +// Requires: raylib.h, rlgl.h, flightlib.h (Vec3, Quat), Dogfight struct + +// Observation labels for each scheme (for HUD display) +// Scheme 0: OBS_ANGLES (12 obs) +static const char* OBS_LABELS_ANGLES[12] = { + "px", "py", "pz", "speed", "pitch", "roll", "yaw", + "tgt_az", "tgt_el", "dist", "closure", "opp_hdg" +}; + +// Scheme 1: OBS_PURSUIT (13 obs) +static const char* OBS_LABELS_PURSUIT[13] = { + "speed", "potential", "pitch", "roll", "energy", + "tgt_az", "tgt_el", "dist", "closure", + "tgt_roll", "tgt_pitch", "aspect", "E_adv" +}; + +// Scheme 2: OBS_REALISTIC (10 obs) +static const char* OBS_LABELS_REALISTIC[10] = { + "airspeed", "altitude", "pitch", "roll", + "tgt_az", "tgt_el", "tgt_size", + "aspect", "horizon", "dist" +}; + +// Scheme 3: OBS_REALISTIC_RANGE (10 obs) +static const char* OBS_LABELS_REALISTIC_RANGE[10] = { + "airspeed", "altitude", "pitch", "roll", + "tgt_az", "tgt_el", "range_km", + "aspect", "horizon", "closure" +}; + +// Scheme 4: OBS_REALISTIC_ENEMY_STATE (13 obs) +static const char* OBS_LABELS_REALISTIC_ENEMY_STATE[13] = { + "airspeed", "altitude", "pitch", "roll", + "tgt_az", "tgt_el", "range_km", + "aspect", "horizon", "closure", + "emy_pitch", "emy_roll", "emy_hdg" +}; + +// Scheme 5: OBS_REALISTIC_FULL (15 obs) +static const char* OBS_LABELS_REALISTIC_FULL[15] = { + "airspeed", "altitude", "pitch", "roll", + "tgt_az", "tgt_el", "range_km", + "aspect", "horizon", "closure", + "emy_pitch", "emy_roll", "emy_hdg", + "turn_rate", "g_load" +}; + +// Draw airplane shape using lines - shows roll/pitch/yaw clearly +// Body frame: X=forward, Y=right, Z=up +void draw_plane_shape(Vec3 pos, Quat ori, Color body_color, Color wing_color) { + // Body frame points (scaled for visibility: ~20m wingspan, ~25m length) + Vec3 nose = vec3(15, 0, 0); + Vec3 tail = vec3(-10, 0, 0); + Vec3 left_wing = vec3(0, -12, 0); + Vec3 right_wing = vec3(0, 12, 0); + Vec3 vtail_top = vec3(-8, 0, 8); // Vertical stabilizer + Vec3 htail_left = vec3(-10, -5, 0); // Horizontal stabilizer + Vec3 htail_right = vec3(-10, 5, 0); + + // Rotate all points by orientation and translate to world position + Vec3 nose_w = add3(pos, quat_rotate(ori, nose)); + Vec3 tail_w = add3(pos, quat_rotate(ori, tail)); + Vec3 lwing_w = add3(pos, quat_rotate(ori, left_wing)); + Vec3 rwing_w = add3(pos, quat_rotate(ori, right_wing)); + Vec3 vtop_w = add3(pos, quat_rotate(ori, vtail_top)); + Vec3 htl_w = 
add3(pos, quat_rotate(ori, htail_left)); + Vec3 htr_w = add3(pos, quat_rotate(ori, htail_right)); + + // Convert to Raylib Vector3 + Vector3 nose_r = {nose_w.x, nose_w.y, nose_w.z}; + Vector3 tail_r = {tail_w.x, tail_w.y, tail_w.z}; + Vector3 lwing_r = {lwing_w.x, lwing_w.y, lwing_w.z}; + Vector3 rwing_r = {rwing_w.x, rwing_w.y, rwing_w.z}; + Vector3 vtop_r = {vtop_w.x, vtop_w.y, vtop_w.z}; + Vector3 htl_r = {htl_w.x, htl_w.y, htl_w.z}; + Vector3 htr_r = {htr_w.x, htr_w.y, htr_w.z}; + + // Fuselage (nose to tail) + DrawLine3D(nose_r, tail_r, body_color); + + // Main wings (left to right, through center for visibility) + DrawLine3D(lwing_r, rwing_r, wing_color); + // Wing to fuselage connections (makes it look more solid) + DrawLine3D(lwing_r, nose_r, wing_color); + DrawLine3D(rwing_r, nose_r, wing_color); + + // Vertical stabilizer (tail to top) + DrawLine3D(tail_r, vtop_r, body_color); + + // Horizontal stabilizer + DrawLine3D(htl_r, htr_r, body_color); + DrawLine3D(htl_r, tail_r, body_color); + DrawLine3D(htr_r, tail_r, body_color); + + // Small sphere at nose to show front clearly + DrawSphere(nose_r, 2.0f, body_color); +} + +void handle_camera_controls(Client *c) { + Vector2 mouse = GetMousePosition(); + + if (IsMouseButtonPressed(MOUSE_BUTTON_LEFT)) { + c->is_dragging = true; + c->last_mouse_x = mouse.x; + c->last_mouse_y = mouse.y; + } + if (IsMouseButtonReleased(MOUSE_BUTTON_LEFT)) { + c->is_dragging = false; + } + + if (c->is_dragging) { + float sensitivity = 0.005f; + c->cam_azimuth -= (mouse.x - c->last_mouse_x) * sensitivity; + c->cam_elevation += (mouse.y - c->last_mouse_y) * sensitivity; + c->cam_elevation = clampf(c->cam_elevation, -1.4f, 1.4f); // prevent gimbal lock + c->last_mouse_x = mouse.x; + c->last_mouse_y = mouse.y; + } + + // Mouse wheel zoom + float wheel = GetMouseWheelMove(); + if (wheel != 0) { + c->cam_distance = clampf(c->cam_distance - wheel * 10.0f, 30.0f, 300.0f); + } +} + +// Draw a single observation bar +// x, y: top-left position +// label: observation name +// value: the observation value +// is_01_range: true for [0,1] range, false for [-1,1] range +void draw_obs_bar(int x, int y, const char* label, float value, bool is_01_range) { + // Draw label (fixed width) + DrawText(label, x, y, 14, WHITE); + + // Bar dimensions + int bar_x = x + 80; + int bar_w = 150; + int bar_h = 14; + + // Draw background + DrawRectangle(bar_x, y, bar_w, bar_h, DARKGRAY); + + // Calculate fill position + float norm_val; + int fill_x, fill_w; + + if (is_01_range) { + // [0, 1] range - fill from left + norm_val = clampf(value, 0.0f, 1.0f); + fill_x = bar_x; + fill_w = (int)(norm_val * bar_w); + } else { + // [-1, 1] range - fill from center + norm_val = clampf(value, -1.0f, 1.0f); + int center = bar_x + bar_w / 2; + if (norm_val >= 0) { + fill_x = center; + fill_w = (int)(norm_val * bar_w / 2); + } else { + fill_w = (int)(-norm_val * bar_w / 2); + fill_x = center - fill_w; + } + } + + // Color based on magnitude + Color fill_color = GREEN; + if (fabsf(value) > 0.9f) fill_color = YELLOW; + if (fabsf(value) > 1.0f) fill_color = RED; + + DrawRectangle(fill_x, y, fill_w, bar_h, fill_color); + + // Draw center line for [-1,1] range + if (!is_01_range) { + int center = bar_x + bar_w / 2; + DrawLine(center, y, center, y + bar_h, WHITE); + } + + // Draw value text + DrawText(TextFormat("%+.2f", value), bar_x + bar_w + 5, y, 14, WHITE); +} + +// Draw observation monitor showing all observation values as bars +void draw_obs_monitor(Dogfight *env) { + int start_x = 900; + int start_y = 
10; + int row_height = 18; + + const char** labels = NULL; + int num_obs = env->obs_size; + + // Select labels based on scheme + switch (env->obs_scheme) { + case OBS_ANGLES: + labels = OBS_LABELS_ANGLES; + break; + case OBS_PURSUIT: + labels = OBS_LABELS_PURSUIT; + break; + case OBS_REALISTIC: + labels = OBS_LABELS_REALISTIC; + break; + case OBS_REALISTIC_RANGE: + labels = OBS_LABELS_REALISTIC_RANGE; + break; + case OBS_REALISTIC_ENEMY_STATE: + labels = OBS_LABELS_REALISTIC_ENEMY_STATE; + break; + case OBS_REALISTIC_FULL: + labels = OBS_LABELS_REALISTIC_FULL; + break; + default: + labels = OBS_LABELS_ANGLES; + break; + } + + // Title + DrawText(TextFormat("OBS (scheme %d)", env->obs_scheme), + start_x, start_y, 16, YELLOW); + start_y += 22; + + // Draw each observation bar + for (int i = 0; i < num_obs; i++) { + float val = env->observations[i]; + // Determine if this observation is [0,1] range + // Based on observation scheme and index: + // - Scheme 0 (ANGLES): index 3 (speed) is [0,1] + // - Scheme 1 (PURSUIT): indices 0 (speed), 1 (potential), 4 (energy) are [0,1] + // - Scheme 2-5 (REALISTIC*): indices 0 (airspeed), 1 (altitude) are [0,1] + bool is_01 = false; + switch (env->obs_scheme) { + case OBS_ANGLES: + is_01 = (i == 3); // speed + break; + case OBS_PURSUIT: + is_01 = (i == 0 || i == 1 || i == 4); // speed, potential, energy + break; + case OBS_REALISTIC: + case OBS_REALISTIC_RANGE: + case OBS_REALISTIC_ENEMY_STATE: + case OBS_REALISTIC_FULL: + is_01 = (i == 0 || i == 1); // airspeed, altitude + // Also range_km (index 6) is [0,1] + if (env->obs_scheme != OBS_REALISTIC && i == 6) is_01 = true; + break; + default: + break; + } + int y = start_y + i * row_height; + draw_obs_bar(start_x, y, labels[i], val, is_01); + + // Draw red arrow for highlighted observations + if (env->obs_highlight[i]) { + // Draw arrow pointing right at the label (triangle) + int arrow_x = start_x - 20; + int arrow_y = y + 7; // Center vertically + // Triangle pointing right: 3 points + DrawTriangle( + (Vector2){arrow_x, arrow_y - 5}, // Top + (Vector2){arrow_x, arrow_y + 5}, // Bottom + (Vector2){arrow_x + 12, arrow_y}, // Tip (right) + RED + ); + } + } +} + +void c_render(Dogfight *env) { + // 1. Lazy initialization + if (env->client == NULL) { + env->client = (Client *)calloc(1, sizeof(Client)); + env->client->width = 1280; + env->client->height = 720; + env->client->cam_distance = 80.0f; + env->client->cam_azimuth = 0.0f; + env->client->cam_elevation = 0.3f; + env->client->is_dragging = false; + + InitWindow(1280, 720, "Dogfight"); + SetTargetFPS(60); + + // Z-up coordinate system + env->client->camera.up = (Vector3){0.0f, 0.0f, 1.0f}; + env->client->camera.fovy = 45.0f; + env->client->camera.projection = CAMERA_PERSPECTIVE; + } + + // 2. Handle window close + if (WindowShouldClose() || IsKeyDown(KEY_ESCAPE)) { + c_close(env); + exit(0); + } + + // 3. Handle mouse controls for camera orbit + handle_camera_controls(env->client); + + // 4. 
Update chase camera + Plane *p = &env->player; + Vec3 fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + float dist = env->client->cam_distance; + + // Apply orbit offsets from mouse drag + float az = env->client->cam_azimuth; + float el = env->client->cam_elevation; + + // Base chase position (behind and above player) + float cam_x = p->pos.x - fwd.x * dist * cosf(el) * cosf(az) + fwd.y * dist * sinf(az); + float cam_y = p->pos.y - fwd.y * dist * cosf(el) * cosf(az) - fwd.x * dist * sinf(az); + float cam_z = p->pos.z + dist * sinf(el) + 20.0f; + + env->client->camera.position = (Vector3){cam_x, cam_y, cam_z}; + env->client->camera.target = (Vector3){p->pos.x, p->pos.y, p->pos.z}; + + // 5. Begin drawing + BeginDrawing(); + ClearBackground((Color){6, 24, 24, 255}); // Dark blue-green sky + + // Set clip planes for long-range visibility (default far=1000 is too close) + rlSetClipPlanes(1.0, 10000.0); // near=1m, far=10km + BeginMode3D(env->client->camera); + + // 6. Draw ground plane at z=0 (XY plane, since we use Z-up) + // DrawPlane uses raylib's Y-up convention (XZ plane), so we draw triangles instead + Vector3 g1 = {-2000, -2000, 0}; + Vector3 g2 = {2000, -2000, 0}; + Vector3 g3 = {2000, 2000, 0}; + Vector3 g4 = {-2000, 2000, 0}; + Color ground_color = (Color){20, 60, 20, 255}; + DrawTriangle3D(g1, g2, g3, ground_color); + DrawTriangle3D(g1, g3, g4, ground_color); + + // 7. Draw world bounds wireframe + // Bounds: X +/-2000, Y +/-2000, Z 0-3000 -> center at (0, 0, 1500) + DrawCubeWires((Vector3){0, 0, 1500}, 4000, 4000, 3000, (Color){100, 100, 100, 255}); + + // 8. Draw player plane (cyan wireframe airplane) + Color cyan = {0, 255, 255, 255}; + Color light_cyan = {100, 255, 255, 255}; + draw_plane_shape(p->pos, p->ori, cyan, light_cyan); + + // 9. Draw opponent plane (red wireframe airplane) + Plane *o = &env->opponent; + draw_plane_shape(o->pos, o->ori, RED, ORANGE); + + // 10. Draw tracer when firing (cooldown just set = just fired) + if (p->fire_cooldown >= FIRE_COOLDOWN - 2) { // Show for 2 frames + Vec3 nose = add3(p->pos, quat_rotate(p->ori, vec3(15, 0, 0))); + Vec3 tracer_end = add3(p->pos, quat_rotate(p->ori, vec3(GUN_RANGE, 0, 0))); + Vector3 nose_r = {nose.x, nose.y, nose.z}; + Vector3 end_r = {tracer_end.x, tracer_end.y, tracer_end.z}; + DrawLine3D(nose_r, end_r, YELLOW); + } + + EndMode3D(); + + // 10. Draw HUD + float speed = norm3(p->vel); + float dist_to_opp = norm3(sub3(o->pos, p->pos)); + + DrawText(TextFormat("Speed: %.0f m/s", speed), 10, 10, 20, WHITE); + DrawText(TextFormat("Altitude: %.0f m", p->pos.z), 10, 40, 20, WHITE); + DrawText(TextFormat("Throttle: %.0f%%", p->throttle * 100.0f), 10, 70, 20, WHITE); + DrawText(TextFormat("Distance: %.0f m", dist_to_opp), 10, 100, 20, WHITE); + DrawText(TextFormat("Tick: %d / %d", env->tick, env->max_steps), 10, 130, 20, WHITE); + DrawText(TextFormat("Return: %.2f", env->episode_return), 10, 160, 20, WHITE); + DrawText(TextFormat("Perf: %.1f%% | Shots: %.0f", env->log.perf / fmaxf(env->log.n, 1.0f) * 100.0f, env->log.shots_fired), 10, 190, 20, YELLOW); + + // 11. 
Draw observation monitor (right side)
+ draw_obs_monitor(env);
+
+ // Controls hint
+ DrawText("Mouse drag: Orbit | Scroll: Zoom | ESC: Exit", 10, (int)env->client->height - 30, 16, GRAY);
+
+ EndDrawing();
+}
+
+void c_close(Dogfight *env) {
+ if (env->client != NULL) {
+ CloseWindow();
+ free(env->client);
+ env->client = NULL;
+ }
+}
+
+#endif // DOGFIGHT_RENDER_H
diff --git a/pufferlib/ocean/dogfight/dogfight_test.c b/pufferlib/ocean/dogfight/dogfight_test.c
new file mode 100644
index 000000000..24b1dc53c
--- /dev/null
+++ b/pufferlib/ocean/dogfight/dogfight_test.c
@@ -0,0 +1,1450 @@
+#include <stdio.h>   // printf
+#include <assert.h>  // assert
+#include <math.h>    // fabs, fabsf, sqrtf
+#include "dogfight.h"
+
+#define ASSERT_NEAR(a, b, eps) assert(fabs((a) - (b)) < (eps))
+
+static float obs_buf[32]; // Enough for current and future obs
+static float act_buf[5];
+static float rew_buf[1];
+static unsigned char term_buf[1];
+
+static Dogfight make_env(int max_steps) {
+ Dogfight env = {0};
+ env.observations = obs_buf;
+ env.actions = act_buf;
+ env.rewards = rew_buf;
+ env.terminals = term_buf;
+ env.max_steps = max_steps;
+ // df11: Simplified reward config (6 terms)
+ RewardConfig rcfg = {
+ .aim_scale = 0.05f, .closing_scale = 0.003f,
+ .neg_g = 0.02f,
+ .speed_min = 50.0f,
+ };
+ init(&env, 0, &rcfg, 0, 0, 0.7f, 0); // curriculum_enabled=0
+ return env;
+}
+
+void test_vec3_math() {
+ Vec3 a = vec3(1, 2, 3);
+ Vec3 b = vec3(4, 5, 6);
+
+ Vec3 sum = add3(a, b);
+ assert(sum.x == 5 && sum.y == 7 && sum.z == 9);
+
+ Vec3 diff = sub3(b, a);
+ assert(diff.x == 3 && diff.y == 3 && diff.z == 3);
+
+ Vec3 scaled = mul3(a, 2);
+ assert(scaled.x == 2 && scaled.y == 4 && scaled.z == 6);
+
+ float d = dot3(a, b);
+ assert(d == 32); // 1*4 + 2*5 + 3*6 = 32
+
+ ASSERT_NEAR(norm3(vec3(3, 4, 0)), 5.0f, 1e-6f);
+
+ printf("test_vec3_math PASS\n");
+}
+
+void test_quat_math() {
+ Quat identity = quat(1, 0, 0, 0);
+ Vec3 v = vec3(1, 0, 0);
+ Vec3 rotated = quat_rotate(identity, v);
+ ASSERT_NEAR(rotated.x, 1.0f, 1e-6f);
+ ASSERT_NEAR(rotated.y, 0.0f, 1e-6f);
+ ASSERT_NEAR(rotated.z, 0.0f, 1e-6f);
+
+ // 90 degree rotation around Z axis
+ Quat rot_z = quat_from_axis_angle(vec3(0, 0, 1), PI / 2);
+ Vec3 v2 = quat_rotate(rot_z, vec3(1, 0, 0));
+ ASSERT_NEAR(v2.x, 0.0f, 1e-5f);
+ ASSERT_NEAR(v2.y, 1.0f, 1e-5f);
+ ASSERT_NEAR(v2.z, 0.0f, 1e-5f);
+
+ printf("test_quat_math PASS\n");
+}
+
+void test_init() {
+ Dogfight env = make_env(1000);
+ assert(env.tick == 0);
+ assert(env.episode_return == 0.0f);
+ assert(env.log.n == 0.0f);
+ assert(env.client == NULL);
+ printf("test_init PASS\n");
+}
+
+void test_reset_plane() {
+ Plane p;
+ Vec3 pos = vec3(100, 200, 300);
+ Vec3 vel = vec3(80, 0, 0);
+ reset_plane(&p, pos, vel);
+
+ assert(p.pos.x == 100 && p.pos.y == 200 && p.pos.z == 300);
+ assert(p.vel.x == 80 && p.vel.y == 0 && p.vel.z == 0);
+ assert(p.ori.w == 1 && p.ori.x == 0 && p.ori.y == 0 && p.ori.z == 0);
+ assert(p.throttle == 0.5f);
+
+ printf("test_reset_plane PASS\n");
+}
+
+void test_c_reset() {
+ Dogfight env = make_env(1000);
+ c_reset(&env);
+
+ assert(env.tick == 0);
+ assert(env.episode_return == 0.0f);
+
+ // Player spawned in bounds
+ assert(env.player.pos.x >= -500 && env.player.pos.x <= 500);
+ assert(env.player.pos.y >= -500 && env.player.pos.y <= 500);
+ assert(env.player.pos.z >= 500 && env.player.pos.z <= 1500);
+
+ // Velocity set
+ assert(env.player.vel.x == 80);
+
+ printf("test_c_reset PASS\n");
+}
+
+void test_compute_observations() {
+ // Tests ANGLES scheme (scheme 0, 12 obs)
+ Dogfight env = make_env(1000);
+ env.player.pos = vec3(1000, 500, 1500);
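+ // Rough expected normalized values for this setup (the 125 m/s velocity is
+ // set just below), assuming the +/-2000 m X/Y and 0-3000 m Z world bounds
+ // drawn by the renderer and the ~340 m/s overspeed limit as MAX_SPEED:
+ //   obs[0] ≈ 1000/2000 = 0.50, obs[1] ≈ 500/2000 = 0.25,
+ //   obs[2] ≈ 1500/3000 = 0.50, obs[3] ≈ 125/340 ≈ 0.37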
+ env.player.vel = vec3(125, 0, 0); + env.player.ori = quat(1, 0, 0, 0); // identity = facing +X, level + + compute_observations(&env); + + // ANGLES scheme layout: + // [0-2] pos normalized + ASSERT_NEAR(env.observations[0], 1000.0f / WORLD_HALF_X, 1e-6f); + ASSERT_NEAR(env.observations[1], 500.0f / WORLD_HALF_Y, 1e-6f); + ASSERT_NEAR(env.observations[2], 1500.0f / WORLD_MAX_Z, 1e-6f); + + // [3] speed normalized (scalar) + ASSERT_NEAR(env.observations[3], 125.0f / MAX_SPEED, 1e-6f); + + // [4-6] euler angles (all 0 for identity quaternion) + ASSERT_NEAR(env.observations[4], 0.0f, 1e-5f); // pitch / PI + ASSERT_NEAR(env.observations[5], 0.0f, 1e-5f); // roll / PI + ASSERT_NEAR(env.observations[6], 0.0f, 1e-5f); // yaw / PI + + // [7-11] target angles - depend on opponent position, check valid ranges + assert(env.observations[7] >= -1.0f && env.observations[7] <= 1.0f); // azimuth + assert(env.observations[8] >= -1.0f && env.observations[8] <= 1.0f); // elevation + assert(env.observations[9] >= -2.0f && env.observations[9] <= 2.0f); // distance + assert(env.observations[10] >= -1.0f && env.observations[10] <= 1.0f); // closing_rate + assert(env.observations[11] >= -1.0f && env.observations[11] <= 1.0f); // opp_heading + + printf("test_compute_observations PASS\n"); +} + +void test_c_step_moves_forward() { + Dogfight env = make_env(1000); + c_reset(&env); + + float initial_x = env.player.pos.x; + + // Set neutral actions for stable flight + env.actions[0] = 0.5f; // moderate throttle + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + c_step(&env); + + // With physics, plane should still move forward (roughly) + assert(env.player.pos.x > initial_x); + assert(env.tick == 1); + + printf("test_c_step_moves_forward PASS\n"); +} + +void test_oob_terminates() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place plane just past boundary + env.player.pos = vec3(WORLD_HALF_X + 1, 0, 1000); + env.player.vel = vec3(80, 0, 0); + + c_step(&env); + + assert(env.terminals[0] == 1); + assert(env.log.n == 1.0f); // Episode logged + + printf("test_oob_terminates PASS\n"); +} + +void test_max_steps_terminates() { + Dogfight env = make_env(5); + c_reset(&env); + + // Place plane in center, won't go OOB + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(10, 0, 0); // Slow + + for (int i = 0; i < 4; i++) { + c_step(&env); + assert(env.terminals[0] == 0); + } + + c_step(&env); // Step 5 should terminate + assert(env.terminals[0] == 1); + + printf("test_max_steps_terminates PASS\n"); +} + +void test_supersonic_terminates() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place player at 400 m/s (> 340 m/s limit) + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(400, 0, 0); // Supersonic! 
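+ // Sketch of the overspeed termination this test exercises (an assumption
+ // about c_step's internals; the limit is taken to be the same ~340 m/s
+ // MAX_SPEED constant used for observation normalization):
+ //   if (norm3(env->player.vel) > MAX_SPEED) { terminal = 1; reward = -1.0f; }
+ // At 400 m/s the very first step should therefore terminate with reward -1.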
+ env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); + env.opponent.vel = vec3(80, 0, 0); + + c_step(&env); + + assert(env.terminals[0] == 1); + assert(env.rewards[0] == -1.0f); + + printf("test_supersonic_terminates PASS\n"); +} + +// Phase 2 tests + +void test_opponent_spawns() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Opponent should exist and be ahead of player + float dx = env.opponent.pos.x - env.player.pos.x; + assert(dx >= 200 && dx <= 500); + assert(env.opponent.vel.x == 80); + + printf("test_opponent_spawns PASS\n"); +} + +void test_relative_observations() { + // Tests ANGLES scheme relative target info (azimuth, elevation, distance) + Dogfight env = make_env(1000); + c_reset(&env); + + // Place planes at known positions + // Player at origin facing +X, opponent directly ahead and slightly right/up + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(80, 0, 0); + env.player.ori = quat(1, 0, 0, 0); // identity = facing +X + env.opponent.pos = vec3(500, 100, 1050); // 500m ahead, 100m right, 50m up + env.opponent.vel = vec3(80, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + + compute_observations(&env); + + // ANGLES scheme: relative position encoded as azimuth [7] and elevation [8] + // rel_pos in body frame = (500, 100, 50) since identity orientation + // azimuth = atan2(100, 500) / PI ≈ 0.063 + // elevation = atan2(50, sqrt(500^2+100^2)) / (PI/2) ≈ 0.062 + float azimuth = env.observations[7]; + float elevation = env.observations[8]; + float distance = env.observations[9]; + + // Azimuth should be small positive (opponent slightly right) + float expected_az = atan2f(100.0f, 500.0f) / PI; // ~0.063 + ASSERT_NEAR(azimuth, expected_az, 1e-4f); + + // Elevation should be small positive (opponent slightly above) + float r_horiz = sqrtf(500*500 + 100*100); + float expected_el = atan2f(50.0f, r_horiz) / (PI * 0.5f); // ~0.062 + ASSERT_NEAR(elevation, expected_el, 1e-4f); + + // Distance: sqrt(500^2 + 100^2 + 50^2) ≈ 512m, normalized to [-1,1] + float dist = sqrtf(500*500 + 100*100 + 50*50); + float expected_dist = clampf(dist / GUN_RANGE, 0.0f, 2.0f) - 1.0f; + ASSERT_NEAR(distance, expected_dist, 1e-4f); + + printf("test_relative_observations PASS\n"); +} + +void test_pursuit_reward() { + // df11: Test that being closer and on-target gives better reward + Dogfight env = make_env(1000); + c_reset(&env); + + // Place opponent far away (outside engagement envelope: > GUN_RANGE * 2 = 1000m) + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); // Flying forward + env.player.ori = quat(1, 0, 0, 0); // Facing +X + env.opponent.pos = vec3(1500, 0, 1000); // 1500m ahead - outside aim envelope + env.opponent.vel = vec3(100, 0, 0); + + c_step(&env); + float reward_far = env.rewards[0]; + + // Place opponent close (inside engagement envelope) + c_reset(&env); + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); // 300m ahead - inside aim envelope + env.opponent.vel = vec3(100, 0, 0); + + c_step(&env); + float reward_close = env.rewards[0]; + + // Closer should give better reward due to aim bonus (within envelope) + assert(reward_close > reward_far); + + printf("test_pursuit_reward PASS\n"); +} + +// Phase 3 tests + +void test_aircraft_params() { + // Check that aircraft parameters are defined with reasonable values + assert(MASS > 0 && MASS < 10000); // kg, WW2 fighter ~2500-4000kg + assert(WING_AREA > 0 && WING_AREA < 100); // m², 
WW2 fighter ~15-25m² + assert(C_D0 > 0 && C_D0 < 0.1); // parasitic drag coef + assert(K > 0 && K < 0.5); // induced drag factor + assert(C_L_MAX > 0 && C_L_MAX < 2.0); // max lift coef + assert(C_L_ALPHA > 0 && C_L_ALPHA < 10); // lift slope ~5.7/rad + assert(ENGINE_POWER > 0); // watts + assert(GRAVITY > 9 && GRAVITY < 10); // m/s² + + printf("test_aircraft_params PASS\n"); +} + +void test_throttle_accelerates() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place plane level, flying forward + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + + float speed_before = norm3(env.player.vel); + + // Full throttle + env.actions[0] = 1.0f; // throttle + env.actions[1] = 0.0f; // elevator + env.actions[2] = 0.0f; // ailerons + env.actions[3] = 0.0f; // rudder + + for (int i = 0; i < 50; i++) c_step(&env); + + float speed_after = norm3(env.player.vel); + + // With thrust, should accelerate (or at least maintain speed against drag) + assert(speed_after >= speed_before * 0.9f); + + printf("test_throttle_accelerates PASS\n"); +} + +void test_plane_falls_without_lift() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place plane with no forward velocity (stalled) + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(0, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + + float z_before = env.player.pos.z; + + // Zero throttle + env.actions[0] = -1.0f; + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + for (int i = 0; i < 50; i++) c_step(&env); + + float z_after = env.player.pos.z; + + // Should fall due to gravity + assert(z_after < z_before); + // Should have fallen at least 0.5 * g * t² ≈ 0.5 * 10 * 1² = 5m in 1 sec + assert(z_before - z_after > 3.0f); + + printf("test_plane_falls_without_lift PASS\n"); +} + +void test_controls_affect_orientation() { + Dogfight env = make_env(1000); + + // Test pitch (elevator) + c_reset(&env); + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + Quat ori_before = env.player.ori; + + env.actions[0] = 0.0f; + env.actions[1] = 1.0f; // full elevator (pitch) + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + for (int i = 0; i < 10; i++) c_step(&env); + + // Orientation should have changed + float dot = ori_before.w * env.player.ori.w + + ori_before.x * env.player.ori.x + + ori_before.y * env.player.ori.y + + ori_before.z * env.player.ori.z; + assert(fabsf(dot) < 0.999f); // not identical + + // Test roll (ailerons) + c_reset(&env); + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + ori_before = env.player.ori; + + env.actions[0] = 0.0f; + env.actions[1] = 0.0f; + env.actions[2] = 1.0f; // full ailerons (roll) + env.actions[3] = 0.0f; + + for (int i = 0; i < 10; i++) c_step(&env); + + dot = ori_before.w * env.player.ori.w + + ori_before.x * env.player.ori.x + + ori_before.y * env.player.ori.y + + ori_before.z * env.player.ori.z; + assert(fabsf(dot) < 0.999f); + + printf("test_controls_affect_orientation PASS\n"); +} + +void test_dynamic_pressure() { + // q = 0.5 * rho * V² + // At V=100 m/s: q = 0.5 * 1.225 * 10000 = 6125 Pa + float V = 100.0f; + float q = 0.5f * 1.225f * V * V; + ASSERT_NEAR(q, 6125.0f, 1.0f); + + printf("test_dynamic_pressure PASS\n"); +} + +void test_lift_opposes_gravity() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Fly level at cruise speed with moderate throttle + env.player.pos = vec3(0, 0, 1000); + 
env.player.vel = vec3(120, 0, 0); // ~270 mph, reasonable cruise + env.player.ori = quat(1, 0, 0, 0); + + float z_before = env.player.pos.z; + + // Moderate throttle to maintain speed + env.actions[0] = 0.5f; + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + // Run for 1 second (50 steps at 0.02s) + for (int i = 0; i < 50; i++) c_step(&env); + + float z_after = env.player.pos.z; + + // With lift, altitude change should be much less than free fall + // Free fall: 0.5 * g * t² = 0.5 * 10 * 1 = 5m + // With lift: should lose less than 5m (ideally close to 0) + float dz = fabsf(z_after - z_before); + assert(dz < 50.0f); // generous tolerance for now + + printf("test_lift_opposes_gravity PASS\n"); +} + +void test_drag_slows_plane() { + Dogfight env = make_env(1000); + c_reset(&env); + + // High speed, zero throttle + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(200, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + + float speed_before = norm3(env.player.vel); + + env.actions[0] = -1.0f; // zero throttle + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + for (int i = 0; i < 100; i++) c_step(&env); + + float speed_after = norm3(env.player.vel); + + // Drag should slow the plane + assert(speed_after < speed_before); + + printf("test_drag_slows_plane PASS\n"); +} + +void test_stall_clamps_lift() { + // Verify C_L clamping actually happens in physics + // A plane pointed straight up (90° alpha) should not get infinite lift + Dogfight env = make_env(1000); + c_reset(&env); + + // Flying forward at speed + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + // Point nose straight up (90° pitch) + env.player.ori = quat_from_axis_angle(vec3(0, 1, 0), PI / 2); + + env.actions[0] = 0.5f; + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + float z_before = env.player.pos.z; + for (int i = 0; i < 25; i++) c_step(&env); + float z_after = env.player.pos.z; + + // With stall limiting, plane should NOT shoot up massively + // Max C_L = 1.4, at 100 m/s: L = 1.4 * 6125 * 22 = 188,650 N + // That's ~6.4g, so it can climb but not infinitely + // Should still fall or climb modestly, not go to space + assert(z_after - z_before < 500.0f); // reasonable bound + + printf("test_stall_clamps_lift PASS\n"); +} + +void test_glimit_clamps_acceleration() { + // Verify g-limit actually clamps extreme forces + Dogfight env = make_env(1000); + c_reset(&env); + + // High speed for lots of lift potential + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(200, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + + // Full back stick to pitch up hard + env.actions[0] = 1.0f; + env.actions[1] = 1.0f; // full pitch + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + for (int i = 0; i < 10; i++) c_step(&env); + + // Check that acceleration is bounded + // At 200 m/s, dynamic pressure is huge, but g-limit should cap it + // After pulling, vertical velocity should exist but not be insane + float vz = env.player.vel.z; + // At 8g for 0.2s: delta_v = 8 * 9.81 * 0.2 = ~16 m/s max vertical + assert(fabsf(vz) < 200.0f); // sanity check + + printf("test_glimit_clamps_acceleration PASS\n"); +} + +void test_forces_sum_correctly() { + // Test 3.17: verify all forces contribute to motion + Dogfight env = make_env(1000); + c_reset(&env); + + // Level flight at moderate speed + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + + // No throttle - should slow down (drag) 
and fall (gravity > lift at zero alpha) + env.actions[0] = -1.0f; + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + float vx_before = env.player.vel.x; + float z_before = env.player.pos.z; + + for (int i = 0; i < 50; i++) c_step(&env); + + // Drag slows forward motion + assert(env.player.vel.x < vx_before); + // Gravity pulls down (at zero alpha, minimal lift) + assert(env.player.pos.z < z_before); + + printf("test_forces_sum_correctly PASS\n"); +} + +void test_integration_updates_state() { + // Test 3.18: verify Euler integration updates pos and vel + Dogfight env = make_env(1000); + c_reset(&env); + + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + + Vec3 pos_before = env.player.pos; + Vec3 vel_before = env.player.vel; + + env.actions[0] = 0.5f; + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + c_step(&env); + + // Position should change (velocity integration) + assert(env.player.pos.x != pos_before.x || + env.player.pos.y != pos_before.y || + env.player.pos.z != pos_before.z); + + // Velocity should change (force integration) + assert(env.player.vel.x != vel_before.x || + env.player.vel.y != vel_before.y || + env.player.vel.z != vel_before.z); + + printf("test_integration_updates_state PASS\n"); +} + +// Phase 3.5 tests + +void test_closing_velocity_reward() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Scenario 1: Player approaching opponent (closing) + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); // Moving toward opponent + env.opponent.pos = vec3(500, 0, 1000); + env.opponent.vel = vec3(50, 0, 0); // Moving slower + + c_step(&env); + float reward_closing = env.rewards[0]; + + // Scenario 2: Player moving away from opponent (opening) + c_reset(&env); + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(-100, 0, 0); // Moving away from opponent + env.opponent.pos = vec3(500, 0, 1000); + env.opponent.vel = vec3(50, 0, 0); + + c_step(&env); + float reward_opening = env.rewards[0]; + + // Closing should give better reward than opening + assert(reward_closing > reward_opening); + + printf("test_closing_velocity_reward PASS\n"); +} + +void test_tail_position_reward() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Scenario 1: Player behind opponent (good position) + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); // Facing +X + env.opponent.pos = vec3(300, 0, 1000); + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); // Opponent also facing +X (player behind) + + c_step(&env); + float reward_behind = env.rewards[0]; + + // Scenario 2: Player in front of opponent (bad position) + c_reset(&env); + env.player.pos = vec3(300, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(0, 0, 1000); + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); // Opponent facing player (player in front) + + c_step(&env); + float reward_front = env.rewards[0]; + + // Being behind should give better reward + assert(reward_behind > reward_front); + + printf("test_tail_position_reward PASS\n"); +} + +void test_speed_penalty() { + Dogfight env = make_env(1000); + + // Scenario 1: Good speed (100 m/s) + c_reset(&env); + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); + + c_step(&env); + float reward_good_speed = 
env.rewards[0]; + + // Scenario 2: Too slow (20 m/s - stall risk) + c_reset(&env); + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(20, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); + + c_step(&env); + float reward_slow = env.rewards[0]; + + // Good speed should have better reward + assert(reward_good_speed > reward_slow); + + printf("test_speed_penalty PASS\n"); +} + +// Phase 4: Rendering tests (camera math only, no actual drawing) +void test_chase_camera_behind_player() { + // Test that camera is positioned behind player based on orientation + Dogfight env = make_env(1000); + c_reset(&env); + + // Set player at origin, facing +X + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); // identity = facing +X + + // Calculate camera position (same logic as c_render) + Vec3 fwd = quat_rotate(env.player.ori, vec3(1, 0, 0)); + float dist = 80.0f; // default cam_distance + float el = 0.3f; // default cam_elevation + float az = 0.0f; // default cam_azimuth + + float cam_x = env.player.pos.x - fwd.x * dist * cosf(el) * cosf(az) + fwd.y * dist * sinf(az); + float cam_y = env.player.pos.y - fwd.y * dist * cosf(el) * cosf(az) - fwd.x * dist * sinf(az); + float cam_z = env.player.pos.z + dist * sinf(el) + 20.0f; + + // Camera should be behind player (negative X direction) and above + assert(cam_x < env.player.pos.x); // Behind + assert(cam_z > env.player.pos.z); // Above + ASSERT_NEAR(cam_y, 0.0f, 1.0f); // Same Y (player at Y=0) + + printf("test_chase_camera_behind_player PASS\n"); +} + +void test_camera_orbit_updates() { + // Test camera orbit math with different azimuth/elevation + Dogfight env = make_env(1000); + c_reset(&env); + + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); + + Vec3 fwd = quat_rotate(env.player.ori, vec3(1, 0, 0)); + float dist = 80.0f; + + // Test with azimuth rotation (looking from side) + float az = PI / 2.0f; // 90 degrees + float el = 0.3f; + + float cam_x = env.player.pos.x - fwd.x * dist * cosf(el) * cosf(az) + fwd.y * dist * sinf(az); + float cam_y = env.player.pos.y - fwd.y * dist * cosf(el) * cosf(az) - fwd.x * dist * sinf(az); + + // With 90 degree azimuth, camera should be to the side (negative Y) + assert(cam_y < -30.0f); // Significantly to the side + + // Test elevation change + float el_high = 1.2f; // Looking from above + float cam_z_high = env.player.pos.z + dist * sinf(el_high) + 20.0f; + float cam_z_low = env.player.pos.z + dist * sinf(0.1f) + 20.0f; + + assert(cam_z_high > cam_z_low); // Higher elevation = higher camera + + printf("test_camera_orbit_updates PASS\n"); +} + +void test_client_struct_defaults() { + // Test that Client would be initialized with correct defaults + // (We can't actually test c_render without Raylib window, but we test the values) + float default_distance = 80.0f; + float default_azimuth = 0.0f; + float default_elevation = 0.3f; + + assert(default_distance > 30.0f && default_distance < 300.0f); + assert(default_elevation > -1.4f && default_elevation < 1.4f); + ASSERT_NEAR(default_azimuth, 0.0f, 0.01f); + + printf("test_client_struct_defaults PASS\n"); +} + +// Phase 5: Combat tests +void test_trigger_fires() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Set up player far from opponent (won't hit) + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); + env.player.fire_cooldown = 0; + env.opponent.pos = vec3(1000, 0, 500); // Far away, won't hit + + env.actions[4] = 1.0f; // Trigger pulled + + // Step to process fire + c_step(&env); + + // 
Should have fired (cooldown set) + assert(env.player.fire_cooldown == FIRE_COOLDOWN); + assert(env.episode_shots_fired >= 1.0f); + + printf("test_trigger_fires PASS\n"); +} + +void test_fire_cooldown() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Set up player far from opponent (won't hit) + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(1000, 0, 500); // Far away, won't hit + + // Fire once + env.player.fire_cooldown = 0; + env.actions[4] = 1.0f; + c_step(&env); + float shots_after_first = env.episode_shots_fired; + + // Try to fire again immediately (should be blocked by cooldown) + c_step(&env); + float shots_after_second = env.episode_shots_fired; + + // Should not have fired again (still on cooldown) + assert(shots_after_second == shots_after_first); + + printf("test_fire_cooldown PASS\n"); +} + +void test_cone_hit_detection() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place player at origin facing +X + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); // Identity = facing +X + + // Place opponent directly ahead within range + env.opponent.pos = vec3(200, 0, 500); // 200m ahead, in cone + + assert(check_hit(&env.player, &env.opponent, env.cos_gun_cone) == true); + + // Place opponent too far + env.opponent.pos = vec3(600, 0, 500); // 600m > GUN_RANGE + assert(check_hit(&env.player, &env.opponent, env.cos_gun_cone) == false); + + // Place opponent at side (outside 5 degree cone) + env.opponent.pos = vec3(200, 50, 500); // ~14 degrees off-axis + assert(check_hit(&env.player, &env.opponent, env.cos_gun_cone) == false); + + // Place opponent slightly off-axis but within cone + env.opponent.pos = vec3(200, 10, 500); // ~2.8 degrees off-axis + assert(check_hit(&env.player, &env.opponent, env.cos_gun_cone) == true); + + printf("test_cone_hit_detection PASS\n"); +} + +void test_hit_reward() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Set up guaranteed hit + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); + env.player.fire_cooldown = 0; + env.opponent.pos = vec3(200, 0, 500); // Directly ahead + + env.actions[4] = 1.0f; // Fire + + c_step(&env); + + // Kill = terminal with reward 1.0 + assert(env.terminals[0] == 1); + assert(env.rewards[0] == 1.0f); + + printf("test_hit_reward PASS\n"); +} + +void test_kill_terminates_episode() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Set up guaranteed hit + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); + env.player.fire_cooldown = 0; + env.opponent.pos = vec3(200, 0, 500); + + env.actions[4] = 1.0f; + + c_step(&env); + + // Kill should terminate episode + assert(env.terminals[0] == 1); + + // Reward should be 1.0 (kill reward) + assert(env.rewards[0] == 1.0f); + + // Perf should be tracked (1 kill in 1 episode = 1.0) + assert(env.log.perf >= 1.0f); + assert(env.log.n >= 1.0f); + + printf("test_kill_terminates_episode PASS\n"); +} + +void test_combat_constants() { + // Verify combat constants are reasonable + assert(GUN_RANGE == 500.0f); + assert(GUN_CONE_ANGLE > 0.08f && GUN_CONE_ANGLE < 0.09f); // ~5 degrees + assert(FIRE_COOLDOWN == 10); + + printf("test_combat_constants PASS\n"); +} + +// Phase 3.6: Additional reward/penalty tests (df11: updated for simplified rewards) + +void test_aim_reward() { + // df11: Test continuous aim reward - better aim = better reward + Dogfight env = make_env(1000); + + // Scenario 1: Opponent directly ahead (perfect aim, aim_dot = 1.0) + 
c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); // Facing +X + env.opponent.pos = vec3(300, 0, 1000); // Directly ahead + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + c_step(&env); + float reward_on_target = env.rewards[0]; + + // Scenario 2: Opponent 90° off-axis (bad aim, aim_dot = 0.0) + c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(0, 300, 1000); // 90° to the side + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + c_step(&env); + float reward_off_target = env.rewards[0]; + + // On target should have better reward (continuous aim reward) + assert(reward_on_target > reward_off_target); + + printf("test_aim_reward PASS\n"); +} + +void test_aim_reward_range_dependent() { + // df11: Test that aim reward only applies within engagement envelope (GUN_RANGE * 2) + Dogfight env = make_env(1000); + + // Scenario 1: In range (300m < GUN_RANGE * 2 = 1000m) - should get aim reward + c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); // 300m ahead, in envelope + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + c_step(&env); + float reward_in_range = env.rewards[0]; + + // Scenario 2: Out of range (1500m > GUN_RANGE * 2 = 1000m) - no aim reward + c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(1500, 0, 1000); // 1500m ahead, out of envelope + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + c_step(&env); + float reward_out_of_range = env.rewards[0]; + + // In range should get aim bonus (both have same aim quality but different range) + assert(reward_in_range > reward_out_of_range); + + printf("test_aim_reward_range_dependent PASS\n"); +} + +// Helper to make env with curriculum enabled +static Dogfight make_env_curriculum(int max_steps, int randomize) { + Dogfight env = {0}; + env.observations = obs_buf; + env.actions = act_buf; + env.rewards = rew_buf; + env.terminals = term_buf; + env.max_steps = max_steps; + // df11: Simplified reward config + RewardConfig rcfg = { + .aim_scale = 0.05f, .closing_scale = 0.003f, + .neg_g = 0.02f, + .speed_min = 50.0f, + }; + init(&env, 0, &rcfg, 1, randomize, 0.7f, 0); // curriculum_enabled=1 + return env; +} + +// Helper to make env for rudder penalty test +static Dogfight make_env_for_rudder_test(int max_steps) { + Dogfight env = {0}; + env.observations = obs_buf; + env.actions = act_buf; + env.rewards = rew_buf; + env.terminals = term_buf; + env.max_steps = max_steps; + // df11: Simplified reward config + RewardConfig rcfg = { + .aim_scale = 0.05f, .closing_scale = 0.003f, + .neg_g = 0.02f, + .speed_min = 50.0f, + }; + init(&env, 0, &rcfg, 0, 0, 0.7f, 0); + return env; +} + +void test_rudder_penalty_accumulates() { + // df11: Test that constant rudder use accumulates meaningful penalty over multiple steps + Dogfight env = make_env_for_rudder_test(1000); // 10x default for visibility + c_reset(&env); + + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.opponent.pos = 
vec3(300, 0, 1000); + + // Full rudder with moderate throttle to maintain flight + float total_reward = 0.0f; + for (int i = 0; i < 50; i++) { + env.actions[0] = 0.5f; // Moderate throttle + env.actions[1] = 0.0f; // Neutral elevator + env.actions[2] = 0.0f; // Neutral aileron + env.actions[3] = 1.0f; // Full right rudder (constant yaw) + env.actions[4] = -1.0f; // No fire + c_step(&env); + total_reward += env.rewards[0]; + + // Refresh opponent position (so distance reward stays similar) + env.opponent.pos = vec3(env.player.pos.x + 300, env.player.pos.y, env.player.pos.z); + } + + // Compare to coordinated flight: same scenario but no rudder + Dogfight env2 = make_env_for_rudder_test(1000); + c_reset(&env2); + + env2.player.pos = vec3(0, 0, 1000); + env2.player.vel = vec3(100, 0, 0); + env2.opponent.pos = vec3(300, 0, 1000); + + float total_reward_coordinated = 0.0f; + for (int i = 0; i < 50; i++) { + env2.actions[0] = 0.5f; + env2.actions[1] = 0.0f; + env2.actions[2] = 0.0f; // Neutral aileron + env2.actions[3] = 0.0f; // NO rudder (coordinated) + env2.actions[4] = -1.0f; + c_step(&env2); + total_reward_coordinated += env2.rewards[0]; + + env2.opponent.pos = vec3(env2.player.pos.x + 300, env2.player.pos.y, env2.player.pos.z); + } + + // Rudder use should accumulate worse reward than coordinated flight + assert(total_reward < total_reward_coordinated); + + printf("test_rudder_penalty_accumulates PASS\n"); +} + +// Helper to get bearing from player to opponent (degrees, 0=ahead, 90=right, 180=behind) +static float get_bearing(Dogfight *env) { + Vec3 rel = sub3(env->opponent.pos, env->player.pos); + Vec3 player_fwd = quat_rotate(env->player.ori, vec3(1, 0, 0)); + float dot = dot3(normalize3(rel), player_fwd); + return acosf(clampf(dot, -1, 1)) * 180.0f / M_PI; +} + +// Helper to get opponent heading (degrees, 0=+X, 90=+Y) +static float get_opponent_heading(Dogfight *env) { + Vec3 opp_fwd = quat_rotate(env->opponent.ori, vec3(1, 0, 0)); + return atan2f(opp_fwd.y, opp_fwd.x) * 180.0f / M_PI; +} + +void test_spawn_bearing_variety() { + // Test that FULL_RANDOM stage spawns opponents at various bearings (not just ahead) + // Set stage directly since curriculum is now performance-based (df10) + Dogfight env = make_env_curriculum(1000, 0); // Progressive mode + env.stage = CURRICULUM_FULL_RANDOM; // Force stage 4 (FULL_RANDOM) directly + + int front_count = 0; // bearing < 45 + int side_count = 0; // bearing 45-135 + int behind_count = 0; // bearing > 135 + + // Run many resets with different seeds + for (int seed = 0; seed < 100; seed++) { + srand(seed * 7 + 13); // Vary seed + c_reset(&env); + + // Verify we're in stage 4 (FULL_RANDOM after 2026-01-18 reorder) + assert(env.stage == CURRICULUM_FULL_RANDOM); + + float bearing = get_bearing(&env); + if (bearing < 45.0f) front_count++; + else if (bearing > 135.0f) behind_count++; + else side_count++; + } + + // With 360° spawning, we should see opponents in all directions + // Each sector should have at least some spawns (allow for randomness) + assert(front_count > 0); // Some in front + assert(side_count > 0); // Some to the side + assert(behind_count > 0); // Some behind (this is the key test!) 
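+ // Why the loose >0 checks above are enough: assuming FULL_RANDOM draws the
+ // spawn azimuth roughly uniformly over 360°, the buckets cover ~25% (front,
+ // <45°), ~50% (side, 45-135°) and ~25% (behind, >135°) of the circle, so
+ // 100 resets should leave every count well above zero.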
+ + printf("test_spawn_bearing_variety PASS (front=%d, side=%d, behind=%d)\n", + front_count, side_count, behind_count); +} + +void test_spawn_heading_variety() { + // Test that FULL_RANDOM opponents have varied headings (not always 0) + // Set stage directly since curriculum is now performance-based (df10) + Dogfight env = make_env_curriculum(1000, 0); // Progressive mode + env.stage = CURRICULUM_FULL_RANDOM; // Force stage 4 (FULL_RANDOM) directly + + float min_heading = 999.0f; + float max_heading = -999.0f; + int varied_count = 0; // Count of headings not near 0 + + for (int seed = 0; seed < 50; seed++) { + srand(seed * 11 + 17); + c_reset(&env); + + // Verify we're in stage 4 (FULL_RANDOM after 2026-01-18 reorder) + assert(env.stage == CURRICULUM_FULL_RANDOM); + + float heading = get_opponent_heading(&env); + if (heading < min_heading) min_heading = heading; + if (heading > max_heading) max_heading = heading; + if (fabsf(heading) > 30.0f) varied_count++; // Not facing +X + } + + // Headings should vary across the full 360° range + float heading_range = max_heading - min_heading; + assert(heading_range > 90.0f); // At least 90° variation + assert(varied_count > 10); // At least some not facing default direction + + printf("test_spawn_heading_variety PASS (range=%.0f°, varied=%d)\n", + heading_range, varied_count); +} + +void test_curriculum_stages_differ() { + // Test that different curriculum stages produce different spawn patterns + // Set stage directly since curriculum is now performance-based (df10) + Dogfight env = make_env_curriculum(1000, 0); // Progressive mode (randomize=0) + + // Stage 0: TAIL_CHASE - opponent ahead, same direction + env.stage = CURRICULUM_TAIL_CHASE; + srand(42); + c_reset(&env); + float bearing_tail = get_bearing(&env); + float heading_tail = get_opponent_heading(&env); + assert(env.stage == CURRICULUM_TAIL_CHASE); + + // Stage 1: HEAD_ON - opponent ahead, facing us + env.stage = CURRICULUM_HEAD_ON; + srand(42); + c_reset(&env); + float bearing_head = get_bearing(&env); + assert(env.stage == CURRICULUM_HEAD_ON); + + // Stage 2: VERTICAL - opponent above/below (after 2026-01-18 reorder, was stage 3) + env.stage = CURRICULUM_VERTICAL; + srand(42); + c_reset(&env); + float bearing_vert = get_bearing(&env); + assert(env.stage == CURRICULUM_VERTICAL); + + // Stage 6: CROSSING - opponent to side (after 2026-01-18 reorder, was stage 2) + env.stage = CURRICULUM_CROSSING; + srand(42); + c_reset(&env); + float bearing_cross = get_bearing(&env); + assert(env.stage == CURRICULUM_CROSSING); + + // TAIL_CHASE should have opponent nearly ahead (small bearing) + assert(bearing_tail < 30.0f); + + // HEAD_ON should have opponent ahead + assert(bearing_head < 30.0f); + + // VERTICAL should have opponent ahead (same heading, different altitude) + assert(bearing_vert < 45.0f); + + // CROSSING should have opponent more to the side (larger bearing) + assert(bearing_cross > 45.0f); + + // TAIL_CHASE opponent should face same direction as player (~0° heading) + assert(fabsf(heading_tail) < 30.0f); + + printf("test_curriculum_stages_differ PASS (tail=%.0f°, head=%.0f°, vert=%.0f°, cross=%.0f°)\n", + bearing_tail, bearing_head, bearing_vert, bearing_cross); +} + +void test_spawn_distance_range() { + // Test that spawn distances are within expected ranges + Dogfight env = make_env_curriculum(1000, 1); + + float min_dist = 9999.0f; + float max_dist = 0.0f; + + for (int seed = 0; seed < 50; seed++) { + srand(seed * 13 + 7); + c_reset(&env); + + Vec3 rel = sub3(env.opponent.pos, 
env.player.pos); + float dist = norm3(rel); + + if (dist < min_dist) min_dist = dist; + if (dist > max_dist) max_dist = dist; + } + + // Distances should be reasonable (200-700m typical range across all stages) + assert(min_dist > 100.0f); // Not too close + assert(max_dist < 800.0f); // Not too far + assert(max_dist - min_dist > 100.0f); // Some variety + + printf("test_spawn_distance_range PASS (min=%.0f, max=%.0f)\n", min_dist, max_dist); +} + +void test_neg_g_penalty() { + // Test that pushing forward (negative G) gets worse reward than pulling back + // Now uses actual g_force (not elevator position), so we need multiple steps + // for the G-force to develop from the maneuver + // + // We set a high neg_g penalty to ensure it dominates other reward differences + // caused by trajectory changes during the maneuver + Dogfight env = make_env(1000); + env.rcfg.neg_g = 0.5f; // High penalty to dominate other rewards + c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + + // Pull back for multiple steps + env.player.pos = vec3(0, 0, 1500); + env.player.vel = vec3(150, 0, 0); // Fast enough for significant G + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(400, 0, 1500); + env.opponent.vel = vec3(150, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + + env.actions[1] = -0.5f; // Pull back (nose up, positive G) + float total_reward_pull = 0.0f; + float pull_g_sum = 0.0f; + for (int i = 0; i < 30; i++) { + c_step(&env); + total_reward_pull += env.rewards[0]; + pull_g_sum += env.player.g_force; + } + float pull_g_avg = pull_g_sum / 30.0f; + + // Push forward for multiple steps + c_reset(&env); + env.actions[4] = -1.0f; + env.player.pos = vec3(0, 0, 1500); + env.player.vel = vec3(150, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(400, 0, 1500); + env.opponent.vel = vec3(150, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + + env.actions[1] = 0.5f; // Push forward (nose down, negative G) + float total_reward_push = 0.0f; + float push_g_sum = 0.0f; + for (int i = 0; i < 30; i++) { + c_step(&env); + total_reward_push += env.rewards[0]; + push_g_sum += env.player.g_force; + } + float push_g_avg = push_g_sum / 30.0f; + + // Verify G-forces are correct direction (pull = positive, push = negative) + assert(pull_g_avg > 1.0f); // Pull should give >1G + assert(push_g_avg < 0.5f); // Push should give <0.5G (triggering penalty) + + // Pull back should have better total reward (push fwd triggers neg_g penalty) + assert(total_reward_pull > total_reward_push); + printf("test_neg_g_penalty PASS (pull=%.4f > push=%.4f, pull_g=%.1f, push_g=%.1f)\n", + total_reward_pull, total_reward_push, pull_g_avg, push_g_avg); +} + +void test_rudder_penalty() { + // Test that no rudder gets better reward than full rudder + Dogfight env = make_env(1000); + c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + + // No rudder + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + env.actions[3] = 0.0f; // No rudder + c_step(&env); + float reward_no_rudder = env.rewards[0]; + + // Full rudder + c_reset(&env); + env.actions[4] = -1.0f; + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + env.actions[3] = 1.0f; // Full rudder + c_step(&env); + 
float reward_rudder = env.rewards[0]; + + // No rudder should have better reward + assert(reward_no_rudder > reward_rudder); + printf("test_rudder_penalty PASS (no_rud=%.5f > rud=%.5f)\n", reward_no_rudder, reward_rudder); +} + +// Generic test: all observation schemes produce bounded values +// Works regardless of which schemes exist or their indices +void test_obs_bounds_all_schemes() { + int schemes_tested = 0; + int total_obs_checked = 0; + + // Test all schemes from 0 to OBS_SCHEME_COUNT-1 + for (int scheme = 0; scheme < OBS_SCHEME_COUNT; scheme++) { + // Create env with this scheme + Dogfight env = {0}; + env.observations = obs_buf; + env.actions = act_buf; + env.rewards = rew_buf; + env.terminals = term_buf; + env.max_steps = 1000; + RewardConfig rcfg = { + .aim_scale = 0.05f, .closing_scale = 0.003f, + .neg_g = 0.02f, + .speed_min = 50.0f, + }; + init(&env, scheme, &rcfg, 0, 0, 0.7f, 0); + + // Reset to get valid observations + c_reset(&env); + + // Verify obs_size is positive and reasonable + assert(env.obs_size > 0 && env.obs_size <= 32); + + // All observations should be bounded [-2, 2] (some have [-1,1], some [0,1]) + // Using [-2, 2] as generous outer bound that catches NaN/Inf/unbounded + for (int i = 0; i < env.obs_size; i++) { + float val = env.observations[i]; + assert(!isnan(val) && !isinf(val)); + assert(val >= -2.0f && val <= 2.0f); + total_obs_checked++; + } + + // Run a few steps and check bounds again + for (int step = 0; step < 10; step++) { + // Neutral actions + env.actions[0] = 0.0f; // throttle + env.actions[1] = 0.0f; // pitch + env.actions[2] = 0.0f; // roll + env.actions[3] = 0.0f; // yaw + env.actions[4] = 0.0f; // fire + + c_step(&env); + + // Check bounds after step + for (int i = 0; i < env.obs_size; i++) { + float val = env.observations[i]; + assert(!isnan(val) && !isinf(val)); + assert(val >= -2.0f && val <= 2.0f); + } + } + + schemes_tested++; + } + + printf("test_obs_bounds_all_schemes PASS (%d schemes, %d obs checked)\n", + schemes_tested, total_obs_checked); +} + +int main() { + printf("Running dogfight tests...\n\n"); + + // Phase 1 + test_vec3_math(); + test_quat_math(); + test_init(); + test_reset_plane(); + test_c_reset(); + test_compute_observations(); + test_c_step_moves_forward(); + test_oob_terminates(); + test_max_steps_terminates(); + test_supersonic_terminates(); + + // Phase 2 + test_opponent_spawns(); + test_relative_observations(); + test_pursuit_reward(); + + // Phase 3 + test_aircraft_params(); + test_throttle_accelerates(); + test_plane_falls_without_lift(); + test_controls_affect_orientation(); + test_dynamic_pressure(); + test_lift_opposes_gravity(); + test_drag_slows_plane(); + test_stall_clamps_lift(); + test_glimit_clamps_acceleration(); + test_forces_sum_correctly(); + test_integration_updates_state(); + + // Phase 3.5 + test_closing_velocity_reward(); + test_tail_position_reward(); + test_speed_penalty(); + + // Phase 4 + test_chase_camera_behind_player(); + test_camera_orbit_updates(); + test_client_struct_defaults(); + + // Phase 5 + test_trigger_fires(); + test_fire_cooldown(); + test_cone_hit_detection(); + test_hit_reward(); + test_kill_terminates_episode(); + test_combat_constants(); + + // Phase 5.5: df11 reward tests (simplified: aim, closing, neg_g, stall, rudder) + test_aim_reward(); + test_aim_reward_range_dependent(); + test_rudder_penalty_accumulates(); + test_neg_g_penalty(); + test_rudder_penalty(); + + // Phase 6: Spawn variety tests + test_spawn_bearing_variety(); + test_spawn_heading_variety(); + 
test_curriculum_stages_differ(); + test_spawn_distance_range(); + + // Phase 7: Generic observation tests + test_obs_bounds_all_schemes(); + + printf("\nAll 46 tests PASS\n"); + return 0; +} diff --git a/pufferlib/ocean/dogfight/flightlib.h b/pufferlib/ocean/dogfight/flightlib.h new file mode 100644 index 000000000..98c89aaa8 --- /dev/null +++ b/pufferlib/ocean/dogfight/flightlib.h @@ -0,0 +1,446 @@ +// flightlib.h - Flight physics and simulation library for dogfight environment +// Modeled after dronelib.h pattern - self-contained physics simulation module +// +// Contains: +// - Math types (Vec3, Quat) and operations +// - Aircraft parameters (WW2 fighter class) +// - Plane struct (flight object state) +// - Physics functions (step_plane_with_physics, etc.) + +#ifndef FLIGHTLIB_H +#define FLIGHTLIB_H + +#include +#include +#include +#include + +// Allow DEBUG to be defined before including this header +#ifndef DEBUG +#define DEBUG 0 +#endif + +#ifndef PI +#define PI 3.14159265358979f +#endif + +// ============================================================================ +// MATH TYPES +// ============================================================================ + +typedef struct { float x, y, z; } Vec3; +typedef struct { float w, x, y, z; } Quat; + +// ============================================================================ +// MATH UTILITIES +// ============================================================================ + +static inline float clampf(float v, float lo, float hi) { + return v < lo ? lo : (v > hi ? hi : v); +} + +static inline float rndf(float a, float b) { + return a + ((float)rand() / (float)RAND_MAX) * (b - a); +} + +// --- Vec3 operations --- + +static inline Vec3 vec3(float x, float y, float z) { return (Vec3){x, y, z}; } +static inline Vec3 add3(Vec3 a, Vec3 b) { return (Vec3){a.x + b.x, a.y + b.y, a.z + b.z}; } +static inline Vec3 sub3(Vec3 a, Vec3 b) { return (Vec3){a.x - b.x, a.y - b.y, a.z - b.z}; } +static inline Vec3 mul3(Vec3 a, float s) { return (Vec3){a.x * s, a.y * s, a.z * s}; } +static inline float dot3(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; } +static inline float norm3(Vec3 a) { return sqrtf(dot3(a, a)); } + +static inline Vec3 normalize3(Vec3 v) { + float n = norm3(v); + if (n < 1e-8f) return vec3(0, 0, 0); + return mul3(v, 1.0f / n); +} + +static inline Vec3 cross3(Vec3 a, Vec3 b) { + return vec3( + a.y * b.z - a.z * b.y, + a.z * b.x - a.x * b.z, + a.x * b.y - a.y * b.x + ); +} + +// --- Quaternion operations --- + +static inline Quat quat(float w, float x, float y, float z) { return (Quat){w, x, y, z}; } + +static inline Quat quat_mul(Quat a, Quat b) { + return (Quat){ + a.w*b.w - a.x*b.x - a.y*b.y - a.z*b.z, + a.w*b.x + a.x*b.w + a.y*b.z - a.z*b.y, + a.w*b.y - a.x*b.z + a.y*b.w + a.z*b.x, + a.w*b.z + a.x*b.y - a.y*b.x + a.z*b.w + }; +} + +static inline void quat_normalize(Quat* q) { + float n = sqrtf(q->w*q->w + q->x*q->x + q->y*q->y + q->z*q->z); + if (n > 1e-8f) { + float inv = 1.0f / n; + q->w *= inv; q->x *= inv; q->y *= inv; q->z *= inv; + } +} + +static inline Vec3 quat_rotate(Quat q, Vec3 v) { + Quat qv = {0.0f, v.x, v.y, v.z}; + Quat q_conj = {q.w, -q.x, -q.y, -q.z}; + Quat tmp = quat_mul(q, qv); + Quat res = quat_mul(tmp, q_conj); + return (Vec3){res.x, res.y, res.z}; +} + +static inline Quat quat_from_axis_angle(Vec3 axis, float angle) { + float half = angle * 0.5f; + float s = sinf(half); + return (Quat){cosf(half), axis.x * s, axis.y * s, axis.z * s}; +} + +// 
============================================================================ +// AIRCRAFT PARAMETERS - P-51D Mustang Reference +// ============================================================================ +// Based on P51d_REFERENCE_DATA.md - validated against historical data +// Test condition: 9,000 lb (4,082 kg) combat weight, sea level ISA +// +// THEORETICAL PERFORMANCE (P-51D targets): +// Max speed (SL, Military): 355 mph (159 m/s) +// Max speed (SL, WEP): 368 mph (164 m/s) +// Stall speed (clean): 100 mph (45 m/s) +// ROC (SL, Military): 3,030 ft/min (15.4 m/s) +// +// LIFT MODEL: +// C_L = C_L_alpha * (alpha + incidence - alpha_zero) +// The P-51D has a cambered airfoil (NAA 45-100) with alpha_zero = -1.2° +// Wing incidence is +1.5° relative to fuselage datum +// At 0° body pitch: effective AOA = 1.5° - (-1.2°) = 2.7°, C_L ~ 0.26 +// +// DRAG POLAR: Cd = Cd0 + K * Cl^2 +// - Cd0 = 0.0163 (P-51D published value, very clean laminar flow wing) +// - K = 0.072 = 1/(pi * e * AR) where e=0.75, AR=5.86 +// ============================================================================ + +#define MASS 4082.0f // kg (P-51D combat weight: 9,000 lb) +#define WING_AREA 21.65f // m^2 (P-51D: 233 ft^2) +#define C_D0 0.0163f // parasitic drag coefficient (P-51D laminar flow) +#define K 0.072f // induced drag factor: 1/(pi*0.75*5.86) +#define K_SIDESLIP 0.7f // sideslip drag factor (JSBSim: 0.05 CD at 15 deg) +#define C_L_MAX 1.48f // max lift coefficient before stall (P-51D clean) +#define C_L_ALPHA 5.56f // lift curve slope (P-51D: 0.097/deg = 5.56/rad) +#define ALPHA_ZERO -0.021f // zero-lift angle (rad), -1.2° for cambered airfoil +#define WING_INCIDENCE 0.026f // wing incidence angle (rad), +1.5° (P-51D) +#define ENGINE_POWER 1112000.0f // watts (P-51D Military: 1,490 hp) +#define ETA_PROP 0.80f // propeller efficiency (P-51D cruise: 0.80-0.85) +#define GRAVITY 9.81f // m/s^2 +#define G_LIMIT_POS 6.0f // max positive G (pulling up) - pilot limit +#define G_LIMIT_NEG 1.5f // max negative G (pushing over) - blood to head is painful +#define RHO 1.225f // air density kg/m^3 (sea level ISA) + +// Inverse constants for faster computation (multiply instead of divide) +#define INV_MASS 0.000245f // 1/4082 +#define INV_GRAVITY 0.10197f // 1/9.81 +#define RAD_TO_DEG 57.2957795f // 180/PI + +#define MAX_PITCH_RATE 2.5f // rad/s +#define MAX_ROLL_RATE 3.0f // rad/s +#define MAX_YAW_RATE 0.50f // rad/s (~29 deg/s command, realistic ~7 deg/s achieved) + +// ============================================================================ +// PLANE STRUCT - Flight object state +// ============================================================================ + +typedef struct { + Vec3 pos; + Vec3 vel; + Vec3 prev_vel; // Previous velocity for acceleration calculation + Quat ori; + float throttle; + float g_force; // Current G-loading (for reward calculation) + float yaw_from_rudder; // Accumulated yaw from rudder (for damping) + int fire_cooldown; // Ticks until can fire again (0 = ready) +} Plane; + +// ============================================================================ +// PHYSICS FUNCTIONS +// ============================================================================ + +static inline void reset_plane(Plane *p, Vec3 pos, Vec3 vel) { + p->pos = pos; + p->vel = vel; + p->prev_vel = vel; // Initialize to current vel (no acceleration at start) + p->ori = quat(1, 0, 0, 0); + p->throttle = 0.5f; + p->g_force = 1.0f; // 1G at start (level flight) + p->yaw_from_rudder = 0.0f; // No accumulated rudder yaw 
at start + p->fire_cooldown = 0; +} + +// ============================================================================ +// PHYSICS MODEL - step_plane_with_physics() +// ============================================================================ +// This implements a simplified 6-DOF flight model with: +// - Rate-based attitude control (not position control) +// - Point-mass aerodynamics (no moments/stability derivatives) +// - Propeller thrust model (T = P*eta/V, capped at static thrust) +// - Drag polar: Cd = Cd0 + K*Cl^2 +// - Wing incidence angle (built-in AOA for near-level cruise) +// +// COORDINATE SYSTEM: +// World frame: X=East, Y=North, Z=Up (right-handed, Z-up) +// Body frame: X=Forward (nose), Y=Right (wing), Z=Up (canopy) +// +// WING INCIDENCE: +// The wing is mounted at WING_INCIDENCE (~2 deg) relative to fuselage. +// Effective AOA for lift = body_alpha + WING_INCIDENCE +// This allows near-level flight at cruise speed with zero pitch input. +// +// REMAINING LIMITATIONS: +// - No pitching moment / static stability (Cm_alpha) +// - Rate-based controls (not position-based) +// - Symmetric stall model (real stall is asymmetric) +// ============================================================================ +static inline void step_plane_with_physics(Plane *p, float *actions, float dt) { + // Save previous velocity for acceleration calculation (v²/r) + p->prev_vel = p->vel; + + // ======================================================================== + // 1. BODY FRAME AXES (transform from body to world coordinates) + // ======================================================================== + // These are the aircraft's body axes expressed in world coordinates + Vec3 forward = quat_rotate(p->ori, vec3(1, 0, 0)); // Nose direction + Vec3 right = quat_rotate(p->ori, vec3(0, 1, 0)); // Right wing direction + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); // Canopy direction + + // ======================================================================== + // 2. CONTROL INPUTS -> ANGULAR RATES + // ======================================================================== + // Actions are [-1, 1], mapped to physical rates + // NOTE: These are RATE commands, not POSITION commands! + // Holding elevator=0.5 pitches DOWN continuously (standard joystick convention) + float throttle = (actions[0] + 1.0f) * 0.5f; // [-1,1] -> [0,1] + float pitch_rate = actions[1] * MAX_PITCH_RATE; // rad/s, + = nose down (push fwd) + float roll_rate = actions[2] * MAX_ROLL_RATE; // rad/s, + = roll right + + // ======================================================================== + // 2a. RUDDER DAMPING - Realistic sideslip-limited yaw + // ======================================================================== + // Real rudder physics: deflection creates sideslip angle (β), NOT sustained + // yaw rate. Vertical tail creates restoring moment that limits β to ~10°. + // Once equilibrium sideslip is reached, yaw rate approaches zero. + // + // Implementation: Track accumulated yaw from rudder, reduce effectiveness + // as it accumulates toward max sideslip angle (~10°). 
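+    // Worked example (values follow from the constants defined above): full
+    // right rudder commands -MAX_YAW_RATE = -0.5 rad/s. At half the sideslip
+    // limit (|yaw_from_rudder| = 0.0875 rad, ~5 deg) damping = 0.5, so the
+    // effective yaw rate is -0.25 rad/s; at the 0.175 rad limit it reaches 0.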
+ float rudder_yaw_cmd = -actions[3] * MAX_YAW_RATE; // Commanded yaw rate + float max_rudder_yaw = 0.175f; // ~10 degrees max sideslip (0.175 rad) + + // Damping: effectiveness drops to 0 as accumulated yaw approaches limit + float damping = 1.0f - clampf(fabsf(p->yaw_from_rudder) / max_rudder_yaw, 0.0f, 1.0f); + float yaw_rate = rudder_yaw_cmd * damping; + + // Update accumulated yaw state + if (fabsf(actions[3]) < 0.1f) { + // Rudder released: weathervane tendency returns nose to airflow + // Vertical tail realigns with velocity, sideslip decays + p->yaw_from_rudder *= 0.95f; + } else { + // Rudder active: accumulate yaw (limited by damping above) + p->yaw_from_rudder += yaw_rate * dt; + } + + // ======================================================================== + // 3. ATTITUDE INTEGRATION (Quaternion kinematics) + // ======================================================================== + // q_dot = 0.5 * q * w where w is angular velocity in body frame + // This is the standard quaternion derivative formula + Vec3 omega_body = vec3(roll_rate, pitch_rate, yaw_rate); // body-frame w + Quat omega_quat = quat(0, omega_body.x, omega_body.y, omega_body.z); + Quat q_dot = quat_mul(p->ori, omega_quat); + p->ori.w += 0.5f * q_dot.w * dt; + p->ori.x += 0.5f * q_dot.x * dt; + p->ori.y += 0.5f * q_dot.y * dt; + p->ori.z += 0.5f * q_dot.z * dt; + quat_normalize(&p->ori); // Prevent drift from numerical integration + + // ======================================================================== + // 4. ANGLE OF ATTACK (AOA, a) + // ======================================================================== + // AOA = angle between velocity vector and body X-axis (nose) + // Positive a = nose above flight path = generating positive lift + // + // SIGN CONVENTION: + // If velocity has component opposite to body Z (up), nose is above + // flight path, so a is positive. + float V = norm3(p->vel); + if (V < 1.0f) V = 1.0f; // Prevent division by zero + + Vec3 vel_norm = normalize3(p->vel); + float cos_alpha = dot3(vel_norm, forward); + cos_alpha = clampf(cos_alpha, -1.0f, 1.0f); + float alpha = acosf(cos_alpha); // Always positive [0, pi] + + // Determine sign: positive when nose is ABOVE velocity vector + // If vel dot up < 0, velocity is "below" the body frame -> nose above -> a > 0 + float sign_alpha = (dot3(p->vel, up) < 0) ? 1.0f : -1.0f; + alpha *= sign_alpha; + + // ======================================================================== + // 5. LIFT COEFFICIENT (Linear + Stall Clamp) + // ======================================================================== + // C_L = C_L_alpha * (alpha - alpha_zero) + // For cambered airfoils, alpha_zero < 0 (generates lift at 0° AOA) + // P-51D NAA 45-100 airfoil: alpha_zero = -1.2° + // + // Effective AOA for lift = body_alpha + wing_incidence - alpha_zero + // At 0° body pitch: alpha_eff = 0 + 1.5° - (-1.2°) = 2.7° + // This gives C_L = 5.56 * 0.047 = 0.26, allowing near-level cruise + // + // Stall occurs at alpha_eff ~ 19° (P-51D clean), C_L_max = 1.48 + float alpha_effective = alpha + WING_INCIDENCE - ALPHA_ZERO; + float C_L = C_L_ALPHA * alpha_effective; + C_L = clampf(C_L, -C_L_MAX, C_L_MAX); // Stall limiting (symmetric) + + // ======================================================================== + // 6. 
DYNAMIC PRESSURE + // ======================================================================== + // q = 0.5*rho*V^2 [Pa or N/m^2] + // This is the "pressure" available for aerodynamic forces + // At 100 m/s: q = 0.5 * 1.225 * 10000 = 6,125 Pa + float q_dyn = 0.5f * RHO * V * V; + + // ======================================================================== + // 7. LIFT FORCE + // ======================================================================== + // L = Cl * q * S [Newtons] + // For level flight: L = W = m*g = 29,430 N + // Required Cl at 100 m/s: Cl = 29430 / (6125 * 22) = 0.218 + // Required a = 0.218 / 5.7 = 0.038 rad ~ 2.2 deg + float L_mag = C_L * q_dyn * WING_AREA; + + // ======================================================================== + // 8. DRAG FORCE (Drag Polar) + // ======================================================================== + // Cd = Cd0 + K * Cl^2 + K_SIDESLIP * beta^2 + float C_D_sideslip = K_SIDESLIP * p->yaw_from_rudder * p->yaw_from_rudder; + float C_D = C_D0 + K * C_L * C_L + C_D_sideslip; + float D_mag = C_D * q_dyn * WING_AREA; + + // ======================================================================== + // 9. THRUST FORCE (Propeller Model) + // ======================================================================== + // Power-based: P = T * V -> T = P * eta / V + // At low speed, thrust is limited by static thrust capability + // + // At V=80 m/s, full throttle: T = 800,000 / 80 = 10,000 N + // At V=143 m/s (max speed): T = 800,000 / 143 = 5,594 N ~ D + float P_avail = ENGINE_POWER * throttle; + float T_dynamic = (P_avail * ETA_PROP) / V; // Thrust from power equation + float T_static = 0.3f * P_avail; // Static thrust limit + float T_mag = fminf(T_static, T_dynamic); // Can't exceed either limit + + // ======================================================================== + // 10. FORCE DIRECTIONS (All in world frame) + // ======================================================================== + Vec3 drag_dir = mul3(vel_norm, -1.0f); // Opposite to velocity + Vec3 thrust_dir = forward; // Along body X-axis (nose) + + // Lift direction: perpendicular to velocity, in plane of velocity & wing + // lift_dir = vel x right, then normalized + // This ensures lift is perpendicular to V and perpendicular to span + Vec3 lift_dir = cross3(vel_norm, right); + float lift_dir_mag = norm3(lift_dir); + if (lift_dir_mag > 0.01f) { + lift_dir = mul3(lift_dir, 1.0f / lift_dir_mag); + } else { + lift_dir = up; // Fallback if velocity parallel to wing (rare) + } + + // ======================================================================== + // 11. WEIGHT (Gravity) + // ======================================================================== + Vec3 weight = vec3(0, 0, -MASS * GRAVITY); // Always -Z in world frame + + // ======================================================================== + // 12. 
SUM FORCES -> ACCELERATION + // ======================================================================== + Vec3 F_thrust = mul3(thrust_dir, T_mag); + Vec3 F_lift = mul3(lift_dir, L_mag); + Vec3 F_drag = mul3(drag_dir, D_mag); + + // Aerodynamic forces only (what pilot feels - "specific force") + // In level flight: lift ≈ weight, so F_aero_up ≈ m*g, giving g_force ≈ 1.0 + Vec3 F_aero = add3(F_thrust, add3(F_lift, F_drag)); + + // Body-up axis (perpendicular to wings, toward canopy) + Vec3 body_up = quat_rotate(p->ori, vec3(0, 0, 1)); + + // G-force = aero force along body-up / (mass * g) + // This is what the pilot feels (pushed into seat = positive G) + float g_force = dot3(F_aero, body_up) * INV_MASS * INV_GRAVITY; + + // Total force includes weight for actual physics + Vec3 F_total = add3(F_aero, weight); + + // ======================================================================== + // 13. G-LIMIT (Asymmetric for Positive/Negative G) + // ======================================================================== + // Pilots can handle much more positive G (blood to feet, 6G+) than + // negative G (blood to head, -1.5G is very uncomfortable). + // Limit the body-normal acceleration asymmetrically. + Vec3 accel = mul3(F_total, INV_MASS); + + // Asymmetric limits on felt G + if (g_force > G_LIMIT_POS) { + // Positive G exceeded - clamp + float excess = (g_force - G_LIMIT_POS) * GRAVITY; // Excess accel in m/s^2 + accel = sub3(accel, mul3(body_up, excess)); + g_force = G_LIMIT_POS; + } else if (g_force < -G_LIMIT_NEG) { + // Negative G exceeded - clamp (need to ADD acceleration along body_up) + float deficit = (-G_LIMIT_NEG - g_force) * GRAVITY; // How much to add back + accel = add3(accel, mul3(body_up, deficit)); // ADD, not subtract! + g_force = -G_LIMIT_NEG; + } + + if (DEBUG >= 10) printf("=== PHYSICS ===\n"); + if (DEBUG >= 10) printf("speed=%.1f m/s (stall~45, max~159 P-51D)\n", V); + if (DEBUG >= 10) printf("throttle=%.2f\n", throttle); + if (DEBUG >= 10) printf("alpha_body=%.2f deg, alpha_eff=%.2f deg (inc=%.1f, a0=%.1f), C_L=%.3f\n", + alpha * RAD_TO_DEG, alpha_effective * RAD_TO_DEG, + WING_INCIDENCE * RAD_TO_DEG, ALPHA_ZERO * RAD_TO_DEG, C_L); + if (DEBUG >= 10) printf("thrust=%.0f N, lift=%.0f N, drag=%.0f N, weight=%.0f N\n", T_mag, L_mag, D_mag, MASS * GRAVITY); + if (DEBUG >= 10) printf("g_force=%.2f g (limit=+%.1f/-%.1f)\n", g_force, G_LIMIT_POS, G_LIMIT_NEG); + + // ======================================================================== + // 14. 
INTEGRATION (Semi-implicit Euler) + // ======================================================================== + // v(t+dt) = v(t) + a * dt + // x(t+dt) = x(t) + v(t+dt) * dt (using NEW velocity) + p->vel = add3(p->vel, mul3(accel, dt)); + p->pos = add3(p->pos, mul3(p->vel, dt)); + + p->throttle = throttle; + p->g_force = g_force; // Store for reward calculation +} + +// Simple forward motion for opponent (no physics, just maintains heading) +static inline void step_plane(Plane *p, float dt) { + // Save previous velocity for acceleration calculation + p->prev_vel = p->vel; + + Vec3 forward = quat_rotate(p->ori, vec3(1, 0, 0)); + float speed = norm3(p->vel); + if (speed < 1.0f) speed = 80.0f; + p->vel = mul3(forward, speed); + p->pos = add3(p->pos, mul3(p->vel, dt)); + + if (DEBUG >= 10) printf("=== TARGET ===\n"); + if (DEBUG >= 10) printf("target_speed=%.1f m/s (expected=80)\n", speed); + if (DEBUG >= 10) printf("target_pos=(%.1f, %.1f, %.1f)\n", p->pos.x, p->pos.y, p->pos.z); + if (DEBUG >= 10) printf("target_fwd=(%.2f, %.2f, %.2f)\n", forward.x, forward.y, forward.z); +} + +#endif // FLIGHTLIB_H diff --git a/pufferlib/ocean/dogfight/physics_log.md b/pufferlib/ocean/dogfight/physics_log.md new file mode 100644 index 000000000..7daa0faaf --- /dev/null +++ b/pufferlib/ocean/dogfight/physics_log.md @@ -0,0 +1,25 @@ +# Physics Sanity Log + +Historical record of physics test results at specific commits. + +**P-51D Reference values** (from P51d_REFERENCE_DATA.md): +- Max speed: 159 m/s (355 mph, Military power, sea level) +- Stall speed: 45 m/s (100 mph, 9000 lb, clean config) + +--- + +## How to use + +1. Run tests: `cd pufferlib/ocean/dogfight && python test_flight.py` +2. If at a clean commit worth recording, add entry below +3. Include commit hash from `git rev-parse --short HEAD` + +--- + +## Results + +| Commit | Date | max_speed | stall | climb | L/D | turn_30 | turn_60 | pitch | roll | Notes | +|--------|------|-----------|-------|-------|-----|---------|---------|-------|------|-------| +| P-51D | ref | 159 | 45 | 15 | 14.6| - | - | UP | YES | Reference targets | +| 0116b97c | 2026-01-13 | 86.5 | 75.5 | -4.9 | - | - | - | UP | YES | Old tests, pre-physics fix | +| 1c30c546 | 2026-01-14 | 149.6 | 50 | 16.3 | 14.7 | 2.2 | 9.4 | UP | YES | Coordinated turn tests, 97% eff | diff --git a/pufferlib/ocean/dogfight/test_flight.py b/pufferlib/ocean/dogfight/test_flight.py new file mode 100644 index 000000000..730965a07 --- /dev/null +++ b/pufferlib/ocean/dogfight/test_flight.py @@ -0,0 +1,114 @@ +""" +Physics validation tests for dogfight environment. +Uses force_state() to set exact initial conditions for accurate measurements. + +Run: python pufferlib/ocean/dogfight/test_flight.py + python pufferlib/ocean/dogfight/test_flight.py --render # with visualization + python pufferlib/ocean/dogfight/test_flight.py --render --test pitch_direction # single test + +This is the main entry point that aggregates all test modules: +- test_flight_physics.py: Flight physics tests (speed, climb, turn, G-force) +- test_flight_obs_static.py: Static observation scheme tests +- test_flight_obs_dynamic.py: Dynamic maneuver observation tests +- test_flight_obs_pursuit.py: OBS_PURSUIT (scheme 1) specific tests +- test_flight_energy.py: Energy physics tests (conservation, bleed rates, E-M theory) + +TODO - FLIGHT PHYSICS TESTS NEEDED: +===================================== +1. 
RUDDER-ONLY TURN TEST (HIGH PRIORITY) + - Current MAX_YAW_RATE = 1.5 rad/s (86 deg/s) is WAY too high + - P-51D rudder should give ~5-15 deg/s yaw rate max, with significant sideslip + - Test: wings level, full rudder, measure actual yaw rate and heading change + - Compare against P-51D flight test data (see P51d_REFERENCE_DATA.md) + - Expected: rudder alone should NOT be effective for turning - need bank + +2. COORDINATED TURN TEST + - Bank to 30, 45, 60 deg and measure sustained turn rate + - P-51D should get ~17.5 deg/s at max sustained (corner velocity) + - Verify turn rate vs bank angle relationship + +3. ROLL RATE TEST + - Full aileron deflection, measure time to roll 90 and 360 deg + - P-51D: ~90-100 deg/s roll rate at 300 mph + +4. PITCH AUTHORITY TEST + - Full elevator, measure pitch rate and G-loading + - Should be speed-dependent (less authority at low speed) +""" + +from test_flight_base import ( + get_args, get_render_mode, + RESULTS, + P51D_MAX_SPEED, P51D_STALL_SPEED, P51D_CLIMB_RATE, +) + +# Import test registries from each module +from test_flight_physics import TESTS as PHYSICS_TESTS +from test_flight_obs_static import TESTS as OBS_STATIC_TESTS +from test_flight_obs_dynamic import TESTS as OBS_DYNAMIC_TESTS +from test_flight_obs_pursuit import TESTS as OBS_PURSUIT_TESTS +from test_flight_energy import TESTS as ENERGY_TESTS + +# Aggregate all tests into a single registry +TESTS = { + **PHYSICS_TESTS, + **OBS_STATIC_TESTS, + **OBS_DYNAMIC_TESTS, + **OBS_PURSUIT_TESTS, + **ENERGY_TESTS, +} + + +def print_summary(): + """Print summary table.""" + print("\n" + "=" * 60) + print("SUMMARY") + print("=" * 60) + + def fmt(key): + v = RESULTS.get(key) + if v is None: + return 'N/A' + if isinstance(v, float): + return f"{v:.1f}" + return str(v) + + print(f"| Metric | Result | P-51D Target |") + print(f"|----------------|--------|--------------|") + print(f"| max_speed | {fmt('max_speed'):>6} | {P51D_MAX_SPEED:.0f} m/s |") + print(f"| cruise_speed | {fmt('cruise_speed'):>6} | - |") + print(f"| stall_speed | {fmt('stall_speed'):>6} | {P51D_STALL_SPEED:.0f} m/s |") + print(f"| climb_rate | {fmt('climb_rate'):>6} | {P51D_CLIMB_RATE:.0f} m/s |") + print(f"| glide_L/D | {fmt('glide_LD'):>6} | 14.6 |") + print(f"| turn_rate | {fmt('turn_rate'):>6} | 5.6 deg/s (45 deg bank) |") + print(f"| rudder_yaw | {fmt('rudder_yaw_rate'):>6} | 5-15 deg/s (wings lvl) |") + print(f"| pitch_dir | {fmt('pitch_direction'):>6} | DOWN (+elev) |") + print(f"| roll_works | {fmt('roll_works'):>6} | YES |") + + +if __name__ == "__main__": + args = get_args() + + print("P-51D Physics Validation Tests") + print("=" * 60) + + if args.test: + # Run single test + if args.test in TESTS: + print(f"Running single test: {args.test}") + if get_render_mode(): + print("Rendering enabled - press ESC to exit") + print("=" * 60) + TESTS[args.test]() + else: + print(f"Unknown test: {args.test}") + print(f"Available tests: {', '.join(sorted(TESTS.keys()))}") + else: + # Run all tests + print("Using force_state() for precise initial conditions") + if get_render_mode(): + print("Rendering enabled - press ESC to exit") + print("=" * 60) + for test_func in TESTS.values(): + test_func() + print_summary() diff --git a/pufferlib/ocean/dogfight/test_flight_base.py b/pufferlib/ocean/dogfight/test_flight_base.py new file mode 100644 index 000000000..714961f3c --- /dev/null +++ b/pufferlib/ocean/dogfight/test_flight_base.py @@ -0,0 +1,170 @@ +""" +Shared infrastructure for dogfight flight physics tests. +Importable by all test modules. 
+ +Run: python pufferlib/ocean/dogfight/test_flight.py + python pufferlib/ocean/dogfight/test_flight.py --render # with visualization + python pufferlib/ocean/dogfight/test_flight.py --render --test pitch_direction # single test +""" +import argparse +import numpy as np + + +def parse_args(): + parser = argparse.ArgumentParser(description='P-51D Physics Validation Tests') + parser.add_argument('--render', action='store_true', help='Enable visual rendering') + parser.add_argument('--fps', type=int, default=50, help='Target FPS when rendering (default 50 = real-time, try 5-10 for slow-mo)') + parser.add_argument('--test', type=str, default=None, help='Run specific test only') + return parser.parse_args() + + +# Parse args once at module load - can be overridden by test modules +_ARGS = None + +def get_args(): + """Get parsed args, parsing only once.""" + global _ARGS + if _ARGS is None: + _ARGS = parse_args() + return _ARGS + + +def get_render_mode(): + """Get render mode from args.""" + args = get_args() + return 'human' if args.render else None + + +def get_render_fps(): + """Get render FPS from args.""" + args = get_args() + return args.fps if args.render else None + + +# Constants (must match dogfight.h) +MAX_SPEED = 250.0 +WORLD_MAX_Z = 3000.0 +WORLD_HALF_X = 5000.0 +WORLD_HALF_Y = 5000.0 +GUN_RANGE = 1000.0 + +# Tolerance for observation tests +OBS_ATOL = 0.05 # Absolute tolerance +OBS_RTOL = 0.1 # Relative tolerance + +# P-51D reference values (from P51d_REFERENCE_DATA.md) +P51D_MAX_SPEED = 159.0 # m/s (355 mph, Military power, SL) +P51D_STALL_SPEED = 45.0 # m/s (100 mph, 9000 lb, clean) +P51D_CLIMB_RATE = 15.4 # m/s (3030 ft/min, Military power) +P51D_TURN_RATE = 17.5 # deg/s at max sustained turn (DCS testing data) + +# PID values for level flight autopilot (found via pid_sweep.py) +# These give stable level flight with vz_std < 0.3 m/s +LEVEL_FLIGHT_KP = 0.001 # Proportional gain on vz error +LEVEL_FLIGHT_KD = 0.001 # Derivative gain (damping) + +# Shared results dictionary for summary +RESULTS = {} + +# Observation indices to highlight for each test (scheme 0 - ANGLES) +# These are the key observations to watch during visual inspection +# Scheme 0: px(0), py(1), pz(2), speed(3), pitch(4), roll(5), yaw(6), tgt_az(7), tgt_el(8), dist(9), closure(10), opp_hdg(11) +TEST_HIGHLIGHTS = { + 'knife_edge_pull': [4, 5, 6], # pitch, roll, yaw - watch yaw change, roll should stay ~90° + 'knife_edge_flight': [4, 5, 6], # pitch, roll, yaw - watch altitude loss and yaw authority + 'sustained_turn': [4, 5], # pitch, roll - watch bank angle + 'turn_60': [4, 5], # pitch, roll - 60° bank turn + 'pitch_direction': [4], # pitch - confirm direction matches input + 'roll_direction': [5], # roll - confirm direction matches input + 'rudder_only_turn': [6], # yaw - watch yaw rate + 'g_level_flight': [4], # pitch - should stay near 0 + 'g_push_forward': [4], # pitch - pushing forward + 'g_pull_back': [4], # pitch - pulling back + 'g_limit_negative': [4, 5], # pitch, roll - negative G limit + 'g_limit_positive': [4, 5], # pitch, roll - positive G limit + 'climb_rate': [2, 4], # pz (altitude), pitch + 'glide_ratio': [2, 3], # pz (altitude), speed + 'stall_speed': [3], # speed - watch it decrease +} + + +def setup_highlights(env, test_name): + """Set observation highlights if this test has them defined and rendering is enabled.""" + if get_render_mode() and test_name in TEST_HIGHLIGHTS: + env.set_obs_highlight(TEST_HIGHLIGHTS[test_name]) + + +# 
============================================================================= +# State accessor functions using get_state() (independent of obs_scheme) +# ============================================================================= + +def get_speed_from_state(env): + """Get total speed from raw state.""" + s = env.get_state() + return np.sqrt(s['vx']**2 + s['vy']**2 + s['vz']**2) + + +def get_vz_from_state(env): + """Get vertical velocity from raw state.""" + return env.get_state()['vz'] + + +def get_alt_from_state(env): + """Get altitude from raw state.""" + return env.get_state()['pz'] + + +def get_up_vector_from_state(env): + """Get up vector from raw state.""" + s = env.get_state() + return s['up_x'], s['up_y'], s['up_z'] + + +def get_velocity_from_state(env): + """Get velocity vector from raw state.""" + s = env.get_state() + return s['vx'], s['vy'], s['vz'] + + +def level_flight_pitch_from_state(env, kp=LEVEL_FLIGHT_KP, kd=LEVEL_FLIGHT_KD): + """ + PD autopilot for level flight (vz = 0). + Uses tuned PID values from pid_sweep.py for stable flight. + """ + vz = get_vz_from_state(env) + # Negative because: if climbing (vz>0), need nose down (negative elevator) + elevator = -kp * vz - kd * vz + return np.clip(elevator, -0.2, 0.2) + + +# ============================================================================= +# Legacy functions (use observations - for obs_scheme testing only) +# ============================================================================= + +def get_speed(obs): + """Get total speed from observation (LEGACY - assumes WORLD_FRAME).""" + vx = obs[0, 3] * MAX_SPEED + vy = obs[0, 4] * MAX_SPEED + vz = obs[0, 5] * MAX_SPEED + return np.sqrt(vx**2 + vy**2 + vz**2) + + +def get_vz(obs): + """Get vertical velocity from observation (LEGACY - assumes WORLD_FRAME).""" + return obs[0, 5] * MAX_SPEED + + +def get_alt(obs): + """Get altitude from observation (LEGACY - assumes WORLD_FRAME).""" + return obs[0, 2] * WORLD_MAX_Z + + +def level_flight_pitch(obs, kp=LEVEL_FLIGHT_KP, kd=LEVEL_FLIGHT_KD): + """ + PD autopilot for level flight (vz = 0). LEGACY - assumes WORLD_FRAME. + Uses tuned PID values from pid_sweep.py for stable flight. + """ + vz = get_vz(obs) + # Negative because: if climbing (vz>0), need nose down (negative elevator) + elevator = -kp * vz - kd * vz + return np.clip(elevator, -0.2, 0.2) diff --git a/pufferlib/ocean/dogfight/test_flight_energy.py b/pufferlib/ocean/dogfight/test_flight_energy.py new file mode 100644 index 000000000..ffe647de6 --- /dev/null +++ b/pufferlib/ocean/dogfight/test_flight_energy.py @@ -0,0 +1,783 @@ +""" +Energy physics tests for dogfight environment. +Tests energy conservation, bleed rates, and E-M theory concepts. 
+ +Key Physics: + Specific Energy: Es = h + v^2/(2g) [meters of altitude equivalent] + Kinetic Energy: KE = 0.5 * m * v^2 + Potential Energy: PE = m * g * h + Total Energy: E = KE + PE = m * (g*h + 0.5*v^2) + + Specific Excess Power: Ps = (T - D) * V / W [m/s rate of energy change] + + In a turn at bank angle phi: + Required lift: L = W / cos(phi) + Load factor: n = 1 / cos(phi) + Induced drag increases with n^2 + +Run: python pufferlib/ocean/dogfight/test_flight_energy.py + python pufferlib/ocean/dogfight/test_flight_energy.py --render --fps 10 + python pufferlib/ocean/dogfight/test_flight_energy.py --test knife_edge_pull_energy +""" +import numpy as np +from dogfight import Dogfight + +from test_flight_base import ( + get_render_mode, get_render_fps, setup_highlights, + RESULTS, TEST_HIGHLIGHTS, + get_speed_from_state, get_alt_from_state, +) + +# Physics constants +G = 9.81 # m/s^2 +MASS = 4082 # kg (P-51D loaded weight) + + +def compute_specific_energy(speed, altitude): + """ + Compute specific energy (energy per unit weight). + Es = h + v^2/(2g) [meters of altitude equivalent] + + This is the total mechanical energy expressed as equivalent altitude. + A plane at 1000m going 100 m/s has Es = 1000 + 100^2/(2*9.81) = 1510m + """ + return altitude + (speed ** 2) / (2 * G) + + +def compute_energies(speed, altitude): + """ + Compute kinetic, potential, and total energy. + Returns (KE, PE, Total) in Joules. + """ + ke = 0.5 * MASS * speed ** 2 + pe = MASS * G * altitude + total = ke + pe + return ke, pe, total + + +def get_energy_state(env): + """Get current energy state from environment.""" + state = env.get_state() + speed = np.sqrt(state['vx']**2 + state['vy']**2 + state['vz']**2) + alt = state['pz'] + + ke, pe, total = compute_energies(speed, alt) + es = compute_specific_energy(speed, alt) + + return { + 'speed': speed, + 'alt': alt, + 'ke': ke, # Kinetic energy (J) + 'pe': pe, # Potential energy (J) + 'total': total, # Total energy (J) + 'es': es, # Specific energy (m) + 'vz': state['vz'], + } + + +# ============================================================================= +# ENERGY TESTS +# ============================================================================= + +def test_knife_edge_pull_energy(): + """ + Knife-edge (90 deg bank) + full elevator pull + zero throttle. + + This is a HIGH DRAG scenario: + - 90 deg bank: wings vertical, no vertical lift + - Full elevator pull: high angle of attack = massive induced drag + - Zero throttle: no thrust to offset drag + + Expected: + - Kinetic energy drops (drag slows plane) + - Potential energy drops (no lift, plane falls) + - Total energy drops RAPIDLY (both components bleeding) + + This tests that high-G maneuvers correctly penalize energy. 
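+
+    Worked example (numbers mirror the force_state() call below): starting at
+    150 m/s and 2000 m, compute_specific_energy(150, 2000) = 2000 +
+    150^2/(2*9.81) ~= 3147 m. The pass criterion only asks for >100 m of that
+    to bleed off in ~3 s, so it is a loose lower bound on the drag modeling.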
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Set up knife-edge: 90 deg right roll + roll_90 = np.radians(90) + qw = np.cos(roll_90 / 2) + qx = -np.sin(roll_90 / 2) # Negative for right roll + + # Start at good speed and altitude + V = 150.0 # m/s - high speed for dramatic effect + env.force_state( + player_pos=(0, 0, 2000), + player_vel=(V, 0, 0), + player_ori=(qw, qx, 0.0, 0.0), + player_throttle=0.0, # ZERO THROTTLE + ) + + # Record initial energy state + initial = get_energy_state(env) + + print(f" Initial state:") + print(f" Speed: {initial['speed']:.1f} m/s") + print(f" Altitude: {initial['alt']:.1f} m") + print(f" Specific Energy: {initial['es']:.1f} m") + print(f" KE: {initial['ke']/1e6:.2f} MJ, PE: {initial['pe']/1e6:.2f} MJ") + + # Run with full elevator pull, zero throttle + data = [] + for step in range(150): # 3 seconds + # Zero throttle (-1 maps to 0%), full pull (-1), no aileron, no rudder + action = np.array([[-1.0, -1.0, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + + e = get_energy_state(env) + data.append(e) + + if step % 50 == 0: + print(f" Step {step:3d}: speed={e['speed']:.1f}, alt={e['alt']:.0f}, Es={e['es']:.0f}m") + + if term[0]: + print(f" (terminated at step {step})") + break + + # Analyze energy changes + final = data[-1] + + speed_loss = initial['speed'] - final['speed'] + alt_loss = initial['alt'] - final['alt'] + ke_loss = initial['ke'] - final['ke'] + pe_loss = initial['pe'] - final['pe'] + total_loss = initial['total'] - final['total'] + es_loss = initial['es'] - final['es'] + + # Calculate rates + time_elapsed = len(data) * 0.02 + es_bleed_rate = es_loss / time_elapsed # m/s of specific energy + + print(f"\n Final state after {time_elapsed:.1f}s:") + print(f" Speed: {final['speed']:.1f} m/s (lost {speed_loss:.1f})") + print(f" Altitude: {final['alt']:.1f} m (lost {alt_loss:.1f})") + print(f" Specific Energy: {final['es']:.1f} m (lost {es_loss:.1f})") + print(f" KE loss: {ke_loss/1e6:.2f} MJ, PE loss: {pe_loss/1e6:.2f} MJ") + print(f" Energy bleed rate: {es_bleed_rate:.1f} m/s of Es") + + # Verify ALL energies decreased + ke_dropped = ke_loss > 0 + pe_dropped = pe_loss > 0 + total_dropped = total_loss > 0 + + # Should lose significant energy (at least 100m of Es in 3 seconds) + significant_loss = es_loss > 100 + + passed = ke_dropped and pe_dropped and total_dropped and significant_loss + RESULTS['knife_edge_pull_energy'] = passed + + status = "OK" if passed else "FAIL" + print(f"\nknife_pull_E: KE={'DROP' if ke_dropped else 'RISE'}, PE={'DROP' if pe_dropped else 'RISE'}, " + f"Es_loss={es_loss:.0f}m [{status}]") + + if not ke_dropped: + print(f" FAIL: Kinetic energy should DROP (drag!), but it increased") + if not pe_dropped: + print(f" FAIL: Potential energy should DROP (no lift!), but it increased") + if not significant_loss: + print(f" FAIL: Should lose >100m of Es in high-drag maneuver, only lost {es_loss:.0f}m") + + env.close() + return passed + + +def test_energy_level_flight(): + """ + Level flight at cruise: energy should be roughly constant. + + With throttle balanced against drag, Ps ≈ 0, so total energy + should remain stable (small fluctuations from autopilot corrections). 
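+
+    Worked example (numbers mirror the setup below): Es = 1500 + 120^2/(2*9.81)
+    ~= 2234 m at the start. With thrust ~= drag, Ps = (T - D)*V/W ~= 0, so the
+    test expects |delta Es| < 50 m over the 10 s run.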
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start at cruise speed, level + V = 120.0 + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(V, 0, 0), + player_throttle=0.5, + ) + + initial = get_energy_state(env) + energies = [initial['es']] + + # Simple level flight autopilot + prev_vz = 0 + kp, kd = 0.001, 0.001 + + for step in range(500): # 10 seconds + state = env.get_state() + vz = state['vz'] + elevator = -kp * vz - kd * (vz - prev_vz) / 0.02 + elevator = np.clip(elevator, -0.2, 0.2) + prev_vz = vz + + action = np.array([[0.0, elevator, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + + e = get_energy_state(env) + energies.append(e['es']) + + final = get_energy_state(env) + es_change = final['es'] - initial['es'] + es_std = np.std(energies) + + # Energy should be stable (change < 50m, std < 20m) + stable = abs(es_change) < 50 and es_std < 30 + + RESULTS['energy_level_flight'] = stable + status = "OK" if stable else "CHECK" + print(f"energy_level: Es_change={es_change:+.1f}m, std={es_std:.1f}m [{status}]") + + env.close() + return stable + + +def test_energy_dive_acceleration(): + """ + Dive at 45 degrees, zero throttle: potential -> kinetic conversion. + + Total energy should decrease slowly (drag), but kinetic should + increase as potential decreases (trading altitude for speed). + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # 45 degree dive + pitch_down = np.radians(-45) + qw = np.cos(pitch_down / 2) + qy = -np.sin(pitch_down / 2) + + env.force_state( + player_pos=(0, 0, 2500), + player_vel=(80, 0, 0), # Start slow + player_ori=(qw, 0, qy, 0), + player_throttle=0.0, + ) + + initial = get_energy_state(env) + + for step in range(200): # 4 seconds + action = np.array([[-1.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0] or env.get_state()['pz'] < 500: + break + + final = get_energy_state(env) + + speed_gain = final['speed'] - initial['speed'] + alt_loss = initial['alt'] - final['alt'] + ke_gain = final['ke'] - initial['ke'] + pe_loss = initial['pe'] - final['pe'] + es_loss = initial['es'] - final['es'] + + # Verify energy transfer + ke_increased = ke_gain > 0 + pe_decreased = pe_loss > 0 + # Total should decrease (drag), but not by much + es_loss_reasonable = 0 < es_loss < alt_loss * 0.3 # Less than 30% to drag + + passed = ke_increased and pe_decreased and es_loss_reasonable + RESULTS['energy_dive'] = passed + + status = "OK" if passed else "CHECK" + print(f"energy_dive: speed+{speed_gain:.0f}, alt-{alt_loss:.0f}, Es_loss={es_loss:.0f}m ({100*es_loss/alt_loss:.0f}% to drag) [{status}]") + + env.close() + return passed + + +def test_energy_climb_deceleration(): + """ + Climb at 30 degrees, full throttle: kinetic -> potential conversion. + + With full throttle, should gain altitude while losing some speed, + but total energy should increase (thrust > drag). 
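+
+    Worked example (numbers mirror the setup below): Es_initial = 1000 +
+    140^2/(2*9.81) ~= 1999 m. With Ps > 0 at full power the test tolerates at
+    most a 50 m drop in Es while requiring more than 100 m of altitude gain.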
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # 30 degree climb + pitch_up = np.radians(30) + qw = np.cos(-pitch_up / 2) + qy = np.sin(-pitch_up / 2) + + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(140, 0, 0), # Start fast + player_ori=(qw, 0, qy, 0), + player_throttle=1.0, + ) + + initial = get_energy_state(env) + + for step in range(300): # 6 seconds + action = np.array([[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0]: + break + + final = get_energy_state(env) + + speed_loss = initial['speed'] - final['speed'] + alt_gain = final['alt'] - initial['alt'] + es_change = final['es'] - initial['es'] + + # With full throttle in climb, total energy should increase or stay stable + # (Thrust provides Ps > 0) + energy_ok = es_change > -50 # Allow small loss from drag + alt_gained = alt_gain > 100 + + passed = energy_ok and alt_gained + RESULTS['energy_climb'] = passed + + status = "OK" if passed else "CHECK" + print(f"energy_climb: speed-{speed_loss:.0f}, alt+{alt_gain:.0f}, Es_change={es_change:+.0f}m [{status}]") + + env.close() + return passed + + +def test_energy_sustained_turn_bleed(): + """ + Sustained turn at 60 degrees bank: measure energy bleed rate. + + In a banked turn, induced drag increases with load factor squared. + At 60 deg bank, n = 2.0, so induced drag is 4x level flight. + Even with full throttle, energy should bleed. + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # 60 degree right bank + bank = np.radians(60) + qw = np.cos(bank / 2) + qx = -np.sin(bank / 2) + + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(120, 0, 0), + player_ori=(qw, qx, 0.0, 0.0), + player_throttle=1.0, + ) + + initial = get_energy_state(env) + + # PID to hold bank and altitude + prev_vz = 0 + prev_bank_err = 0 + + energies = [] + for step in range(250): # 5 seconds + state = env.get_state() + vz = state['vz'] + up_y, up_z = state['up_y'], state['up_z'] + bank_actual = np.arccos(np.clip(up_z, -1, 1)) + + # Elevator to hold altitude + elev = -0.05 * (-vz) + 0.005 * (vz - prev_vz) / 0.02 + elev = np.clip(elev, -1.0, 1.0) + prev_vz = vz + + # Aileron to hold bank + bank_err = bank - bank_actual + ail = -2.0 * bank_err - 0.1 * (bank_err - prev_bank_err) / 0.02 + ail = np.clip(ail, -1.0, 1.0) + prev_bank_err = bank_err + + action = np.array([[1.0, elev, ail, 0.0, 0.0]], dtype=np.float32) + env.step(action) + + e = get_energy_state(env) + energies.append(e['es']) + + final = get_energy_state(env) + es_loss = initial['es'] - final['es'] + time_elapsed = len(energies) * 0.02 + bleed_rate = es_loss / time_elapsed + + # At 60 deg bank (2G), should lose energy even at full throttle + # Expect 5-20 m/s of Es bleed + bleeding = es_loss > 10 + + RESULTS['energy_turn_bleed'] = bleeding + status = "OK" if bleeding else "CHECK" + print(f"energy_turn: Es_loss={es_loss:.0f}m in {time_elapsed:.1f}s, bleed={bleed_rate:.1f} m/s [{status}]") + + if not bleeding: + print(f" NOTE: 60 deg turn should bleed energy (high induced drag)") + + env.close() + return bleeding + + +def test_energy_loop(): + """ + Full loop maneuver: measure total energy loss. + + A loop involves sustained high-G (3-4G at bottom), which creates + massive induced drag. Energy should drop 10-20% through a loop. 
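+
+    Worked example (simplified): at a fixed speed C_L scales with load factor
+    n, so the induced-drag term K*C_L^2 at 4G is roughly 16x its 1G value.
+    Entry Es here is 1500 + 150^2/(2*9.81) ~= 2647 m, so the 5-35% acceptance
+    band below corresponds to roughly 130-925 m of Es loss.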
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start fast and level for loop entry + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(150, 0, 0), + player_throttle=1.0, + ) + + initial = get_energy_state(env) + + # Pull through loop (full back stick) + g_max = 0 + for step in range(200): # ~4 seconds for loop + action = np.array([[1.0, -0.8, 0.0, 0.0, 0.0]], dtype=np.float32) # Strong pull + env.step(action) + + g = env.get_state()['g_force'] + g_max = max(g_max, g) + + # Check if we've completed loop (fwd_z goes negative then positive again) + state = env.get_state() + if step > 50 and state['fwd_z'] > -0.1 and state['fwd_x'] > 0.5: + break + + final = get_energy_state(env) + es_loss = initial['es'] - final['es'] + pct_loss = 100 * es_loss / initial['es'] + + # Loop should lose 5-25% energy from drag + energy_lost = 5 < pct_loss < 35 + + RESULTS['energy_loop'] = energy_lost + status = "OK" if energy_lost else "CHECK" + print(f"energy_loop: Es_loss={es_loss:.0f}m ({pct_loss:.1f}%), max_G={g_max:.1f} [{status}]") + + env.close() + return energy_lost + + +def test_energy_split_s(): + """ + Split-S: half roll + pull through (dive recovery). + + Trades altitude for speed. Total energy decreases (drag during pull), + but kinetic energy increases significantly. + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start high and slow + env.force_state( + player_pos=(0, 0, 2500), + player_vel=(100, 0, 0), + player_throttle=0.5, + ) + + initial = get_energy_state(env) + + # Phase 1: Half roll (invert) + for step in range(25): + action = np.array([[0.5, 0.0, 1.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + + # Phase 2: Pull through (now inverted, pull = dive down then back up) + for step in range(150): + action = np.array([[1.0, -1.0, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + + state = env.get_state() + # Stop when nose is back above horizon + if state['fwd_z'] > 0.3 and state['pz'] < initial['alt']: + break + if term[0]: + break + + final = get_energy_state(env) + + speed_gain = final['speed'] - initial['speed'] + alt_loss = initial['alt'] - final['alt'] + es_loss = initial['es'] - final['es'] + + # Split-S should gain speed while losing altitude + speed_increased = speed_gain > 20 + alt_decreased = alt_loss > 200 + + passed = speed_increased and alt_decreased + RESULTS['energy_split_s'] = passed + + status = "OK" if passed else "CHECK" + print(f"energy_split_s: speed+{speed_gain:.0f}, alt-{alt_loss:.0f}, Es_loss={es_loss:.0f}m [{status}]") + + env.close() + return passed + + +def test_energy_zoom_climb(): + """ + Zoom climb: trade kinetic for potential energy. + + Start already pointing straight up with vertical velocity. + Zero throttle - pure kinetic -> potential conversion. + Tests energy conservation with only drag losses. 
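+
+    Worked example (numbers mirror the setup below): with V = 150 m/s straight
+    up, the drag-free altitude gain is v^2/(2g) = 150^2/19.62 ~= 1147 m; the
+    test passes if the sim recovers at least ~65% of that (~745 m).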
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start vertical: 90 deg pitch up + pitch_up = np.radians(90) + qw = np.cos(-pitch_up / 2) + qy = np.sin(-pitch_up / 2) + + V_start = 150.0 # Good starting speed + env.force_state( + player_pos=(0, 0, 500), + player_vel=(0, 0, V_start), # Velocity straight UP + player_ori=(qw, 0, qy, 0), # Nose pointing UP + player_throttle=0.0, + ) + + initial = get_energy_state(env) + + # Theoretical max altitude gain (no drag): dh = v^2/(2g) + theoretical_gain = V_start**2 / (2 * G) + + max_alt = initial['alt'] + + # Coast up with zero throttle, zero controls + for step in range(400): + action = np.array([[-1.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + + state = env.get_state() + max_alt = max(max_alt, state['pz']) + + # Stop when we start falling + if state['vz'] < 0: + break + + alt_gain = max_alt - initial['alt'] + efficiency = 100 * alt_gain / theoretical_gain + + final = get_energy_state(env) + es_loss = initial['es'] - final['es'] + + # Should convert at least 70% of kinetic to potential (only drag losses) + efficient = efficiency > 65 + + RESULTS['energy_zoom'] = efficient + status = "OK" if efficient else "CHECK" + print(f"energy_zoom: alt_gain={alt_gain:.0f}m (theory={theoretical_gain:.0f}m, eff={efficiency:.0f}%) [{status}]") + + env.close() + return efficient + + +def test_energy_throttle_effect(): + """ + Test that throttle controls energy rate (Specific Excess Power). + + At constant speed/altitude: + - Full throttle: Ps > 0 (can accelerate or climb) + - Zero throttle: Ps < 0 (will decelerate or sink) + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + + results = {} + + for throttle_name, throttle_val in [('full', 1.0), ('half', 0.0), ('zero', -1.0)]: + env.reset() + + # Start at cruise speed, level + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(120, 0, 0), + player_throttle=0.5, + ) + + initial = get_energy_state(env) + + # Hold level flight with given throttle + prev_vz = 0 + for step in range(200): # 4 seconds + state = env.get_state() + vz = state['vz'] + elev = -0.001 * vz - 0.001 * (vz - prev_vz) / 0.02 + prev_vz = vz + + action = np.array([[throttle_val, elev, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + + final = get_energy_state(env) + es_change = final['es'] - initial['es'] + results[throttle_name] = es_change + + # Verify throttle effect on energy + full_positive = results['full'] > results['half'] + zero_negative = results['zero'] < results['half'] + + passed = full_positive and zero_negative + RESULTS['energy_throttle'] = passed + + status = "OK" if passed else "CHECK" + print(f"energy_throttle: full={results['full']:+.0f}m, half={results['half']:+.0f}m, zero={results['zero']:+.0f}m [{status}]") + + env.close() + return passed + + +def test_energy_high_g_bleed(): + """ + Compare energy bleed at different G levels. + + Higher G = more induced drag = faster energy bleed. + Tests at 2G, 4G, 6G pulls. 
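+
+    Worked example (simplified, ignoring speed decay): at fixed speed the
+    induced-drag term K*C_L^2 scales with n^2, so 2G/4G/6G pulls should bleed
+    roughly in the ratio 4:16:36. The check below only requires the measured
+    Es losses to be monotonically increasing with G.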
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + + results = {} + + # Different elevator settings for different G levels + for g_target, elev in [('2G', -0.3), ('4G', -0.6), ('6G', -1.0)]: + env.reset() + + env.force_state( + player_pos=(0, 0, 2000), + player_vel=(150, 0, 0), + player_throttle=1.0, # Full throttle + ) + + initial = get_energy_state(env) + g_values = [] + + for step in range(50): # 1 second + action = np.array([[1.0, elev, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + g_values.append(env.get_state()['g_force']) + + final = get_energy_state(env) + es_loss = initial['es'] - final['es'] + avg_g = np.mean(g_values) + + results[g_target] = {'es_loss': es_loss, 'avg_g': avg_g} + + # Higher G should mean more energy loss + bleed_increases = (results['2G']['es_loss'] < results['4G']['es_loss'] < results['6G']['es_loss']) + + RESULTS['energy_g_bleed'] = bleed_increases + status = "OK" if bleed_increases else "CHECK" + + print(f"energy_g_bleed:") + for g_target in ['2G', '4G', '6G']: + r = results[g_target] + print(f" {g_target}: avg_G={r['avg_g']:.1f}, Es_loss={r['es_loss']:.0f}m") + print(f" Higher G = more bleed: {bleed_increases} [{status}]") + + env.close() + return bleed_increases + + +def test_sideslip_drag(): + """ + Test that sideslip creates additional drag. + + Full rudder should build up sideslip (yaw_from_rudder), which adds drag. + Compare energy loss with and without rudder input. + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + + results = {} + + for test_name, rudder_input in [('no_rudder', 0.0), ('full_rudder', 1.0)]: + env.reset() + + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(120, 0, 0), + player_throttle=0.0, # Zero throttle to isolate drag effect + ) + + initial = get_energy_state(env) + + # Hold wings level with aileron, apply rudder + prev_roll = 0 + for step in range(150): # 3 seconds + state = env.get_state() + up_y, up_z = state['up_y'], state['up_z'] + roll = np.arctan2(up_y, up_z) + + # Wings level PID + aileron = 1.0 * (0 - roll) - 0.05 * (roll - prev_roll) / 0.02 + aileron = np.clip(aileron, -1.0, 1.0) + prev_roll = roll + + action = np.array([[-1.0, 0.0, aileron, rudder_input, 0.0]], dtype=np.float32) + env.step(action) + + final = get_energy_state(env) + results[test_name] = initial['es'] - final['es'] + + # Rudder should cause MORE energy loss due to sideslip drag + more_drag_with_rudder = results['full_rudder'] > results['no_rudder'] + 5 + + RESULTS['sideslip_drag'] = more_drag_with_rudder + status = "OK" if more_drag_with_rudder else "FAIL" + + diff = results['full_rudder'] - results['no_rudder'] + print(f"sideslip_drag: no_rudder={results['no_rudder']:.0f}m, full_rudder={results['full_rudder']:.0f}m, diff={diff:+.0f}m [{status}]") + + if not more_drag_with_rudder: + print(f" FAIL: Full rudder should create more drag from sideslip") + + env.close() + return more_drag_with_rudder + + +# Test registry for this module +TESTS = { + 'sideslip_drag': test_sideslip_drag, + 'knife_edge_pull_energy': test_knife_edge_pull_energy, + 'energy_level_flight': test_energy_level_flight, + 'energy_dive': test_energy_dive_acceleration, + 'energy_climb': test_energy_climb_deceleration, + 'energy_turn_bleed': test_energy_sustained_turn_bleed, + 'energy_loop': test_energy_loop, + 'energy_split_s': test_energy_split_s, + 'energy_zoom': test_energy_zoom_climb, + 'energy_throttle': test_energy_throttle_effect, + 'energy_g_bleed': 
test_energy_high_g_bleed, +} + + +if __name__ == "__main__": + from test_flight_base import get_args + args = get_args() + + print("Energy Physics Tests") + print("=" * 60) + + if args.test: + if args.test in TESTS: + print(f"Running single test: {args.test}") + if get_render_mode(): + print("Rendering enabled - press ESC to exit") + print("=" * 60) + TESTS[args.test]() + else: + print(f"Unknown test: {args.test}") + print(f"Available tests: {', '.join(TESTS.keys())}") + else: + print("Running all energy tests") + if get_render_mode(): + print("Rendering enabled - press ESC to exit") + print("=" * 60) + for test_func in TESTS.values(): + test_func() + print() # Blank line between tests diff --git a/pufferlib/ocean/dogfight/test_flight_obs_dynamic.py b/pufferlib/ocean/dogfight/test_flight_obs_dynamic.py new file mode 100644 index 000000000..ee5d9e5d0 --- /dev/null +++ b/pufferlib/ocean/dogfight/test_flight_obs_dynamic.py @@ -0,0 +1,691 @@ +""" +Dynamic maneuver observation tests for dogfight environment. +Tests observation continuity and bounds during active flight maneuvers. + +Run: python pufferlib/ocean/dogfight/test_flight_obs_dynamic.py --test obs_during_loop +""" +import numpy as np +from dogfight import Dogfight + +from test_flight_base import ( + get_render_mode, get_render_fps, + RESULTS, +) +from test_flight_obs_static import obs_continuity_check + + +def test_obs_during_loop(): + """ + Full inside loop maneuver - verify observations during complete pitch cycle. + + Purpose: Ensure Euler angle observations (pitch) smoothly transition through + full range [-1, 1] during a loop without discontinuities. + + Expected behavior: + - Pitch sweeps through full range (0 -> -0.5 (nose up 90deg) -> +/-1 (inverted) -> +0.5 -> 0) + - Roll stays near 0 throughout (wings level loop) + - No sudden jumps in any observation (discontinuity = bug) + + This tests the quaternion->euler conversion under continuous rotation. + """ + env = Dogfight(num_envs=1, obs_scheme=0, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start with good speed at safe altitude, target ahead to avoid edge cases + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(150, 0, 0), # Fast for complete loop + player_throttle=1.0, + opponent_pos=(1000, 0, 1500), # Target ahead + opponent_vel=(100, 0, 0), + ) + + pitches = [] + rolls = [] + prev_obs = None + continuity_errors = [] + + for step in range(350): # ~7 seconds should complete most of loop + action = np.array([[1.0, -0.8, 0.0, 0.0, 0.0]], dtype=np.float32) # Full throttle, strong pull + env.step(action) + obs = env.observations[0] + + pitches.append(obs[4]) # pitch + rolls.append(obs[5]) # roll + + # Check continuity + passed, err = obs_continuity_check(obs, prev_obs, step) + if not passed: + continuity_errors.append(err) + prev_obs = obs.copy() + + # Check termination (might hit bounds) + state = env.get_state() + if state['pz'] < 100: + break + + # Analysis + pitch_range = max(pitches) - min(pitches) + max_roll_drift = max(abs(r) for r in rolls) + + # Verify: + # 1. Pitch spans significant range (at least 0.8 of [-1, 1] = 1.6) + # 2. Roll stays bounded (less than 0.4 drift from wings level) + # 3. 
No discontinuities + + pitch_ok = pitch_range > 0.8 # Should cover most of the range + roll_ok = max_roll_drift < 0.4 # Wings should stay relatively level + continuity_ok = len(continuity_errors) == 0 + + all_ok = pitch_ok and roll_ok and continuity_ok + RESULTS['obs_loop'] = all_ok + status = "OK" if all_ok else "CHECK" + + print(f"obs_loop: pitch_range={pitch_range:.2f}, roll_drift={max_roll_drift:.2f}, errors={len(continuity_errors)} [{status}]") + + if not pitch_ok: + print(f" WARNING: Pitch range {pitch_range:.2f} < 0.8 - loop may be incomplete") + if not roll_ok: + print(f" WARNING: Roll drifted {max_roll_drift:.2f} - wings not level during loop") + if continuity_errors: + for err in continuity_errors[:3]: + print(f" {err}") + + env.close() + return all_ok + + +def test_obs_during_roll(): + """ + Full 360deg aileron roll - verify roll and horizon_visible observations. + + Purpose: Ensure roll observation smoothly transitions through +/-180deg without + discontinuity, and horizon_visible follows expected pattern. + + Expected behavior (scheme 2): + - Roll: 0 -> -1 (90deg right) -> +/-1 (inverted wrap) -> +1 (270deg) -> 0 + - horizon_visible: 1 -> 0 -> -1 -> 0 -> 1 + + The +/-180deg crossover is the critical test - if there's a wrap bug, + roll will jump from +1 to -1 instantly instead of smoothly transitioning. + """ + env = Dogfight(num_envs=1, obs_scheme=2, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(100, 0, 0), + player_throttle=1.0, + opponent_pos=(500, 0, 1500), + opponent_vel=(100, 0, 0), + ) + + rolls = [] + horizons = [] + prev_obs = None + continuity_errors = [] + + # Roll at MAX_ROLL_RATE=3.0 rad/s = 172deg/s, so 360deg takes ~2.1 seconds = 105 steps + for step in range(120): # ~2.4 seconds for full 360deg with margin + action = np.array([[0.7, 0.0, 1.0, 0.0, 0.0]], dtype=np.float32) # Full right aileron + env.step(action) + obs = env.observations[0] + + # In scheme 2: roll is at index 3, horizon_visible at index 8 + rolls.append(obs[3]) + horizons.append(obs[8]) + + # Check continuity with higher tolerance for roll (can change faster) + passed, err = obs_continuity_check(obs, prev_obs, step, max_delta=0.4) + if not passed: + continuity_errors.append(err) + prev_obs = obs.copy() + + # Analysis + roll_min = min(rolls) + roll_max = max(rolls) + roll_range = roll_max - roll_min + horizon_min = min(horizons) + horizon_max = max(horizons) + + # Check for discontinuities specifically in roll (the main concern) + roll_jumps = [] + for i in range(1, len(rolls)): + delta = abs(rolls[i] - rolls[i-1]) + if delta > 0.5: # Large jump indicates wrap-around bug + roll_jumps.append((i, rolls[i-1], rolls[i], delta)) + + # Verify: + # 1. Roll covers most of range (near +/-1) + # 2. Horizon covers full range (1 to -1) + # 3. 
No sudden roll jumps (discontinuity) + + roll_ok = roll_range > 1.5 # Should span nearly [-1, 1] + horizon_ok = horizon_max > 0.8 and horizon_min < -0.8 + no_jumps = len(roll_jumps) == 0 + + all_ok = roll_ok and horizon_ok and no_jumps + RESULTS['obs_roll'] = all_ok + status = "OK" if all_ok else "CHECK" + + print(f"obs_roll: roll=[{roll_min:.2f},{roll_max:.2f}], horizon=[{horizon_min:.2f},{horizon_max:.2f}], jumps={len(roll_jumps)} [{status}]") + + if not roll_ok: + print(f" WARNING: Roll range {roll_range:.2f} < 1.5 - incomplete roll") + if not horizon_ok: + print(f" WARNING: Horizon didn't reach extremes") + if roll_jumps: + for step, prev, curr, delta in roll_jumps[:3]: + print(f" Roll discontinuity at step {step}: {prev:.2f} -> {curr:.2f} (delta={delta:.2f})") + + env.close() + return all_ok + + +def test_obs_vertical_pitch(): + """ + Vertical pitch (+/-90deg) gimbal lock detection test. + + Purpose: Detect gimbal lock behavior when pitch reaches +/-90deg where + the euler angle representation becomes singular. + + At pitch = +/-90deg: + - roll = atan2(2*(w*x + y*z), 1 - 2*(x^2 + y^2)) becomes undefined + - May cause roll to snap/oscillate wildly + + This documents the behavior rather than asserting specific values, + since gimbal lock is a known limitation of euler angles. + """ + env = Dogfight(num_envs=1, obs_scheme=0, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Test nose straight up (90deg pitch) + pitch_90 = np.radians(90) + qw = np.cos(pitch_90 / 2) + qy = -np.sin(pitch_90 / 2) # Negative for nose UP + + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(100, 0, 0), + player_ori=(qw, 0, qy, 0), # Nose straight up + opponent_pos=(500, 0, 1500), + opponent_vel=(100, 0, 0), + ) + + # Step once to compute observations + action = np.array([[0.5, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + obs_up = env.observations[0].copy() + pitch_up = obs_up[4] + roll_up = obs_up[5] + + # Test nose straight down (-90deg pitch) + env.reset() + qw = np.cos(-pitch_90 / 2) + qy = -np.sin(-pitch_90 / 2) # Positive for nose DOWN + + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(100, 0, 0), + player_ori=(qw, 0, qy, 0), # Nose straight down + opponent_pos=(500, 0, 1500), + opponent_vel=(100, 0, 0), + ) + + env.step(action) + obs_down = env.observations[0].copy() + pitch_down = obs_down[4] + roll_down = obs_down[5] + + # Check bounds and NaN + all_bounded = True + for obs in [obs_up, obs_down]: + for val in obs: + if np.isnan(val) or np.isinf(val) or val < -1.0 or val > 1.0: + all_bounded = False + + # Pitch should be near +/-0.5 (90deg/180deg = 0.5) + pitch_up_ok = abs(abs(pitch_up) - 0.5) < 0.15 + pitch_down_ok = abs(abs(pitch_down) - 0.5) < 0.15 + + RESULTS['obs_vertical'] = all_bounded + status = "OK" if all_bounded else "WARN" + + print(f"obs_vertical: up=(pitch={pitch_up:.3f}, roll={roll_up:.3f}), down=(pitch={pitch_down:.3f}, roll={roll_down:.3f}) [{status}]") + + if not pitch_up_ok: + print(f" NOTE: Pitch up {pitch_up:.3f} not near +/-0.5 (expected for 90deg pitch)") + if not pitch_down_ok: + print(f" NOTE: Pitch down {pitch_down:.3f} not near +/-0.5") + if not all_bounded: + print(f" WARNING: Observations out of bounds or NaN at vertical pitch") + if abs(roll_up) > 0.3 or abs(roll_down) > 0.3: + print(f" NOTE: Roll unstable at vertical pitch (gimbal lock region)") + + env.close() + return all_bounded + + +def test_obs_azimuth_crossover(): + """ + Target azimuth +/-180deg crossover test. 
+ + Purpose: Verify azimuth doesn't jump discontinuously when target + crosses from behind-left to behind-right (through +/-180deg). + + Risk: Azimuth might jump from +1 to -1 instantly instead of transitioning + smoothly, causing RL agent to see huge observation delta. + + Test: Sweep opponent from right-behind through directly-behind to left-behind + and check for discontinuities. + """ + env = Dogfight(num_envs=1, obs_scheme=0, render_mode=get_render_mode(), render_fps=get_render_fps()) + action = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + + azimuths = [] + y_positions = [] + + # Sweep opponent from right-behind (y=-200) through left-behind (y=+200) + # This forces azimuth to cross through +/-180deg (behind the player) + for step in range(50): + env.reset() + y_offset = -200 + step * 8 # Sweep from y=-200 to y=+200 + + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(100, 0, 0), + player_ori=(1, 0, 0, 0), # Identity - facing +X + opponent_pos=(-200, y_offset, 1000), # Behind player, sweeping Y + opponent_vel=(100, 0, 0), + ) + + env.step(action) + azimuths.append(env.observations[0][7]) + y_positions.append(y_offset) + + # Check for discontinuities + azimuth_jumps = [] + for i in range(1, len(azimuths)): + delta = abs(azimuths[i] - azimuths[i-1]) + if delta > 0.5: # Large jump = discontinuity + azimuth_jumps.append((i, y_positions[i], azimuths[i-1], azimuths[i], delta)) + + # Verify azimuth range covers +/-1 (behind = +/-180deg) + az_min = min(azimuths) + az_max = max(azimuths) + range_ok = az_max > 0.8 and az_min < -0.8 + + # Discontinuity at +/-180deg crossover is EXPECTED for atan2-based azimuth + # This test documents the behavior - a discontinuity here is not necessarily + # a bug, but agents should be aware of it + has_discontinuity = len(azimuth_jumps) > 0 + + RESULTS['obs_azimuth_cross'] = range_ok + status = "OK" if range_ok else "CHECK" + + print(f"obs_az_cross: range=[{az_min:.2f},{az_max:.2f}], discontinuities={len(azimuth_jumps)} [{status}]") + + if has_discontinuity: + print(f" NOTE: Azimuth has discontinuity at +/-180deg (expected for atan2)") + for _, y_pos, prev_az, curr_az, delta in azimuth_jumps[:2]: + print(f" At y={y_pos:.0f}: azimuth {prev_az:.2f} -> {curr_az:.2f} (delta={delta:.2f})") + print(f" Consider: Use sin/cos encoding to avoid wrap-around for RL") + + if not range_ok: + print(f" WARNING: Azimuth didn't reach +/-1 (behind player)") + + env.close() + return range_ok + + +def test_obs_yaw_wrap(): + """ + Yaw observation +/-180deg wrap test. + + Purpose: Verify yaw observation behavior when heading crosses +/-180deg. + Tests CONTINUOUS heading transition across the wrap boundary. + + The critical test: sweep from +170deg to -170deg (crossing +180deg/-180deg). + If yaw wraps, we'll see a jump from ~+1 to ~-1. 
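+
+    Worked example (assuming yaw is normalized as yaw_deg / 180): heading
+    +178deg gives obs ~ +0.989 and heading -178deg gives obs ~ -0.989, so a
+    4deg heading change produces a ~1.98 jump in the observation if the angle
+    wraps rather than being encoded continuously.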
+ + For RL, yaw wrap at +/-180deg is less problematic than roll wrap because: + - Normal flight rarely involves facing directly backwards + - Roll wrap happens during inverted flight (loops, barrel rolls) + """ + env = Dogfight(num_envs=1, obs_scheme=0, render_mode=get_render_mode(), render_fps=get_render_fps()) + action = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + + yaws = [] + headings = [] + + # Test 1: Sweep ACROSS the +/-180deg boundary (170deg to 190deg = -170deg) + # This is the critical test - continuous transition through the wrap point + for heading_deg in range(170, 195, 2): # 170deg to 194deg in 2deg steps + env.reset() + + # Normalize to [-180, 180] range for quaternion + h = heading_deg if heading_deg <= 180 else heading_deg - 360 + heading_rad = np.radians(h) + qw = np.cos(heading_rad / 2) + qz = np.sin(heading_rad / 2) + + vx = 100 * np.cos(heading_rad) + vy = -100 * np.sin(heading_rad) + + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(vx, vy, 0), + player_ori=(qw, 0, 0, qz), + opponent_pos=(500, 0, 1500), + opponent_vel=(100, 0, 0), + ) + + env.step(action) + obs = env.observations[0] + + yaws.append(obs[6]) + headings.append(heading_deg) + + # Check for discontinuities at the +/-180deg crossing + yaw_jumps = [] + for i in range(1, len(yaws)): + delta = abs(yaws[i] - yaws[i-1]) + if delta > 0.3: # 2deg step should give ~0.022 change, 0.3 is a big jump + yaw_jumps.append((headings[i-1], headings[i], yaws[i-1], yaws[i], delta)) + + yaw_min = min(yaws) + yaw_max = max(yaws) + + # Also do a full range check + full_range_yaws = [] + for heading_deg in range(-180, 185, 30): + env.reset() + heading_rad = np.radians(heading_deg) + qw = np.cos(heading_rad / 2) + qz = np.sin(heading_rad / 2) + vx = 100 * np.cos(heading_rad) + vy = -100 * np.sin(heading_rad) + + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(vx, vy, 0), + player_ori=(qw, 0, 0, qz), + opponent_pos=(500, 0, 1500), + opponent_vel=(100, 0, 0), + ) + env.step(action) + full_range_yaws.append(env.observations[0][6]) + + full_min = min(full_range_yaws) + full_max = max(full_range_yaws) + full_range = full_max - full_min + + has_wrap = len(yaw_jumps) > 0 + range_ok = full_range > 1.5 + + RESULTS['obs_yaw_wrap'] = range_ok + status = "OK" if range_ok else "CHECK" + + print(f"obs_yaw_wrap: full_range=[{full_min:.2f},{full_max:.2f}], crossover_jumps={len(yaw_jumps)} [{status}]") + + if has_wrap: + print(f" WRAP DETECTED at +/-180deg heading:") + for h1, h2, y1, y2, delta in yaw_jumps[:2]: + print(f" heading {h1}deg->{h2}deg: yaw {y1:.2f} -> {y2:.2f} (delta={delta:.2f})") + print(f" Consider: Use sin/cos encoding for yaw to avoid wrap") + else: + print(f" No discontinuity at +/-180deg crossing (yaw: {yaw_min:.2f} to {yaw_max:.2f})") + + env.close() + return range_ok + + +def test_obs_elevation_extremes(): + """ + Elevation observation at +/-90deg (target directly above/below). + + Purpose: Verify elevation doesn't have singularity when target is + directly above or below player. Elevation uses asin which is bounded + by definition, so this should be stable. + + Test: Place target directly above and below player, verify elevation + is correct and bounded. 
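+
+    Worked example (assuming elevation ~ asin(dz / range), normalized by 90deg):
+    a target 500m directly overhead gives asin(500/500) = 90deg, i.e. an
+    observation near +1.0; directly below gives -1.0. asin is only defined on
+    [-1, 1], so no singularity is expected at either extreme.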
+ """ + env = Dogfight(num_envs=1, obs_scheme=0, render_mode=get_render_mode(), render_fps=get_render_fps()) + action = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + + # Target directly above (500m up) + env.reset() + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(100, 0, 0), + player_ori=(1, 0, 0, 0), + opponent_pos=(0, 0, 1500), # Directly above + opponent_vel=(100, 0, 0), + ) + env.step(action) + elev_above = env.observations[0][8] + + # Target directly below (500m down) + env.reset() + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(100, 0, 0), + player_ori=(1, 0, 0, 0), + opponent_pos=(0, 0, 500), # Directly below + opponent_vel=(100, 0, 0), + ) + env.step(action) + elev_below = env.observations[0][8] + + # Target at extreme angle (nearly overhead, slightly forward) + env.reset() + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(100, 0, 0), + player_ori=(1, 0, 0, 0), + opponent_pos=(10, 0, 1500), # Slightly forward, mostly above + opponent_vel=(100, 0, 0), + ) + env.step(action) + elev_steep_up = env.observations[0][8] + + # Verify values + all_bounded = True + for val in [elev_above, elev_below, elev_steep_up]: + if np.isnan(val) or np.isinf(val) or val < -1.0 or val > 1.0: + all_bounded = False + + # Target above should have positive elevation (close to +1) + above_ok = elev_above > 0.8 + # Target below should have negative elevation (close to -1) + below_ok = elev_below < -0.8 + # Steep up should be very high + steep_ok = elev_steep_up > 0.9 + + all_ok = all_bounded and above_ok and below_ok and steep_ok + RESULTS['obs_elevation_extremes'] = all_ok + status = "OK" if all_ok else "CHECK" + + print(f"obs_elev_ext: above={elev_above:.3f}, below={elev_below:.3f}, steep={elev_steep_up:.3f} [{status}]") + + if not above_ok: + print(f" WARNING: Target above should have elev >0.8, got {elev_above:.3f}") + if not below_ok: + print(f" WARNING: Target below should have elev <-0.8, got {elev_below:.3f}") + if not all_bounded: + print(f" WARNING: Elevation out of bounds or NaN at extreme angles") + + env.close() + return all_ok + + +def test_obs_complex_maneuver(): + """ + Complex maneuver (barrel roll) - simultaneous pitch, roll, yaw changes. + + Purpose: Verify all observations stay bounded and continuous during + complex combined rotations that exercise multiple rotation axes. + + This tests edge cases that might not appear in single-axis tests. 
+ """ + env = Dogfight(num_envs=1, obs_scheme=0, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(120, 0, 0), + player_throttle=1.0, + opponent_pos=(500, 0, 1500), + opponent_vel=(100, 0, 0), + ) + + prev_obs = None + continuity_errors = [] + bound_errors = [] + + for step in range(200): # ~4 seconds of complex maneuver + # Barrel roll: pull + roll (creates helical path) + action = np.array([[0.8, -0.3, 0.8, 0.2, 0.0]], dtype=np.float32) + env.step(action) + obs = env.observations[0] + + # Check bounds + for i, val in enumerate(obs): + if np.isnan(val) or np.isinf(val): + bound_errors.append(f"NaN/Inf at step {step}, obs[{i}]={val}") + elif val < -1.0 or val > 1.0: + bound_errors.append(f"Out of bounds at step {step}, obs[{i}]={val:.3f}") + + # Check continuity (higher tolerance for complex maneuver) + passed, err = obs_continuity_check(obs, prev_obs, step, max_delta=0.5) + if not passed: + continuity_errors.append(err) + prev_obs = obs.copy() + + # Check termination + state = env.get_state() + if state['pz'] < 200: + break + + bounds_ok = len(bound_errors) == 0 + continuity_ok = len(continuity_errors) <= 5 # Allow some discontinuities at wrap points + + all_ok = bounds_ok and continuity_ok + RESULTS['obs_complex'] = all_ok + status = "OK" if all_ok else "CHECK" + + print(f"obs_complex: bound_errors={len(bound_errors)}, continuity_errors={len(continuity_errors)} [{status}]") + + if bound_errors: + for err in bound_errors[:3]: + print(f" {err}") + if continuity_errors: + print(f" NOTE: {len(continuity_errors)} continuity errors (wrap points expected)") + for err in continuity_errors[:3]: + print(f" {err}") + + env.close() + return all_ok + + +def test_quaternion_normalization(): + """ + Quaternion normalization drift test. + + Purpose: Verify quaternion stays normalized (magnitude ~1.0) during + extended flight with various maneuvers. Floating point accumulation + could cause drift from unit quaternion over time. + + Non-unit quaternion -> incorrect euler angles -> bad observations. 
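+
+    Typical mitigation (sketch only, not necessarily what the C code does):
+    renormalize after each integration step, e.g.
+        mag = sqrt(qw*qw + qx*qx + qy*qy + qz*qz)
+        qw, qx, qy, qz = qw/mag, qx/mag, qy/mag, qz/mag
+    which keeps drift at the level of floating-point rounding.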
+ """ + env = Dogfight(num_envs=1, obs_scheme=0, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(100, 0, 0), + player_throttle=1.0, + opponent_pos=(500, 0, 1500), + opponent_vel=(100, 0, 0), + ) + + quat_mags = [] + + for step in range(500): # ~10 seconds of varied maneuvers + # Varied maneuvers to stress quaternion integration + t = step * 0.02 # Time in seconds + aileron = 0.5 * np.sin(t * 2.0) # Rolling + elevator = 0.3 * np.cos(t * 1.5) # Pitching + rudder = 0.2 * np.sin(t * 0.8) # Yawing + + action = np.array([[0.7, elevator, aileron, rudder, 0.0]], dtype=np.float32) + env.step(action) + + state = env.get_state() + qw, qx, qy, qz = state['ow'], state['ox'], state['oy'], state['oz'] + mag = np.sqrt(qw**2 + qx**2 + qy**2 + qz**2) + quat_mags.append(mag) + + # Safety check - don't let plane crash + if state['pz'] < 200: + break + + # Calculate drift statistics + max_drift = max(abs(m - 1.0) for m in quat_mags) + mean_drift = np.mean([abs(m - 1.0) for m in quat_mags]) + final_mag = quat_mags[-1] if quat_mags else 1.0 + + # Quaternion should stay very close to unit length + drift_ok = max_drift < 0.01 # Allow 1% drift + + RESULTS['quat_norm'] = drift_ok + status = "OK" if drift_ok else "WARN" + + print(f"quat_norm: max_drift={max_drift:.6f}, mean_drift={mean_drift:.6f}, final_mag={final_mag:.6f} [{status}]") + + if not drift_ok: + print(f" WARNING: Quaternion drift {max_drift:.6f} > 0.01 - may cause euler angle errors") + print(f" Consider: Normalize quaternion after integration in C code") + + env.close() + return drift_ok + + +# Test registry for this module +TESTS = { + 'obs_during_loop': test_obs_during_loop, + 'obs_during_roll': test_obs_during_roll, + 'obs_vertical_pitch': test_obs_vertical_pitch, + 'obs_azimuth_crossover': test_obs_azimuth_crossover, + 'obs_yaw_wrap': test_obs_yaw_wrap, + 'obs_elevation_extremes': test_obs_elevation_extremes, + 'obs_complex_maneuver': test_obs_complex_maneuver, + 'quat_normalization': test_quaternion_normalization, +} + + +if __name__ == "__main__": + from test_flight_base import get_args + args = get_args() + + print("Dynamic Observation Tests") + print("=" * 60) + + if args.test: + if args.test in TESTS: + print(f"Running single test: {args.test}") + if get_render_mode(): + print("Rendering enabled - press ESC to exit") + print("=" * 60) + TESTS[args.test]() + else: + print(f"Unknown test: {args.test}") + print(f"Available tests: {', '.join(TESTS.keys())}") + else: + print("Running all dynamic observation tests") + if get_render_mode(): + print("Rendering enabled - press ESC to exit") + print("=" * 60) + for test_func in TESTS.values(): + test_func() diff --git a/pufferlib/ocean/dogfight/test_flight_obs_pursuit.py b/pufferlib/ocean/dogfight/test_flight_obs_pursuit.py new file mode 100644 index 000000000..6b9a8b519 --- /dev/null +++ b/pufferlib/ocean/dogfight/test_flight_obs_pursuit.py @@ -0,0 +1,529 @@ +""" +OBS_PURSUIT (scheme 1) specific tests for dogfight environment. +Tests energy observations, target aspect, closure rate, and wrap behavior. 
+ +Observation layout for OBS_PURSUIT (13 observations): + 0: speed - clamp(speed/250, 0, 1) [0, 1] + 1: potential - alt/3000 [0, 1] + 2: pitch - pitch / (PI/2) [-1, 1] + 3: roll - roll / PI [-1, 1] **WRAPS** + 4: own_energy - (potential + kinetic) / 2 [0, 1] + 5: target_az - target_az / PI [-1, 1] **WRAPS** + 6: target_el - target_el / (PI/2) [-1, 1] + 7: dist - clamp(dist/500, 0, 2) - 1 [-1, 1] + 8: closure - clamp(closure/250, -1, 1) [-1, 1] + 9: target_roll - target_roll / PI [-1, 1] **WRAPS** + 10: target_pitch - target_pitch / (PI/2) [-1, 1] + 11: target_aspect- dot(opp_fwd, to_player) [-1, 1] + 12: energy_adv - clamp(own_E - opp_E, -1, 1) [-1, 1] + +Run: python pufferlib/ocean/dogfight/test_flight_obs_pursuit.py --test obs_pursuit_bounds +""" +import numpy as np +from dogfight import Dogfight + +from test_flight_base import ( + get_render_mode, get_render_fps, + RESULTS, +) + + +def test_obs_pursuit_bounds(): + """ + Run random maneuvers in OBS_PURSUIT (scheme 1) and verify all observations + stay in valid ranges. This catches NaN/Inf/out-of-bounds issues. + + OBS_PURSUIT has 13 observations with specific bounds: + - Indices 0, 1, 4: [0, 1] (speed, potential, own_energy) + - All others: [-1, 1] + """ + env = Dogfight(num_envs=1, obs_scheme=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + violations = [] + np.random.seed(42) # Reproducible + + for step in range(500): + # Random maneuvers + throttle = np.random.uniform(0.3, 1.0) + elevator = np.random.uniform(-0.5, 0.5) + aileron = np.random.uniform(-0.8, 0.8) + rudder = np.random.uniform(-0.3, 0.3) + action = np.array([[throttle, elevator, aileron, rudder, 0.0]], dtype=np.float32) + + _, _, term, _, _ = env.step(action) + obs = env.observations[0] + + for i, val in enumerate(obs): + if np.isnan(val) or np.isinf(val): + violations.append(f"NaN/Inf at step {step}, obs[{i}]") + # Indices 0, 1, 4 are [0, 1], rest are [-1, 1] + if i in [0, 1, 4]: # speed, potential, energy are [0, 1] + if val < -0.01 or val > 1.01: + violations.append(f"obs[{i}]={val:.3f} out of [0,1] at step {step}") + else: + if val < -1.01 or val > 1.01: + violations.append(f"obs[{i}]={val:.3f} out of [-1,1] at step {step}") + + if term[0]: + env.reset() + + passed = len(violations) == 0 + RESULTS['obs_pursuit_bounds'] = passed + status = "OK" if passed else "FAIL" + print(f"obs_pursuit_bounds: 500 steps, violations={len(violations)} [{status}]") + if violations: + for v in violations[:5]: + print(f" {v}") + env.close() + return passed + + +def test_obs_pursuit_energy_conservation(): + """ + Vertical climb: watch kinetic -> potential energy conversion. + + Physics: In ideal climb (no drag): E = mgh + 0.5mv^2 = constant + At v=100 m/s, h_max = v^2/(2g) = 509.7m (drag-free) + With drag, actual h_max < 509.7m + + Energy observation (obs[4]) should decrease slightly due to drag, + but not increase significantly (conservation violation). 
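+
+    For scale (drag-free): the climb below starts at 1000m and 100 m/s, so the
+    ideal apex is about 1000 + 509.7 ~= 1510m; with drag the apex should land
+    somewhat below that, consistent with the broad 200-600m altitude-gain band
+    checked below.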
+ """ + env = Dogfight(num_envs=1, obs_scheme=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # 90deg pitch, 100 m/s, low throttle + pitch_90 = np.radians(90) + qw = np.cos(pitch_90 / 2) + qy = -np.sin(pitch_90 / 2) # Negative for nose UP + + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(0, 0, 100), # 100 m/s vertical velocity + player_ori=(qw, 0, qy, 0), # Nose straight up + player_throttle=0.1, # Minimal throttle + opponent_pos=(500, 0, 1000), + opponent_vel=(100, 0, 0), + ) + + data = [] + for step in range(200): # ~4 seconds + action = np.array([[0.1, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) # Minimal throttle + env.step(action) + obs = env.observations[0] + state = env.get_state() + + data.append({ + 'step': step, + 'vz': state['vz'], + 'alt': state['pz'], + 'speed_obs': obs[0], + 'potential_obs': obs[1], + 'own_energy': obs[4], + }) + + # Stop when vertical velocity near zero (apex) + if state['vz'] < 5: + break + + # Analysis + initial_energy = data[0]['own_energy'] + final_energy = data[-1]['own_energy'] + alt_gained = data[-1]['alt'] - data[0]['alt'] + + # Energy should not INCREASE significantly (conservation violation) + # Allow 5% tolerance for thrust contribution at low throttle + energy_increase = final_energy > initial_energy + 0.05 + + # Altitude gain should be reasonable (with drag losses) + # Ideal: 509.7m, expect ~300-550m with drag + alt_reasonable = 200 < alt_gained < 600 + + passed = not energy_increase and alt_reasonable + RESULTS['obs_pursuit_energy_climb'] = passed + status = "OK" if passed else "CHECK" + + print(f"obs_pursuit_energy_climb: E: {initial_energy:.3f}->{final_energy:.3f}, alt_gain={alt_gained:.0f}m [{status}]") + if energy_increase: + print(f" WARNING: Energy increased {final_energy - initial_energy:.3f} (conservation violation?)") + if not alt_reasonable: + print(f" WARNING: Alt gain {alt_gained:.0f}m outside expected 200-600m") + + env.close() + return passed + + +def test_obs_pursuit_energy_dive(): + """ + Dive: watch potential -> kinetic energy conversion. + + Start high (2500m), pitch down, let gravity accelerate. + Energy should be relatively stable (gravity -> speed, drag -> loss). 
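+
+    For scale (drag-free): dropping dh metres converts to speed via
+    v = sqrt(v0^2 + 2*g*dh), so descending from 2500m toward the 800m cutoff
+    (dh ~= 1700m) starting at 50 m/s would give ~189 m/s with no drag. The test
+    only requires a >20 m/s gain, leaving ample headroom for drag losses.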
+ """ + env = Dogfight(num_envs=1, obs_scheme=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start high, pitch down 45deg + pitch_down = np.radians(-45) + qw = np.cos(pitch_down / 2) + qy = -np.sin(pitch_down / 2) + + env.force_state( + player_pos=(0, 0, 2500), + player_vel=(50, 0, 0), + player_ori=(qw, 0, qy, 0), + player_throttle=0.0, # Idle + opponent_pos=(500, 0, 2500), + opponent_vel=(100, 0, 0), + ) + + data = [] + for step in range(200): + action = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) # Idle, let gravity work + _, _, term, _, _ = env.step(action) + obs = env.observations[0] + state = env.get_state() + + speed = np.sqrt(state['vx']**2 + state['vy']**2 + state['vz']**2) + data.append({ + 'step': step, + 'speed': speed, + 'alt': state['pz'], + 'speed_obs': obs[0], + 'potential_obs': obs[1], + 'own_energy': obs[4], + }) + + if state['pz'] < 800 or term[0]: # Stop at 800m or termination + break + + initial_energy = data[0]['own_energy'] + final_energy = data[-1]['own_energy'] + speed_gained = data[-1]['speed'] - data[0]['speed'] + alt_lost = data[0]['alt'] - data[-1]['alt'] + + # Energy should decrease slightly (drag) but not increase + energy_increase = final_energy > initial_energy + 0.05 + # Speed should increase (gravity) + speed_gain_ok = speed_gained > 20 + + passed = not energy_increase and speed_gain_ok + RESULTS['obs_pursuit_energy_dive'] = passed + status = "OK" if passed else "CHECK" + + print(f"obs_pursuit_energy_dive: E: {initial_energy:.3f}->{final_energy:.3f}, speed_gain={speed_gained:.0f}m/s, alt_loss={alt_lost:.0f}m [{status}]") + if energy_increase: + print(f" WARNING: Energy increased during unpowered dive") + + env.close() + return passed + + +def test_obs_pursuit_energy_advantage(): + """ + Test energy advantage observation (obs[12]) with different altitude/speed configs. 
+ + Energy advantage = own_energy - opponent_energy, clamped to [-1, 1] + - Higher/faster player should have positive advantage + - Lower/slower player should have negative advantage + - Equal state should have ~0 advantage + """ + env = Dogfight(num_envs=1, obs_scheme=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + action = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + + # Case 1: Player higher, same speed -> positive advantage + env.reset() + env.force_state( + player_pos=(0, 0, 2000), player_vel=(100, 0, 0), + opponent_pos=(500, 0, 1000), opponent_vel=(100, 0, 0), + ) + env.step(action) + adv_high = env.observations[0][12] + + # Case 2: Player lower, same speed -> negative advantage + env.reset() + env.force_state( + player_pos=(0, 0, 1000), player_vel=(100, 0, 0), + opponent_pos=(500, 0, 2000), opponent_vel=(100, 0, 0), + ) + env.step(action) + adv_low = env.observations[0][12] + + # Case 3: Same altitude, player faster -> positive advantage + env.reset() + env.force_state( + player_pos=(0, 0, 1500), player_vel=(150, 0, 0), + opponent_pos=(500, 0, 1500), opponent_vel=(80, 0, 0), + ) + env.step(action) + adv_fast = env.observations[0][12] + + # Case 4: Equal state -> zero advantage + env.reset() + env.force_state( + player_pos=(0, 0, 1500), player_vel=(100, 0, 0), + opponent_pos=(500, 0, 1500), opponent_vel=(100, 0, 0), + ) + env.step(action) + adv_equal = env.observations[0][12] + + # Verify + high_ok = adv_high > 0.1 + low_ok = adv_low < -0.1 + fast_ok = adv_fast > 0.0 + equal_ok = abs(adv_equal) < 0.05 + + passed = high_ok and low_ok and fast_ok and equal_ok + RESULTS['obs_pursuit_energy_adv'] = passed + status = "OK" if passed else "FAIL" + + print(f"obs_pursuit_energy_adv: high={adv_high:.3f}, low={adv_low:.3f}, fast={adv_fast:.3f}, equal={adv_equal:.3f} [{status}]") + if not high_ok: + print(f" FAIL: Higher player should have positive advantage, got {adv_high:.3f}") + if not low_ok: + print(f" FAIL: Lower player should have negative advantage, got {adv_low:.3f}") + if not equal_ok: + print(f" FAIL: Equal state should have ~0 advantage, got {adv_equal:.3f}") + + env.close() + return passed + + +def test_obs_pursuit_target_aspect(): + """ + Test target aspect observation (obs[11]). + + target_aspect = dot(opponent_forward, to_player) + - Head-on (opponent facing us): ~+1.0 + - Tail (opponent facing away): ~-1.0 + - Beam (perpendicular): ~0.0 + + IMPORTANT: Must set opponent_ori to match opponent_vel, otherwise + physics step will severely alter velocity (flying "backward" is not stable). 
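+
+    Worked example for the head-on setup below: the opponent sits at x=+500
+    facing -X and the player is at the origin, so to_player = (-1, 0, 0) and
+    opponent_forward = (-1, 0, 0), giving dot = +1. Flip the opponent to face
+    +X and the dot becomes -1 (tail); point it along +Y and the dot is ~0
+    (beam).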
+ """ + env = Dogfight(num_envs=1, obs_scheme=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + action = np.array([[0.5, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) # Some throttle + + # Head-on: opponent facing toward player (yaw=180deg = facing -X) + # Quaternion for yaw=180deg: qw=0, qz=1 + env.reset() + env.force_state( + player_pos=(0, 0, 1500), player_vel=(100, 0, 0), + opponent_pos=(500, 0, 1500), opponent_vel=(-100, 0, 0), + opponent_ori=(0, 0, 0, 1), # Yaw=180deg = facing -X (toward player) + ) + env.step(action) + aspect_head_on = env.observations[0][11] + + # Tail: opponent facing away from player (identity = facing +X) + env.reset() + env.force_state( + player_pos=(0, 0, 1500), player_vel=(100, 0, 0), + opponent_pos=(500, 0, 1500), opponent_vel=(100, 0, 0), + opponent_ori=(1, 0, 0, 0), # Identity = facing +X (away from player) + ) + env.step(action) + aspect_tail = env.observations[0][11] + + # Beam: opponent perpendicular (yaw=-90deg = facing +Y) + # Quaternion for yaw=-90deg: qw=cos(-45deg)~0.707, qz=sin(-45deg)~-0.707 + cos45 = np.cos(np.radians(-45)) + sin45 = np.sin(np.radians(-45)) + env.reset() + env.force_state( + player_pos=(0, 0, 1500), player_vel=(100, 0, 0), + opponent_pos=(500, 0, 1500), opponent_vel=(0, 100, 0), + opponent_ori=(cos45, 0, 0, sin45), # Yaw=-90deg = facing +Y + ) + env.step(action) + aspect_beam = env.observations[0][11] + + # Verify + head_on_ok = aspect_head_on > 0.85 # Near +1 + tail_ok = aspect_tail < -0.85 # Near -1 + beam_ok = abs(aspect_beam) < 0.3 # Near 0 + + passed = head_on_ok and tail_ok and beam_ok + RESULTS['obs_pursuit_aspect'] = passed + status = "OK" if passed else "FAIL" + + print(f"obs_pursuit_aspect: head_on={aspect_head_on:.3f}, tail={aspect_tail:.3f}, beam={aspect_beam:.3f} [{status}]") + if not head_on_ok: + print(f" FAIL: Head-on should be >0.85, got {aspect_head_on:.3f}") + if not tail_ok: + print(f" FAIL: Tail should be <-0.85, got {aspect_tail:.3f}") + if not beam_ok: + print(f" FAIL: Beam should be near 0, got {aspect_beam:.3f}") + + env.close() + return passed + + +def test_obs_pursuit_closure_rate(): + """ + Test closure rate observation (obs[8]). + + closure = dot(relative_vel, normalized_to_target) + - Closing (getting closer): positive + - Separating (getting farther): negative + - Head-on (both approaching): high positive + + IMPORTANT: Must set opponent_ori to match opponent_vel to avoid + physics instability (flying backward causes extreme drag). 
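+
+    Worked example (using the closure/250 normalization from the layout at the
+    top of this file): the chasing case below closes at 150 - 50 = 100 m/s,
+    i.e. obs ~ +0.4, while the head-on case closes at 100 + 100 = 200 m/s,
+    i.e. obs ~ +0.8 - consistent with the 0.3 and 0.7 thresholds used in the
+    assertions.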
+ """ + env = Dogfight(num_envs=1, obs_scheme=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + action = np.array([[0.5, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) # Some throttle + + # Closing: player faster toward target (chasing) + # Both facing +X (default orientation) + env.reset() + env.force_state( + player_pos=(0, 0, 1500), player_vel=(150, 0, 0), + opponent_pos=(500, 0, 1500), opponent_vel=(50, 0, 0), + opponent_ori=(1, 0, 0, 0), # Facing +X (same as velocity) + ) + env.step(action) + closure_closing = env.observations[0][8] + + # Separating: target running away faster + env.reset() + env.force_state( + player_pos=(0, 0, 1500), player_vel=(80, 0, 0), + opponent_pos=(500, 0, 1500), opponent_vel=(150, 0, 0), + opponent_ori=(1, 0, 0, 0), # Facing +X + ) + env.step(action) + closure_separating = env.observations[0][8] + + # Head-on: both approaching each other + # Opponent facing -X (toward player): yaw=180deg -> qw=0, qz=1 + env.reset() + env.force_state( + player_pos=(0, 0, 1500), player_vel=(100, 0, 0), + opponent_pos=(500, 0, 1500), opponent_vel=(-100, 0, 0), + opponent_ori=(0, 0, 0, 1), # Yaw=180deg = facing -X + ) + env.step(action) + closure_head_on = env.observations[0][8] + + # Verify + closing_ok = closure_closing > 0.3 + separating_ok = closure_separating < -0.2 + head_on_ok = closure_head_on > 0.7 + + passed = closing_ok and separating_ok and head_on_ok + RESULTS['obs_pursuit_closure'] = passed + status = "OK" if passed else "FAIL" + + print(f"obs_pursuit_closure: closing={closure_closing:.3f}, separating={closure_separating:.3f}, head_on={closure_head_on:.3f} [{status}]") + if not closing_ok: + print(f" FAIL: Closing rate should be >0.3, got {closure_closing:.3f}") + if not separating_ok: + print(f" FAIL: Separating rate should be <-0.2, got {closure_separating:.3f}") + if not head_on_ok: + print(f" FAIL: Head-on closure should be >0.7, got {closure_head_on:.3f}") + + env.close() + return passed + + +def test_obs_pursuit_target_angles_wrap(): + """ + Check target_az (obs[5]) and target_roll (obs[9]) for wrap discontinuities. + + Sweep target position around player (behind the player through +/-180deg) + and check for large discontinuities in target_az. 
+ """ + env = Dogfight(num_envs=1, obs_scheme=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + action = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + + target_azs = [] + y_positions = [] + + # Sweep opponent from right-behind (y=-200) through left-behind (y=+200) + for step in range(50): + env.reset() + y_offset = -200 + step * 8 # Sweep from y=-200 to y=+200 + + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(100, 0, 0), + player_ori=(1, 0, 0, 0), # Identity - facing +X + opponent_pos=(-200, y_offset, 1500), # Behind player, sweeping Y + opponent_vel=(100, 0, 0), + ) + + env.step(action) + target_azs.append(env.observations[0][5]) + y_positions.append(y_offset) + + # Check for discontinuities + az_jumps = [] + for i in range(1, len(target_azs)): + delta = abs(target_azs[i] - target_azs[i-1]) + if delta > 0.5: # Large jump = discontinuity + az_jumps.append((i, y_positions[i], target_azs[i-1], target_azs[i], delta)) + + # Verify azimuth range covers near +/-1 (behind = +/-180deg) + az_min = min(target_azs) + az_max = max(target_azs) + range_ok = az_max > 0.8 and az_min < -0.8 + + # Discontinuity at +/-180deg crossover is EXPECTED for atan2-based azimuth + has_discontinuity = len(az_jumps) > 0 + + RESULTS['obs_pursuit_az_wrap'] = range_ok + status = "OK" if range_ok else "CHECK" + + print(f"obs_pursuit_az_wrap: range=[{az_min:.2f},{az_max:.2f}], discontinuities={len(az_jumps)} [{status}]") + + if has_discontinuity: + print(f" NOTE: target_az has discontinuity at +/-180deg (expected for atan2)") + for _, y_pos, prev_az, curr_az, delta in az_jumps[:2]: + print(f" At y={y_pos:.0f}: az {prev_az:.2f} -> {curr_az:.2f} (delta={delta:.2f})") + print(f" Consider: Use sin/cos encoding for RL training") + + if not range_ok: + print(f" WARNING: target_az didn't reach +/-1 (behind player)") + + env.close() + return range_ok + + +# Test registry for this module +TESTS = { + 'obs_pursuit_bounds': test_obs_pursuit_bounds, + 'obs_pursuit_energy_climb': test_obs_pursuit_energy_conservation, + 'obs_pursuit_energy_dive': test_obs_pursuit_energy_dive, + 'obs_pursuit_energy_adv': test_obs_pursuit_energy_advantage, + 'obs_pursuit_aspect': test_obs_pursuit_target_aspect, + 'obs_pursuit_closure': test_obs_pursuit_closure_rate, + 'obs_pursuit_az_wrap': test_obs_pursuit_target_angles_wrap, +} + + +if __name__ == "__main__": + from test_flight_base import get_args + args = get_args() + + print("OBS_PURSUIT (Scheme 1) Tests") + print("=" * 60) + + if args.test: + if args.test in TESTS: + print(f"Running single test: {args.test}") + if get_render_mode(): + print("Rendering enabled - press ESC to exit") + print("=" * 60) + TESTS[args.test]() + else: + print(f"Unknown test: {args.test}") + print(f"Available tests: {', '.join(TESTS.keys())}") + else: + print("Running all OBS_PURSUIT tests") + if get_render_mode(): + print("Rendering enabled - press ESC to exit") + print("=" * 60) + for test_func in TESTS.values(): + test_func() diff --git a/pufferlib/ocean/dogfight/test_flight_obs_static.py b/pufferlib/ocean/dogfight/test_flight_obs_static.py new file mode 100644 index 000000000..c1b169590 --- /dev/null +++ b/pufferlib/ocean/dogfight/test_flight_obs_static.py @@ -0,0 +1,342 @@ +""" +Static observation scheme tests for dogfight environment. +Tests observation bounds, dimensions, and values at specific orientations. 
+ +Run: python pufferlib/ocean/dogfight/test_flight_obs_static.py --test obs_bounds +""" +import numpy as np +from dogfight import Dogfight, OBS_SIZES + +from test_flight_base import ( + get_render_mode, get_render_fps, + RESULTS, OBS_ATOL, OBS_RTOL, +) + + +def obs_assert_close(actual, expected, name, atol=OBS_ATOL, rtol=OBS_RTOL): + """Assert two values are close, with descriptive error.""" + if np.isclose(actual, expected, atol=atol, rtol=rtol): + return True + else: + print(f" {name}: {actual:.4f} != {expected:.4f} [FAIL]") + return False + + +def obs_continuity_check(obs, prev_obs, step, max_delta=0.3): + """ + Check observation continuity and bounds during dynamic flight. + + Returns tuple: (passed, error_msg) + - All obs should be in [-1, 1] (proper bounds for NN input) + - No NaN/Inf values + - No sudden jumps > max_delta between timesteps (discontinuity detection) + + Args: + obs: Current observation array + prev_obs: Previous observation array (or None for first step) + step: Current timestep (for error messages) + max_delta: Maximum allowed change per timestep (default 0.3) + + Returns: + (passed: bool, error_msg: str or None) + """ + # Check for NaN/Inf + if np.any(np.isnan(obs)): + nan_indices = np.where(np.isnan(obs))[0] + return False, f"NaN at step {step}, indices: {nan_indices}" + + if np.any(np.isinf(obs)): + inf_indices = np.where(np.isinf(obs))[0] + return False, f"Inf at step {step}, indices: {inf_indices}" + + # Check bounds [-1, 1] + for i, val in enumerate(obs): + if val < -1.0 or val > 1.0: + return False, f"Obs[{i}]={val:.3f} out of bounds [-1,1] at step {step}" + + # Check continuity (no sudden jumps) + if prev_obs is not None: + for i in range(len(obs)): + delta = abs(obs[i] - prev_obs[i]) + if delta > max_delta: + return False, f"Discontinuity at step {step}: obs[{i}] jumped {prev_obs[i]:.3f} -> {obs[i]:.3f} (delta={delta:.3f})" + + return True, None + + +def test_obs_scheme_dimensions(): + """Verify all obs schemes have correct dimensions.""" + all_passed = True + for scheme, expected_size in OBS_SIZES.items(): + env = Dogfight(num_envs=1, obs_scheme=scheme, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + obs = env.observations[0] + actual = len(obs) + passed = actual == expected_size + all_passed &= passed + status = "OK" if passed else "FAIL" + print(f"obs_dim_{scheme}: {actual} obs (expected {expected_size}) [{status}]") + env.close() + RESULTS['obs_dimensions'] = all_passed + return all_passed + + +def test_obs_identity_orientation(): + """ + Test identity orientation: player at origin, target ahead. 
+ Expect: pitch=0, roll=0, yaw=0, azimuth=0, elevation=0 + """ + env = Dogfight(num_envs=1, obs_scheme=0, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(100, 0, 0), + player_ori=(1, 0, 0, 0), # Identity quaternion + opponent_pos=(400, 0, 1000), + opponent_vel=(100, 0, 0), + ) + + action = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + obs = env.observations[0] + + passed = True + passed &= obs_assert_close(obs[4], 0.0, "pitch") + passed &= obs_assert_close(obs[5], 0.0, "roll") + passed &= obs_assert_close(obs[6], 0.0, "yaw") + passed &= obs_assert_close(obs[7], 0.0, "azimuth") + passed &= obs_assert_close(obs[8], 0.0, "elevation") + + RESULTS['obs_identity'] = passed + status = "OK" if passed else "FAIL" + print(f"obs_identity: identity orientation [{status}]") + env.close() + return passed + + +def test_obs_pitched_up(): + """ + Pitched up 30 degrees. + Expect: pitch = -30/180 = -0.167 (negative = nose UP) + """ + env = Dogfight(num_envs=1, obs_scheme=0, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + pitch_rad = np.radians(30) + qw = np.cos(-pitch_rad / 2) + qy = np.sin(-pitch_rad / 2) + + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(100, 0, 0), + player_ori=(qw, 0, qy, 0), + opponent_pos=(400, 0, 1000), + opponent_vel=(100, 0, 0), + ) + + action = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + obs = env.observations[0] + + expected_pitch = -30.0 / 180.0 + passed = obs_assert_close(obs[4], expected_pitch, "pitch") + + RESULTS['obs_pitched'] = passed + status = "OK" if passed else "FAIL" + print(f"obs_pitched: pitch={obs[4]:.3f} (expect {expected_pitch:.3f}) [{status}]") + env.close() + return passed + + +def test_obs_target_angles(): + """Test target azimuth/elevation computation.""" + env = Dogfight(num_envs=1, obs_scheme=0, render_mode=get_render_mode(), render_fps=get_render_fps()) + + # Target to the right + env.reset() + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(100, 0, 0), + player_ori=(1, 0, 0, 0), + opponent_pos=(0, -400, 1000), # Right (negative Y) + opponent_vel=(100, 0, 0), + ) + action = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + azimuth_right = env.observations[0][7] + + # Target above + env.reset() + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(100, 0, 0), + player_ori=(1, 0, 0, 0), + opponent_pos=(0, 0, 1400), + opponent_vel=(100, 0, 0), + ) + env.step(action) + elev_above = env.observations[0][8] + + passed = True + passed &= obs_assert_close(azimuth_right, -0.5, "azimuth_right") + passed &= obs_assert_close(elev_above, 1.0, "elev_above", atol=0.1) + + RESULTS['obs_target_angles'] = passed + status = "OK" if passed else "FAIL" + print(f"obs_target: az_right={azimuth_right:.3f}, elev_up={elev_above:.3f} [{status}]") + env.close() + return passed + + +def test_obs_horizon_visible(): + """Test horizon_visible in scheme 2 (level=1, knife=0, inverted=-1).""" + env = Dogfight(num_envs=1, obs_scheme=2, render_mode=get_render_mode(), render_fps=get_render_fps()) + action = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + + # Level + env.reset() + env.force_state(player_pos=(0, 0, 1000), player_vel=(100, 0, 0), player_ori=(1, 0, 0, 0), + opponent_pos=(400, 0, 1000), opponent_vel=(100, 0, 0)) + env.step(action) + h_level = env.observations[0][8] + + # Knife-edge (90 deg roll) + env.reset() + roll_90 = np.radians(90) + 
env.force_state(player_pos=(0, 0, 1000), player_vel=(100, 0, 0), + player_ori=(np.cos(-roll_90/2), np.sin(-roll_90/2), 0, 0), + opponent_pos=(400, 0, 1000), opponent_vel=(100, 0, 0)) + env.step(action) + h_knife = env.observations[0][8] + + # Inverted (180 deg roll) + env.reset() + roll_180 = np.radians(180) + env.force_state(player_pos=(0, 0, 1000), player_vel=(100, 0, 0), + player_ori=(np.cos(-roll_180/2), np.sin(-roll_180/2), 0, 0), + opponent_pos=(400, 0, 1000), opponent_vel=(100, 0, 0)) + env.step(action) + h_inv = env.observations[0][8] + + passed = True + passed &= obs_assert_close(h_level, 1.0, "level") + passed &= obs_assert_close(h_knife, 0.0, "knife", atol=0.1) + passed &= obs_assert_close(h_inv, -1.0, "inverted") + + RESULTS['obs_horizon'] = passed + status = "OK" if passed else "FAIL" + print(f"obs_horizon: level={h_level:.2f}, knife={h_knife:.2f}, inv={h_inv:.2f} [{status}]") + env.close() + return passed + + +def test_obs_edge_cases(): + """Test edge cases: azimuth at 180°, zero speed, extreme distance.""" + env = Dogfight(num_envs=1, obs_scheme=0, render_mode=get_render_mode(), render_fps=get_render_fps()) + action = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + passed = True + + # Target behind-left (near +180°) + env.reset() + env.force_state(player_pos=(0, 0, 1000), player_vel=(100, 0, 0), player_ori=(1, 0, 0, 0), + opponent_pos=(-400, 10, 1000), opponent_vel=(100, 0, 0)) + env.step(action) + az_left = env.observations[0][7] + + # Target behind-right (near -180°) + env.reset() + env.force_state(player_pos=(0, 0, 1000), player_vel=(100, 0, 0), player_ori=(1, 0, 0, 0), + opponent_pos=(-400, -10, 1000), opponent_vel=(100, 0, 0)) + env.step(action) + az_right = env.observations[0][7] + + # Extreme distance (5km) + env.reset() + env.force_state(player_pos=(0, 0, 1000), player_vel=(100, 0, 0), player_ori=(1, 0, 0, 0), + opponent_pos=(5000, 0, 1000), opponent_vel=(100, 0, 0)) + env.step(action) + dist_obs = env.observations[0][9] + + passed &= az_left > 0.9 # Should be near +1 + passed &= az_right < -0.9 # Should be near -1 + passed &= -1.0 <= dist_obs <= 1.0 # Should be clamped + + RESULTS['obs_edge_cases'] = passed + status = "OK" if passed else "FAIL" + print(f"obs_edges: az_180={az_left:.2f}/{az_right:.2f}, dist_clamp={dist_obs:.2f} [{status}]") + env.close() + return passed + + +def test_obs_bounds(): + """Test that random states produce bounded observations in [-1, 1] for NN input.""" + env = Dogfight(num_envs=1, obs_scheme=0, render_mode=get_render_mode(), render_fps=get_render_fps()) + action = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + passed = True + out_of_bounds = [] + + for trial in range(30): + env.reset() + pos = (np.random.uniform(-4000, 4000), np.random.uniform(-4000, 4000), np.random.uniform(100, 2900)) + vel = tuple(np.random.randn(3) * 100) + ori = np.random.randn(4) + ori /= np.linalg.norm(ori) + if ori[0] < 0: ori = -ori + opp_pos = (pos[0] + np.random.uniform(-500, 500), pos[1] + np.random.uniform(-500, 500), pos[2] + np.random.uniform(-500, 500)) + + env.force_state(player_pos=pos, player_vel=vel, player_ori=tuple(ori), + opponent_pos=opp_pos, opponent_vel=(100, 0, 0)) + env.step(action) + + for i, val in enumerate(env.observations[0]): + if val < -1.0 or val > 1.0: + passed = False + out_of_bounds.append((trial, i, val)) + + RESULTS['obs_bounds'] = passed + status = "OK" if passed else "FAIL" + print(f"obs_bounds: 30 random states, all in [-1.0, 1.0] [{status}]") + if out_of_bounds: + for trial, idx, val in out_of_bounds[:5]: 
# Show first 5 violations + print(f" trial {trial}: obs[{idx}]={val:.3f} out of bounds") + env.close() + return passed + + +# Test registry for this module +TESTS = { + 'obs_dimensions': test_obs_scheme_dimensions, + 'obs_identity': test_obs_identity_orientation, + 'obs_pitched': test_obs_pitched_up, + 'obs_target_angles': test_obs_target_angles, + 'obs_horizon': test_obs_horizon_visible, + 'obs_edge_cases': test_obs_edge_cases, + 'obs_bounds': test_obs_bounds, +} + + +if __name__ == "__main__": + from test_flight_base import get_args + args = get_args() + + print("Static Observation Tests") + print("=" * 60) + + if args.test: + if args.test in TESTS: + print(f"Running single test: {args.test}") + if get_render_mode(): + print("Rendering enabled - press ESC to exit") + print("=" * 60) + TESTS[args.test]() + else: + print(f"Unknown test: {args.test}") + print(f"Available tests: {', '.join(TESTS.keys())}") + else: + print("Running all static observation tests") + if get_render_mode(): + print("Rendering enabled - press ESC to exit") + print("=" * 60) + for test_func in TESTS.values(): + test_func() diff --git a/pufferlib/ocean/dogfight/test_flight_physics.py b/pufferlib/ocean/dogfight/test_flight_physics.py new file mode 100644 index 000000000..40ac7d4a5 --- /dev/null +++ b/pufferlib/ocean/dogfight/test_flight_physics.py @@ -0,0 +1,1398 @@ +""" +Physics validation tests for dogfight environment. +Tests flight physics: speed, climb, turn, control directions, G-forces. + +Run: python pufferlib/ocean/dogfight/test_flight_physics.py --test max_speed +""" +import numpy as np +from dogfight import Dogfight, AutopilotMode + +from test_flight_base import ( + get_render_mode, get_render_fps, + RESULTS, TEST_HIGHLIGHTS, setup_highlights, + P51D_MAX_SPEED, P51D_STALL_SPEED, P51D_CLIMB_RATE, P51D_TURN_RATE, + LEVEL_FLIGHT_KP, LEVEL_FLIGHT_KD, + get_speed_from_state, get_vz_from_state, get_alt_from_state, + level_flight_pitch_from_state, +) + + +def test_max_speed(): + """ + Full throttle level flight starting near max speed. + Should stabilize around 159 m/s (P-51D Military power). + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start at 150 m/s (near expected max), center of world, flying +X + env.force_state( + player_pos=(-1000, 0, 1000), + player_vel=(150, 0, 0), + player_throttle=1.0, + ) + + prev_speed = get_speed_from_state(env) + stable_count = 0 + + for step in range(1500): # 30 seconds + elevator = level_flight_pitch_from_state(env) + action = np.array([[1.0, elevator, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + + if term[0]: + print(" (terminated - hit bounds)") + break + + speed = get_speed_from_state(env) + if abs(speed - prev_speed) < 0.05: + stable_count += 1 + if stable_count > 100: + break + else: + stable_count = 0 + prev_speed = speed + + final_speed = get_speed_from_state(env) + RESULTS['max_speed'] = final_speed + diff = final_speed - P51D_MAX_SPEED + status = "OK" if abs(diff) < 15 else "CHECK" + print(f"max_speed: {final_speed:6.1f} m/s (P-51D: {P51D_MAX_SPEED:.0f}, diff: {diff:+.1f}) [{status}]") + + +def test_acceleration(): + """ + Full throttle starting at 100 m/s - verify plane accelerates. + Should see speed increase toward max speed (~150 m/s). 
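+
+    Note on the action mapping assumed by these tests (see the cruise and
+    deceleration tests below): action[0] = +1.0 is full throttle, 0.0 is ~50%
+    and -1.0 is 0% (idle), so this test simply holds +1.0 and watches the
+    speed trend.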
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start at 100 m/s (well below max speed) + env.force_state( + player_pos=(-1000, 0, 1000), + player_vel=(100, 0, 0), + player_throttle=1.0, + ) + + initial_speed = get_speed_from_state(env) + speeds = [initial_speed] + + for step in range(500): # 10 seconds + elevator = level_flight_pitch_from_state(env) + action = np.array([[1.0, elevator, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + + if term[0]: + print(" (terminated - hit bounds)") + break + + speed = get_speed_from_state(env) + speeds.append(speed) + + final_speed = speeds[-1] + speed_gain = final_speed - initial_speed + RESULTS['acceleration'] = speed_gain + + # Should gain at least 20 m/s in 10 seconds + status = "OK" if speed_gain > 20 else "CHECK" + print(f"acceleration: {initial_speed:.0f} -> {final_speed:.0f} m/s (gained {speed_gain:+.1f} m/s) [{status}]") + + +def test_deceleration(): + """ + Zero throttle starting at 150 m/s - verify plane decelerates due to drag. + Should see speed decrease as drag slows the plane. + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start at 150 m/s with zero throttle + env.force_state( + player_pos=(-1000, 0, 1000), + player_vel=(150, 0, 0), + player_throttle=0.0, + ) + + initial_speed = get_speed_from_state(env) + speeds = [initial_speed] + + for step in range(500): # 10 seconds + elevator = level_flight_pitch_from_state(env) + # Zero throttle (action[0] = -1 maps to 0% throttle) + action = np.array([[-1.0, elevator, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + + if term[0]: + print(" (terminated - hit bounds)") + break + + speed = get_speed_from_state(env) + speeds.append(speed) + + final_speed = speeds[-1] + speed_loss = initial_speed - final_speed + RESULTS['deceleration'] = speed_loss + + # Should lose at least 20 m/s in 10 seconds due to drag + status = "OK" if speed_loss > 20 else "CHECK" + print(f"deceleration: {initial_speed:.0f} -> {final_speed:.0f} m/s (lost {speed_loss:+.1f} m/s) [{status}]") + + +def test_cruise_speed(): + """50% throttle level flight - cruise speed.""" + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start at moderate speed + env.force_state( + player_pos=(-1000, 0, 1000), + player_vel=(120, 0, 0), + player_throttle=0.5, + ) + + prev_speed = get_speed_from_state(env) + stable_count = 0 + + for step in range(1500): + elevator = level_flight_pitch_from_state(env) + action = np.array([[0.0, elevator, 0.0, 0.0, 0.0]], dtype=np.float32) # 50% throttle + _, _, term, _, _ = env.step(action) + + if term[0]: + break + + speed = get_speed_from_state(env) + if abs(speed - prev_speed) < 0.05: + stable_count += 1 + if stable_count > 100: + break + else: + stable_count = 0 + prev_speed = speed + + final_speed = get_speed_from_state(env) + RESULTS['cruise_speed'] = final_speed + print(f"cruise_speed: {final_speed:6.1f} m/s (50% throttle)") + + +def test_stall_speed(): + """ + Find stall speed by testing level flight at decreasing speeds. + + At each speed, set the exact pitch angle needed for level flight, + then verify the physics can maintain altitude. Stall occurs when + required C_L exceeds C_L_max. + + This bypasses autopilot limitations by setting pitch directly. 
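+
+    Worked number from the constants set below: V_stall_theory =
+    sqrt(2*W / (rho * S * C_L_max)) = sqrt(2 * 4082 * 9.81 / (1.225 * 21.65 * 1.48))
+    ~ 45 m/s, so sweeping 70 -> 40 m/s in 5 m/s steps brackets the expected stall.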
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + + # Physics constants (must match flightlib.h) + W = 4082 * 9.81 # Weight (N) + rho = 1.225 # Air density + S = 21.65 # Wing area + C_L_max = 1.48 # Max lift coefficient + C_L_alpha = 5.56 # Lift curve slope + alpha_zero = -0.021 # Zero-lift angle (rad) + wing_inc = 0.026 # Wing incidence (rad) + + # Theoretical stall speed + V_stall_theory = np.sqrt(2 * W / (rho * S * C_L_max)) + + # Test speeds from high to low + stall_speed = None + last_flyable = None + + for V in range(70, 35, -5): + env.reset() + + # C_L needed for level flight at this speed + q_dyn = 0.5 * rho * V * V + C_L_needed = W / (q_dyn * S) + + # Check if within aerodynamic limits + if C_L_needed > C_L_max: + # Can't fly level - this is stall + stall_speed = V + break + + # Calculate pitch angle needed for this C_L + # C_L = C_L_alpha * (alpha + wing_inc - alpha_zero) + alpha_needed = C_L_needed / C_L_alpha - wing_inc + alpha_zero + + # Create pitch-up quaternion (rotation about Y axis) + # Negative angle because positive Y rotation = nose DOWN (right-hand rule) + pitch_rad = alpha_needed + ori_w = np.cos(-pitch_rad / 2) + ori_y = np.sin(-pitch_rad / 2) + + # Set up plane at exact pitch for level flight + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(V, 0, 0), + player_ori=(ori_w, 0, ori_y, 0), + player_throttle=0.0, # Zero throttle - just testing lift + ) + + # Run for 2 seconds with zero controls, measure vz + vzs = [] + for _ in range(100): # 2 seconds + vz = get_vz_from_state(env) + vzs.append(vz) + action = np.array([[-1.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0]: + break + + avg_vz = np.mean(vzs[-50:]) if len(vzs) >= 50 else np.mean(vzs) + + # If maintaining altitude (vz near 0 or positive), plane can fly + if avg_vz >= -5: # Allow small sink rate + last_flyable = V + + # Stall speed is between last_flyable and the speed where C_L > C_L_max + if stall_speed is None: + stall_speed = 35 # Below our test range + elif last_flyable is not None: + # Interpolate: stall is where we transition from flyable to not + stall_speed = last_flyable + + RESULTS['stall_speed'] = stall_speed + diff = stall_speed - P51D_STALL_SPEED + status = "OK" if abs(diff) < 10 else "CHECK" + print(f"stall_speed: {stall_speed:6.1f} m/s (P-51D: {P51D_STALL_SPEED:.0f}, diff: {diff:+.1f}, theory: {V_stall_theory:.0f}) [{status}]") + + +def test_climb_rate(): + """ + Measure climb rate at Vy (best climb speed) with optimal pitch. + + Sets up plane at Vy with the pitch angle calculated for steady climb, + then measures actual climb rate. This tests that physics produces + correct excess thrust at climb speed. + + Approach: Calculate pitch for expected P-51D climb (15.4 m/s at 74 m/s), + set that state with force_state(), run with zero elevator (pitch holds), + and verify physics produces the expected climb rate. 
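+
+    Worked geometry from the constants below: gamma = arcsin(ROC / Vy) =
+    arcsin(15.4 / 74) ~ 12 deg, so vx = Vy*cos(gamma) ~ 72.4 m/s and
+    vz = Vy*sin(gamma) = 15.4 m/s. The forced state already moves along the
+    climb path; the test only checks that thrust minus drag can sustain it.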
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + + # Physics constants (must match flightlib.h) + W = 4082 * 9.81 # Weight (N) + rho = 1.225 # Air density + S = 21.65 # Wing area + C_L_alpha = 5.56 # Lift curve slope + alpha_zero = -0.021 # Zero-lift angle (rad) + wing_inc = 0.026 # Wing incidence (rad) + + Vy = 74.0 # Best climb speed (m/s) + + # Calculate climb geometry for P-51D expected performance + expected_ROC = P51D_CLIMB_RATE # 15.4 m/s + gamma = np.arcsin(expected_ROC / Vy) # Climb angle ~12° + + # In steady climb: L = W * cos(gamma) + L_needed = W * np.cos(gamma) + q_dyn = 0.5 * rho * Vy * Vy + C_L = L_needed / (q_dyn * S) + + # Calculate AOA needed for this lift + alpha = C_L / C_L_alpha - wing_inc + alpha_zero + + # Body pitch = AOA + climb angle (nose above horizon) + pitch = alpha + gamma + + # Create pitch-up quaternion (negative angle because positive Y rotation = nose DOWN) + ori_w = np.cos(-pitch / 2) + ori_y = np.sin(-pitch / 2) + + # Set up plane in steady climb: velocity vector along climb path + vx = Vy * np.cos(gamma) + vz = Vy * np.sin(gamma) # This IS the expected climb rate + + env.reset() + setup_highlights(env, 'climb_rate') + env.force_state( + player_pos=(0, 0, 500), + player_vel=(vx, 0, vz), # Velocity along climb path + player_ori=(ori_w, 0, ori_y, 0), # Pitch for steady climb + player_throttle=1.0, + ) + + # Run with zero elevator (pitch holds constant) and measure vz + vzs = [] + speeds = [] + + for step in range(1000): # 20 seconds + # Use state-based accessors (independent of obs_scheme) + vz_now = get_vz_from_state(env) + speed = get_speed_from_state(env) + + # Skip first 5 seconds for settling, then collect data + if step >= 250: + vzs.append(vz_now) + speeds.append(speed) + + # Zero elevator - pitch angle holds due to rate-based controls + action = np.array([[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0]: + break + + avg_vz = np.mean(vzs) if vzs else 0 + avg_speed = np.mean(speeds) if speeds else 0 + + RESULTS['climb_rate'] = avg_vz + diff = avg_vz - P51D_CLIMB_RATE + status = "OK" if abs(diff) < 5 else "CHECK" + print(f"climb_rate: {avg_vz:6.1f} m/s (P-51D: {P51D_CLIMB_RATE:.0f}, diff: {diff:+.1f}, speed: {avg_speed:.0f}/{Vy:.0f}) [{status}]") + + +def test_glide_ratio(): + """ + Power-off glide test - validates drag polar (Cd = Cd0 + K*Cl^2). + + At best glide speed, L/D is maximized. This occurs when induced drag + equals parasitic drag (Cd0 = K*Cl^2). 
+ + From our drag polar: + Cl_opt = sqrt(Cd0/K) = sqrt(0.0163/0.072) = 0.476 + Cd_opt = 2*Cd0 = 0.0326 + L/D_max = Cl_opt/Cd_opt = 14.6 + + Best glide speed: V = sqrt(2W/(rho*S*Cl)) = 80 m/s + Glide angle: gamma = arctan(1/L/D) = 3.9° + Expected sink rate: V * sin(gamma) = V/(L/D) = 5.5 m/s + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + + # Calculate theoretical values from drag polar + Cd0 = 0.0163 + K = 0.072 + W = 4082 * 9.81 + rho = 1.225 + S = 21.65 + C_L_alpha = 5.56 + alpha_zero = -0.021 + wing_inc = 0.026 + + Cl_opt = np.sqrt(Cd0 / K) # 0.476 + Cd_opt = 2 * Cd0 # 0.0326 + LD_max = Cl_opt / Cd_opt # 14.6 + + # Best glide speed + V_glide = np.sqrt(2 * W / (rho * S * Cl_opt)) # ~80 m/s + + # Glide angle (nose below horizon for descent) + gamma = np.arctan(1 / LD_max) # ~3.9° = 0.068 rad + + # Expected sink rate + sink_expected = V_glide * np.sin(gamma) # ~5.5 m/s + + # AOA needed for Cl_opt + alpha = Cl_opt / C_L_alpha - wing_inc + alpha_zero # ~0.04 rad + + # In steady glide: body pitch = alpha - gamma (nose below velocity) + # But our velocity is along glide path, so body pitch relative to horizontal = alpha - gamma + # For quaternion: we want nose tilted down from horizontal + pitch = alpha - gamma # Negative = nose down + + # Create quaternion for glide attitude (negative because positive Y rotation = nose down) + ori_w = np.cos(-pitch / 2) + ori_y = np.sin(-pitch / 2) + + # Velocity along glide path (descending) + vx = V_glide * np.cos(gamma) + vz = -V_glide * np.sin(gamma) # Negative = descending + + env.reset() + setup_highlights(env, 'glide_ratio') + env.force_state( + player_pos=(0, 0, 2000), # High altitude for long glide + player_vel=(vx, 0, vz), + player_ori=(ori_w, 0, ori_y, 0), + player_throttle=0.0, + ) + + # Run with zero controls - let physics maintain steady glide + vzs = [] + speeds = [] + + for step in range(500): # 10 seconds + # Use state-based accessors (independent of obs_scheme) + vz_now = get_vz_from_state(env) + speed = get_speed_from_state(env) + + # Collect data after 2 seconds of settling + if step >= 100: + vzs.append(vz_now) + speeds.append(speed) + + # Zero controls - pitch angle holds due to rate-based system + action = np.array([[-1.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0]: + break + + avg_vz = np.mean(vzs) if vzs else 0 # Should be negative (descending) + avg_sink = -avg_vz # Convert to positive sink rate + avg_speed = np.mean(speeds) if speeds else 0 + measured_LD = avg_speed / avg_sink if avg_sink > 0.1 else 0 + + RESULTS['glide_sink'] = avg_sink + RESULTS['glide_LD'] = measured_LD + + diff = avg_sink - sink_expected + status = "OK" if abs(diff) < 2 else "CHECK" + print(f"glide_ratio: L/D={measured_LD:4.1f} (theory: {LD_max:.1f}, sink: {avg_sink:.1f} m/s, expected: {sink_expected:.1f}) [{status}]") + + +def test_sustained_turn(): + """ + Sustained turn test - verifies banked flight produces a turn. + + Tests that at 30° bank, 100 m/s: + - Plane turns (heading changes) + - Turn rate is positive and consistent + - Altitude loss is bounded + + Note: The physics model produces ~2-3°/s at 30° bank (ideal theory: 3.2°/s). + This is acceptable for RL training - the physics is consistent. 
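+
+    The 3.2 deg/s reference is the standard coordinated-turn relation:
+    omega = g * tan(bank) / V = 9.81 * tan(30 deg) / 100 ~ 0.057 rad/s ~ 3.2 deg/s.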
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + + # Test parameters - 30° bank is gentle and stable + V = 100.0 # m/s + bank_deg = 30.0 # degrees + bank = np.radians(bank_deg) + + # Build quaternion: small pitch up, then bank right + alpha = np.radians(3) # Small fixed pitch for lift + + # Pitch (negative = nose up) + qp_w = np.cos(-alpha / 2) + qp_y = np.sin(-alpha / 2) + + # Roll (negative = bank right due to quaternion convention) + qr_w = np.cos(-bank / 2) + qr_x = np.sin(-bank / 2) + + # Combined: q = qr * qp + ori_w = qr_w * qp_w + ori_x = qr_x * qp_w + ori_y = qr_w * qp_y + ori_z = qr_x * qp_y + + env.reset() + setup_highlights(env, 'sustained_turn') + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(V, 0, 0), + player_ori=(ori_w, ori_x, ori_y, ori_z), + player_throttle=1.0, + ) + + # Run with zero controls + headings = [] + speeds = [] + alts = [] + + for step in range(250): # 5 seconds + state = env.get_state() + vx, vy = state['vx'], state['vy'] + heading = np.arctan2(vy, vx) + speed = np.sqrt(vx**2 + vy**2 + state['vz']**2) + alt = state['pz'] + + if step >= 50: # After 1 second settling + headings.append(heading) + speeds.append(speed) + alts.append(alt) + + action = np.array([[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0]: + break + + # Calculate turn rate + if len(headings) > 50: + headings = np.unwrap(headings) + heading_change = headings[-1] - headings[0] + time_elapsed = len(headings) * 0.02 + turn_rate_actual = np.degrees(heading_change / time_elapsed) + else: + turn_rate_actual = 0 + + avg_speed = np.mean(speeds) if speeds else 0 + alt_change = alts[-1] - alts[0] if len(alts) > 1 else 0 + + RESULTS['turn_rate'] = abs(turn_rate_actual) + + # Check: positive turn rate (plane is turning), not diving catastrophically + is_turning = abs(turn_rate_actual) > 1.0 + alt_ok = alt_change > -200 # Less than 200m loss in 5 seconds + status = "OK" if (is_turning and alt_ok) else "CHECK" + + print(f"turn_rate: {abs(turn_rate_actual):5.1f}°/s ({bank_deg:.0f}° bank, speed: {avg_speed:.0f}, Δalt: {alt_change:+.0f}m) [{status}]") + + +def test_turn_60(): + """ + Coordinated turn at 60° bank with PID control. 
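+
+    The 2.0g figure quoted below is just the load factor of a level 60 deg bank:
+    n = 1 / cos(60 deg) = 2.0.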
+ + P-51D reference: 60° bank (2.0g) at 350 mph gives 5°/s + At 100 m/s: theory = g*tan(60°)/V = 9.81*1.732/100 = 9.7°/s + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + + bank_deg = 60.0 + bank_target = np.radians(bank_deg) + V = 100.0 + + # Right bank quaternion + ori_w = np.cos(bank_target / 2) + ori_x = -np.sin(bank_target / 2) + + env.reset() + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(V, 0, 0), + player_ori=(ori_w, ori_x, 0.0, 0.0), + player_throttle=1.0, + ) + + # PID gains (found via sweep in debug_turn.py) + elev_kp, elev_kd = -0.05, 0.005 + roll_kp, roll_kd = -2.0, -0.1 + + prev_vz = 0.0 + prev_bank_error = 0.0 + + headings, alts, banks = [], [], [] + + for step in range(250): # 5 seconds + # Get state from raw state (independent of obs_scheme) + state = env.get_state() + vz = state['vz'] + alt = state['pz'] + vx, vy = state['vx'], state['vy'] + heading = np.arctan2(vy, vx) + up_y, up_z = state['up_y'], state['up_z'] + bank_actual = np.arccos(np.clip(up_z, -1, 1)) + if up_y < 0: + bank_actual = -bank_actual + + # Elevator PID + vz_error = -vz + vz_deriv = (vz - prev_vz) / 0.02 + elevator = elev_kp * vz_error + elev_kd * vz_deriv + elevator = np.clip(elevator, -1.0, 1.0) + prev_vz = vz + + # Aileron PID + bank_error = bank_target - bank_actual + bank_deriv = (bank_error - prev_bank_error) / 0.02 + aileron = roll_kp * bank_error + roll_kd * bank_deriv + aileron = np.clip(aileron, -1.0, 1.0) + prev_bank_error = bank_error + + if step >= 25: + headings.append(heading) + alts.append(alt) + banks.append(np.degrees(bank_actual)) + + action = np.array([[1.0, elevator, aileron, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0]: + break + + # Calculate results + headings = np.unwrap(headings) + turn_rate = np.degrees((headings[-1] - headings[0]) / (len(headings) * 0.02)) + alt_change = alts[-1] - alts[0] + bank_mean = np.mean(banks) + theory_rate = np.degrees(9.81 * np.tan(bank_target) / V) + eff = 100 * turn_rate / theory_rate + + RESULTS['turn_rate_60'] = turn_rate + + status = "OK" if (85 < eff < 105 and abs(alt_change) < 50) else "CHECK" + print(f"turn_60: {turn_rate:5.1f}°/s (theory: {theory_rate:.1f}, eff: {eff:.0f}%, bank: {bank_mean:.0f}°, Δalt: {alt_change:+.0f}m) [{status}]") + + +def test_pitch_direction(): + """Verify positive elevator = nose DOWN (standard joystick: push forward).""" + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + env.force_state(player_vel=(80, 0, 0)) + + # Get initial forward vector Z component (nose pointing direction) + initial_fwd_z = env.get_state()['fwd_z'] + + # Apply positive elevator (+1.0 = push forward) + action = np.array([[0.5, 1.0, 0.0, 0.0, 0.0]], dtype=np.float32) + for step in range(50): + env.step(action) + + # Check if nose went DOWN (fwd_z should decrease) + final_fwd_z = env.get_state()['fwd_z'] + nose_down = final_fwd_z < initial_fwd_z # fwd_z decreases when nose pitches down + + RESULTS['pitch_direction'] = 'DOWN' if nose_down else 'UP' + status = 'OK' if nose_down else 'WRONG' + print(f"pitch_dir: {RESULTS['pitch_direction']:>6} (+elev = nose DOWN) [{status}]") + + +def test_roll_direction(): + """Verify positive ailerons = roll right.""" + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + env.force_state(player_vel=(80, 0, 0)) + + action = np.array([[0.5, 0.0, 1.0, 0.0, 0.0]], dtype=np.float32) + for _ in range(50): + 
env.step(action) + state = env.get_state() + up_y_changed = abs(state['up_y']) > 0.1 + RESULTS['roll_works'] = 'YES' if up_y_changed else 'NO' + status = 'OK' if up_y_changed else 'WRONG' + print(f"roll_works: {RESULTS['roll_works']:>6} (should be YES) [{status}]") + + +def test_rudder_only_turn(): + """ + Test: Wings level, nose on horizon, full rudder - verify limited heading change. + + Real rudder physics: deflection creates sideslip angle (not sustained yaw rate). + Vertical tail creates restoring moment, limiting sideslip to ~10 degrees. + Once equilibrium sideslip is reached, yaw rate approaches zero. + + Expected behavior: + - Initial yaw rate is high (MAX_YAW_RATE ~29 deg/s) + - Yaw rate decays as sideslip builds + - Total heading change is LIMITED to ~10-15 degrees + - Cannot turn around with just rudder + + This test uses PID control to: + - Hold wings level (ailerons fight any roll) + - Hold nose on horizon (elevator maintains level flight) + - Apply full rudder and measure total heading change + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + setup_highlights(env, 'rudder_only_turn') + + # Start at cruise speed, wings level + V = 120.0 # m/s cruise + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(V, 0, 0), + player_ori=(1.0, 0.0, 0.0, 0.0), # Identity = wings level, heading +X + player_throttle=1.0, + ) + + # PID gains for wings level (tuned to stay stable with full rudder) + roll_kp = 1.0 # Proportional - lower prevents oscillation + roll_kd = 0.05 # Derivative damping + + # PID gains for level flight (from existing tests) + elev_kp = 0.001 + elev_kd = 0.001 + + prev_roll = 0.0 + prev_vz = 0.0 + + headings = [] + + for step in range(300): # 6 seconds at 50Hz + # Extract state from raw state (independent of obs_scheme) + state = env.get_state() + vx, vy, vz = state['vx'], state['vy'], state['vz'] + up_y, up_z = state['up_y'], state['up_z'] + + # Calculate heading from velocity + heading = np.arctan2(vy, vx) + headings.append(heading) + + # Calculate roll angle from up vector + roll = np.arctan2(up_y, up_z) + + # Wings level PID: drive roll to zero + roll_error = 0.0 - roll + roll_deriv = (roll - prev_roll) / 0.02 + aileron = roll_kp * roll_error - roll_kd * roll_deriv + aileron = np.clip(aileron, -1.0, 1.0) + prev_roll = roll + + # Level flight PID: drive vz to zero + vz_error = 0.0 - vz + vz_deriv = (vz - prev_vz) / 0.02 + elevator = -elev_kp * vz_error - elev_kd * vz_deriv + elevator = np.clip(elevator, -0.3, 0.3) + prev_vz = vz + + # FULL RUDDER + rudder = 1.0 + + # Action: [throttle, elevator, aileron, rudder, trigger] + action = np.array([[1.0, elevator, aileron, rudder, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + + if term[0]: + break + + # Analyze heading change + headings = np.unwrap(headings) # Handle wraparound + total_heading_change_deg = np.degrees(headings[-1] - headings[0]) if len(headings) > 1 else 0 + + # Calculate initial yaw rate (first 0.5 seconds = 25 steps) + if len(headings) > 25: + initial_change = headings[25] - headings[0] + initial_yaw_rate_deg_s = np.degrees(initial_change / 0.5) + else: + initial_yaw_rate_deg_s = 0 + + # Calculate final yaw rate (last 2 seconds) + if len(headings) > 200: + final_change = headings[-1] - headings[-100] + final_yaw_rate_deg_s = np.degrees(final_change / 2.0) + else: + final_yaw_rate_deg_s = 0 + + RESULTS['rudder_total_heading'] = total_heading_change_deg + RESULTS['rudder_initial_rate'] = initial_yaw_rate_deg_s + 
RESULTS['rudder_final_rate'] = final_yaw_rate_deg_s + + # Verify damping behavior: + # Real rudder physics: heading changes slowly because rudder creates sideslip, + # NOT a direct heading rate. The sideforce from sideslip is what turns the velocity. + # + # Expected behavior: + # 1. Total heading change should be limited and small (~3-15 degrees) + # - Rudder can't spin the plane around, it's a small control + # 2. Heading changes at all (rudder has SOME effect) + # 3. Final rate should be similar to initial (slow, steady turn from sideslip) + # + # Note: In a P-51D, full rudder at cruise gives ~5-10° sideslip and very slow turn + heading_changed = abs(total_heading_change_deg) > 2.0 # Rudder does something + heading_limited = abs(total_heading_change_deg) < 20.0 # Can't do unlimited turns + + is_realistic = heading_changed and heading_limited + status = "OK" if is_realistic else "FAIL" + + print(f"rudder_only: heading={total_heading_change_deg:5.1f}° (2-20° OK), " + f"initial={initial_yaw_rate_deg_s:5.1f}°/s, final={final_yaw_rate_deg_s:4.1f}°/s [{status}]") + + if not is_realistic: + if not heading_changed: + print(f" ISSUE: Rudder should change heading (got only {total_heading_change_deg:.1f}°)") + if not heading_limited: + print(f" ISSUE: Heading change should be <20°, got {total_heading_change_deg:.1f}°") + + +def test_knife_edge_pull(): + """ + Knife-edge pull test - validates that elevator becomes YAW when rolled 90°. + + Physics explanation: + - Plane rolled 90° right: right wing DOWN, canopy facing RIGHT + - Body axes after roll: + - Body X (nose): +X world (forward) + - Body Y (right wing): -Z world (DOWN) + - Body Z (canopy): +Y world (RIGHT) + - Negative elevator (pull back) = pitch up in BODY frame = rotation about body Y + - Body Y is now -Z world, so this is rotation about world -Z + - Right-hand rule: thumb on -Z, fingers curl +X toward -Y + - Result: Nose yaws LEFT in world frame (since we pull back = negative elevator) + + Expected behavior: + 1. Heading changes significantly (plane turns left with pull back) + 2. Altitude drops (lift is horizontal, not vertical) + 3. Up vector stays roughly horizontal (still in knife-edge) + 4. This is essentially a "flat turn" using elevator + + This tests that the quaternion kinematics correctly transform body-frame + rotations to world-frame effects. 
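+
+    For reference, the exact 90 deg right-roll quaternion forced below is
+    q = (cos 45 deg, -sin 45 deg, 0, 0) ~ (0.707, -0.707, 0, 0); rotating the
+    body Y axis by it gives world (0, 0, -1), i.e. the right wing straight down,
+    matching the axis table above.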
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + setup_highlights(env, 'knife_edge_pull') + + # Start at high speed to avoid stall during the pull + V = 150.0 # m/s - well above stall speed even at high AoA + + # Use EXACT 90° right roll via force_state for precise test + # Roll -90° about X axis: q = (cos(45°), -sin(45°), 0, 0) + roll_90 = np.radians(90) + qw = np.cos(roll_90 / 2) + qx = -np.sin(roll_90 / 2) # Negative for right roll + + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(V, 0, 0), # Flying +X + player_ori=(qw, qx, 0.0, 0.0), # EXACT 90° right roll + player_throttle=1.0, + ) + + # Verify knife-edge achieved + state = env.get_state() + up_x, up_y, up_z = state['up_x'], state['up_y'], state['up_z'] + + # Record initial state + alt_start = state['pz'] + vx_start, vy_start = state['vx'], state['vy'] + heading_start = np.arctan2(vy_start, vx_start) + + # --- Phase 2: Full elevator pull in knife-edge --- + headings = [] + alts = [] + up_zs = [] + + for step in range(100): # 2 seconds + state = env.get_state() + vx, vy, vz = state['vx'], state['vy'], state['vz'] + heading = np.arctan2(vy, vx) + alt = state['pz'] + up_z_now = state['up_z'] + + headings.append(heading) + alts.append(alt) + up_zs.append(up_z_now) + + # Full throttle, FULL ELEVATOR PULL, no aileron, no rudder + # Convention: -elevator = pull back = nose up + action = np.array([[1.0, -1.0, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0]: + break + + # --- Analysis --- + headings = np.unwrap(headings) + heading_change = np.degrees(headings[-1] - headings[0]) + alt_loss = alt_start - alts[-1] + avg_up_z = np.mean(up_zs) + time_elapsed = len(headings) * 0.02 + + # Calculate turn rate + turn_rate = heading_change / time_elapsed if time_elapsed > 0 else 0 + + RESULTS['knife_pull_turn'] = turn_rate + RESULTS['knife_pull_alt_loss'] = alt_loss + + # Expected: + # 1. Significant heading change (turns left with pull back, so negative) + # 2. Altitude loss (no vertical lift) + # 3. Up vector stays near horizontal (|up_z| small) + + # In our coordinate system: X forward, Y left, Z up + # atan2(vy, vx) increases when turning left (positive vy) + heading_ok = heading_change > 20 # Should turn at least 20° left in 2 seconds + alt_ok = alt_loss > 5 # Should lose altitude + roll_maintained = abs(avg_up_z) < 0.3 # Up vector stays roughly horizontal + + all_ok = heading_ok and alt_ok and roll_maintained + status = "OK" if all_ok else "CHECK" + + # Positive heading change = LEFT turn (Y is left in our coords) + direction = "LEFT" if heading_change > 0 else "RIGHT" + print(f"knife_pull: turn={turn_rate:+.1f}°/s ({direction}), alt_lost={alt_loss:.0f}m, |up_z|={abs(avg_up_z):.2f} [{status}]") + + if not heading_ok: + print(f" WARNING: Expected significant left turn, got {heading_change:.1f}° heading change") + if not alt_ok: + print(f" WARNING: Expected altitude loss, got {alt_loss:.1f}m") + if not roll_maintained: + print(f" WARNING: Roll not maintained, up_z={avg_up_z:.2f} (should be near 0)") + + +def test_knife_edge_flight(): + """ + Knife-edge flight test - validates that the plane CANNOT maintain altitude. + + In knife-edge flight (90° roll), the wings are vertical and generate + NO vertical lift. The plane must rely on: + 1. Fuselage side area (very inefficient, NOT modeled) + 2. Rudder sideforce (NOT modeled - rudder only creates yaw rate) + 3. 
Thrust vector (only if nosed up significantly) + + A P-51D is NOT designed for knife-edge - streamlined fuselage = poor side area. + Even purpose-built aerobatic planes struggle to maintain altitude in true knife-edge. + + Expected behavior: Plane should lose altitude rapidly (~9 m/s sink or more). + The nose may yaw from rudder input, but vertical force is insufficient. + + Sources: + - https://www.thenakedscientists.com/articles/questions/what-produces-lift-during-knife-edge-pass + - https://www.aopa.org/news-and-media/all-news/1998/august/flight-training-magazine/form-and-function + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + setup_highlights(env, 'knife_edge_flight') + + # Start at cruise speed, wings level, flying +X + V = 120.0 # m/s - fast enough for good control authority + env.force_state( + player_pos=(0, 0, 1500), # High altitude for test duration + player_vel=(V, 0, 0), # Flying +X direction + player_ori=(1.0, 0.0, 0.0, 0.0), # Wings level + player_throttle=1.0, + ) + + # --- Phase 1: Roll to knife-edge (90° right) --- + # Takes about 30 steps at MAX_ROLL_RATE=3.0 rad/s (0.5s to roll 90°) + for step in range(30): + # Full right aileron to roll 90° + action = np.array([[1.0, 0.0, 1.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + + # Verify we're in knife-edge (up vector should be pointing +Y or -Y) + state = env.get_state() + up_y, up_z = state['up_y'], state['up_z'] + roll_deg = np.degrees(np.arccos(np.clip(up_z, -1, 1))) + + # Record altitude at start of knife-edge + alt_start = state['pz'] + + if abs(roll_deg - 90) > 15: + print(f"knife_edge: [SKIP] Failed to roll to 90° (got {roll_deg:.0f}°)") + return + + # --- Phase 2: Knife-edge with top rudder --- + # Right wing is down (up_y < 0 means rolled right) + # Negative rudder = yaw LEFT in body frame + # In knife-edge, body-left is world-up, so this tries to pitch nose up + + alts = [] + vzs = [] + + for step in range(150): # 3 seconds at 50Hz + state = env.get_state() + alt = state['pz'] + vz = state['vz'] + alts.append(alt) + vzs.append(vz) + + # Full throttle, no elevator, no aileron (hold knife-edge), TOP RUDDER + # Negative rudder = yaw LEFT in body frame + # In knife-edge (rolled 90° right), body-left is world-up + # So this SHOULD help keep nose up... if rudder created sideforce + action = np.array([[1.0, 0.0, 0.0, -1.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0]: + break + + alt_end = alts[-1] if alts else alt_start + alt_loss = alt_start - alt_end + avg_vz = np.mean(vzs) if vzs else 0 + time_elapsed = len(alts) * 0.02 # seconds + + # Calculate sink rate + sink_rate = alt_loss / time_elapsed if time_elapsed > 0 else 0 + + RESULTS['knife_edge_sink'] = sink_rate + RESULTS['knife_edge_alt_loss'] = alt_loss + + # Expected: significant altitude loss + # At 1g downward acceleration: v = g*t = 9.81 * 3 = 29 m/s after 3s + # Distance = 0.5 * g * t^2 = 0.5 * 9.81 * 9 = 44 m (free fall) + # With some lift from thrust vector angle, maybe 20-30m loss + # If plane CAN maintain altitude (loss < 5m), physics is WRONG + + is_realistic = alt_loss > 10 # Should lose at least 10m in 3 seconds + status = "OK" if is_realistic else "FAIL - physics allows impossible knife-edge!" + + print(f"knife_edge: sink={sink_rate:5.1f} m/s, alt_lost={alt_loss:.0f}m in {time_elapsed:.1f}s [{status}]") + + if not is_realistic: + print(f" WARNING: P-51D should NOT maintain altitude in knife-edge!") + print(f" Wings are vertical = no lift. 
Rudder only creates yaw, not sideforce.") + print(f" Consider: Is thrust somehow pointing upward? Is there phantom lift?") + + +def test_mode_weights(): + """ + Test that mode_weights actually biases autopilot randomization. + + Sets 100% weight on AP_LEVEL, triggers multiple resets, + verifies that selected mode is always AP_LEVEL. + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Set AP_RANDOM mode and bias 100% toward LEVEL + env.set_autopilot(env_idx=0, mode=AutopilotMode.RANDOM) + env.set_mode_weights(level=1.0, turn_left=0.0, turn_right=0.0, climb=0.0, descend=0.0) + + # Trigger multiple resets and check mode each time + level_count = 0 + num_trials = 50 + + for _ in range(num_trials): + env.reset() + mode = env.get_autopilot_mode(env_idx=0) + if mode == AutopilotMode.LEVEL: + level_count += 1 + + pct = 100 * level_count / num_trials + RESULTS['mode_weights'] = pct + + # With 100% weight on LEVEL, should always get LEVEL + status = "OK" if pct == 100 else "CHECK" + print(f"mode_weights: {pct:5.1f}% (should be 100% AP_LEVEL) [{status}]") + + # Also test distribution with mixed weights + env.set_autopilot(env_idx=0, mode=AutopilotMode.RANDOM) # Re-enable randomization + env.set_mode_weights(level=0.5, turn_left=0.25, turn_right=0.25, climb=0.0, descend=0.0) + + counts = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0} # LEVEL, TURN_L, TURN_R, CLIMB, DESCEND + num_trials = 200 + + for _ in range(num_trials): + env.reset() + mode = env.get_autopilot_mode(env_idx=0) + if mode in counts: + counts[mode] += 1 + + # Check that LEVEL is most common (~50%) and CLIMB/DESCEND are rare (~0%) + level_pct = 100 * counts[1] / num_trials + climb_pct = 100 * counts[4] / num_trials + distribution_ok = level_pct > 35 and climb_pct < 10 + status2 = "OK" if distribution_ok else "CHECK" + print(f" distribution: LEVEL={level_pct:.0f}%, TURN_L={100*counts[2]/num_trials:.0f}%, TURN_R={100*counts[3]/num_trials:.0f}%, CLIMB={climb_pct:.0f}% [{status2}]") + + +# ============================================================================= +# G-FORCE TESTS - Validate G-loading physics +# ============================================================================= + +def test_g_level_flight(): + """ + Level flight at cruise speed - verify G ≈ 1.0. + In steady level flight, lift equals weight, so G-loading should be ~1.0. + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start at cruise speed, level + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(120, 0, 0), + player_throttle=0.5, + ) + + g_values = [] + for step in range(200): # 4 seconds + elevator = level_flight_pitch_from_state(env) + action = np.array([[0.0, elevator, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + + g = env.get_state()['g_force'] + g_values.append(g) + + if step % 25 == 0: + print(f" step {step:3d}: G = {g:.2f}") + + avg_g = np.mean(g_values[-100:]) # Last 2 seconds + RESULTS['g_level'] = avg_g + + status = "OK" if 0.8 < avg_g < 1.2 else "CHECK" + print(f"g_level: {avg_g:.2f} G (target: ~1.0) [{status}]") + + +def test_g_push_forward(): + """ + Push elevator forward - verify G decreases toward 0 and negative. + Reset to level flight for each test to avoid looping artifacts. 
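+
+    Load factor here is n = L/W: n = 1 in level flight, n = 0 with the wing fully
+    unloaded (ballistic arc), and n < 0 when lift points at the ground. The check
+    below only requires the full-forward push to reach below 0.5 G.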
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + + print(" Pushing forward (positive elevator = nose down):") + min_g = float('inf') + + for elev in [0.0, 0.25, 0.5, 0.75, 1.0]: + # Reset to level flight for each elevator setting + env.reset() + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(150, 0, 0), + player_throttle=1.0, + ) + + # Run for 10 steps (0.2 sec) and track min G + test_min_g = float('inf') + for _ in range(10): + action = np.array([[1.0, elev, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + g = env.get_state()['g_force'] + test_min_g = min(test_min_g, g) + + min_g = min(min_g, test_min_g) + print(f" elevator={elev:+.2f}: min G = {test_min_g:+.2f}") + + RESULTS['g_push'] = min_g + + # Full push should give low/negative G + status = "OK" if min_g < 0.5 else "CHECK" + print(f"g_push: {min_g:+.2f} G (push should give < 0.5G) [{status}]") + + +def test_g_pull_back(): + """ + Pull elevator back - verify G increases above 1.0. + Reset to level flight for each test to avoid looping artifacts. + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + + print(" Pulling back (negative elevator = nose up):") + max_g = float('-inf') + + for elev in [0.0, -0.25, -0.5, -0.75, -1.0]: + # Reset to level flight for each elevator setting + env.reset() + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(150, 0, 0), # Higher speed for more G capability + player_throttle=1.0, + ) + + # Run for 10 steps (0.2 sec) and track max G + test_max_g = float('-inf') + for _ in range(10): + action = np.array([[1.0, elev, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + g = env.get_state()['g_force'] + test_max_g = max(test_max_g, g) + + max_g = max(max_g, test_max_g) + print(f" elevator={elev:+.2f}: max G = {test_max_g:+.2f}") + + RESULTS['g_pull'] = max_g + + # Full pull should give high G (at 150 m/s, should hit ~5-6G) + status = "OK" if max_g > 4.0 else "CHECK" + print(f"g_pull: {max_g:+.2f} G (pull should give > 4.0G) [{status}]") + + +def test_g_limit_negative(): + """ + Full forward stick - verify G never goes below -1.5G (G_LIMIT_NEG). + Physics should clamp acceleration to prevent exceeding this limit. + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start at high speed for maximum control authority + env.force_state( + player_pos=(0, 0, 2000), + player_vel=(150, 0, 0), + player_throttle=1.0, + ) + + g_min = float('inf') + for step in range(150): # 3 seconds of full push + action = np.array([[1.0, 1.0, 0.0, 0.0, 0.0]], dtype=np.float32) # Full forward + env.step(action) + + g = env.get_state()['g_force'] + g_min = min(g_min, g) + + if step % 25 == 0: + print(f" step {step:3d}: G = {g:+.2f} (min so far: {g_min:+.2f})") + + RESULTS['g_min'] = g_min + + # Should never go below -1.5G (with small tolerance) + G_LIMIT_NEG = -1.5 + status = "OK" if g_min >= G_LIMIT_NEG - 0.1 else "FAIL" + print(f"g_limit_neg: {g_min:+.2f} G (limit: {G_LIMIT_NEG}G) [{status}]") + assert g_min >= G_LIMIT_NEG - 0.1, f"G went below limit: {g_min} < {G_LIMIT_NEG}" + + +def test_g_limit_positive(): + """ + Full back stick - verify G never exceeds 6G (G_LIMIT_POS). + Physics should clamp acceleration to prevent exceeding this limit. 
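+
+    Why a hard clamp is needed: with the aero constants used in the stall test,
+    the lift available at 180 m/s is roughly
+    n = 0.5 * rho * V^2 * S * C_L_max / W ~ 16 g, far above the 6 g structural
+    limit, so the cap must come from the limiter rather than from aerodynamics.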
+ """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + env.reset() + + # Start at high speed for maximum G capability + env.force_state( + player_pos=(0, 0, 2000), + player_vel=(180, 0, 0), # Very fast + player_throttle=1.0, + ) + + g_max = float('-inf') + for step in range(150): # 3 seconds of full pull + action = np.array([[1.0, -1.0, 0.0, 0.0, 0.0]], dtype=np.float32) # Full pull + env.step(action) + + g = env.get_state()['g_force'] + g_max = max(g_max, g) + + if step % 25 == 0: + print(f" step {step:3d}: G = {g:+.2f} (max so far: {g_max:+.2f})") + + RESULTS['g_max'] = g_max + + # Should never exceed 6G (with small tolerance) + G_LIMIT_POS = 6.0 + status = "OK" if g_max <= G_LIMIT_POS + 0.1 else "FAIL" + print(f"g_limit_pos: {g_max:+.2f} G (limit: {G_LIMIT_POS}G) [{status}]") + assert g_max <= G_LIMIT_POS + 0.1, f"G exceeded limit: {g_max} > {G_LIMIT_POS}" + + +def test_gentle_pitch_control(): + """ + Test that small elevator inputs produce proportional, gentle pitch changes. + + This is CRITICAL for fine aim adjustments - the agent must be able to make + precise 2.5° corrections, not just bang-bang full deflection. + + Tests: + 1. -0.1 elevator: should give small pitch rate (~5°/s or less) + 2. -0.25 elevator: should give larger pitch rate (~10-15°/s) + 3. Verify linear relationship (not bang-bang) + 4. Calculate time to make 2.5° adjustment + """ + env = Dogfight(num_envs=1, render_mode=get_render_mode(), render_fps=get_render_fps()) + + elevator_values = [-0.05, -0.1, -0.15, -0.2, -0.25, -0.3] + pitch_rates = [] + + print(" Testing gentle elevator inputs (negative = pull back = nose UP):") + + for elev in elevator_values: + env.reset() + + # Start level at cruise speed + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(120, 0, 0), # Cruise speed + player_ori=(1.0, 0.0, 0.0, 0.0), # Wings level + player_throttle=0.7, + ) + + # Record initial pitch + state = env.get_state() + fwd_x_start, fwd_z_start = state['fwd_x'], state['fwd_z'] + pitch_start = np.arctan2(fwd_z_start, fwd_x_start) + + # Apply constant elevator for 1 second (50 steps) + for step in range(50): + action = np.array([[0.4, elev, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + + # Measure final pitch + state = env.get_state() + fwd_x_end, fwd_z_end = state['fwd_x'], state['fwd_z'] + pitch_end = np.arctan2(fwd_z_end, fwd_x_end) + + pitch_change_deg = np.degrees(pitch_end - pitch_start) + pitch_rate = pitch_change_deg / 1.0 # degrees per second + pitch_rates.append(pitch_rate) + + print(f" elevator={elev:+.2f}: pitch_rate={pitch_rate:+.1f}°/s, pitch_change={pitch_change_deg:+.1f}°") + + # Check for proportional response + # Ratio of pitch rates should roughly match ratio of elevator inputs + rate_at_01 = pitch_rates[1] # -0.1 elevator + rate_at_025 = pitch_rates[4] # -0.25 elevator + + # Store results + RESULTS['pitch_rate_01'] = rate_at_01 + RESULTS['pitch_rate_025'] = rate_at_025 + + # Calculate time to make 2.5° adjustment at -0.1 elevator + if abs(rate_at_01) > 0.1: + time_for_25deg = 2.5 / abs(rate_at_01) + else: + time_for_25deg = float('inf') + + RESULTS['time_for_25deg'] = time_for_25deg + + # Check proportionality: -0.25 should give ~2.5x the rate of -0.1 + expected_ratio = 2.5 + actual_ratio = rate_at_025 / rate_at_01 if abs(rate_at_01) > 0.1 else 0 + + # Verify reasonable pitch rates (not too fast, not too slow) + # -0.1 elevator should give roughly 3-8°/s (gentle but noticeable) + gentle_ok = 2.0 < abs(rate_at_01) < 15.0 + proportional_ok = 1.5 < 
actual_ratio < 4.0 # Some non-linearity is OK + can_aim = time_for_25deg < 2.0 # Should be able to make 2.5° adjustment in <2 seconds + + all_ok = gentle_ok and proportional_ok and can_aim + status = "OK" if all_ok else "CHECK" + + print(f" Results:") + print(f" -0.1 elevator gives {rate_at_01:+.1f}°/s (want 3-8°/s) [{gentle_ok and 'OK' or 'CHECK'}]") + print(f" -0.25/-0.1 ratio = {actual_ratio:.2f} (want ~2.5, linear) [{proportional_ok and 'OK' or 'CHECK'}]") + print(f" Time to adjust 2.5° at -0.1: {time_for_25deg:.2f}s (want <2s) [{can_aim and 'OK' or 'CHECK'}]") + print(f"gentle_pitch: rate@-0.1={rate_at_01:+.1f}°/s, 2.5°_time={time_for_25deg:.2f}s [{status}]") + + if not gentle_ok: + if abs(rate_at_01) < 2.0: + print(f" WARNING: Pitch too sluggish! Agent can't make timely aim corrections.") + else: + print(f" WARNING: Pitch too sensitive! Agent will overshoot aim.") + + if not proportional_ok: + print(f" WARNING: Non-linear pitch response - may indicate bang-bang controls.") + + +# Test registry for this module +TESTS = { + 'max_speed': test_max_speed, + 'acceleration': test_acceleration, + 'deceleration': test_deceleration, + 'cruise_speed': test_cruise_speed, + 'stall_speed': test_stall_speed, + 'climb_rate': test_climb_rate, + 'glide_ratio': test_glide_ratio, + 'sustained_turn': test_sustained_turn, + 'turn_60': test_turn_60, + 'pitch_direction': test_pitch_direction, + 'roll_direction': test_roll_direction, + 'rudder_only_turn': test_rudder_only_turn, + 'knife_edge_pull': test_knife_edge_pull, + 'knife_edge_flight': test_knife_edge_flight, + 'mode_weights': test_mode_weights, + # G-force tests + 'g_level_flight': test_g_level_flight, + 'g_push_forward': test_g_push_forward, + 'g_pull_back': test_g_pull_back, + 'g_limit_negative': test_g_limit_negative, + 'g_limit_positive': test_g_limit_positive, + # Fine control tests + 'gentle_pitch': test_gentle_pitch_control, +} + + +if __name__ == "__main__": + from test_flight_base import get_args + args = get_args() + + print("P-51D Physics Validation Tests") + print("=" * 60) + + if args.test: + if args.test in TESTS: + print(f"Running single test: {args.test}") + if get_render_mode(): + print("Rendering enabled - press ESC to exit") + print("=" * 60) + TESTS[args.test]() + else: + print(f"Unknown test: {args.test}") + print(f"Available tests: {', '.join(TESTS.keys())}") + else: + print("Running all physics tests") + if get_render_mode(): + print("Rendering enabled - press ESC to exit") + print("=" * 60) + for test_func in TESTS.values(): + test_func() diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py index 93df76506..70cd2d988 100644 --- a/pufferlib/ocean/environment.py +++ b/pufferlib/ocean/environment.py @@ -122,6 +122,7 @@ def make_multiagent(buf=None, **kwargs): 'blastar': 'Blastar', 'convert': 'Convert', 'convert_circle': 'ConvertCircle', + 'dogfight': 'Dogfight', 'pong': 'Pong', 'freeway': 'Freeway', 'enduro': 'Enduro', diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index 8cf4ffe7d..5d9d0e4d9 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -299,7 +299,7 @@ def decode_actions(self, flat_hidden, state=None): value = self.value_fn(flat_hidden) if self.is_continuous: mean = self.decoder_mean(flat_hidden) - logstd = self.decoder_logstd.expand_as(mean) + logstd = self.decoder_logstd.expand_as(mean).clamp(min=-20, max=2) std = torch.exp(logstd) probs = torch.distributions.Normal(mean, std) batch = flat_hidden.shape[0] @@ -433,7 +433,7 @@ def decode_actions(self, 
flat_hidden): value = self.value_fn(flat_hidden) if self.is_continuous: mean = self.decoder_mean(flat_hidden) - logstd = self.decoder_logstd.expand_as(mean) + logstd = self.decoder_logstd.expand_as(mean).clamp(min=-20, max=2) std = torch.exp(logstd) probs = torch.distributions.Normal(mean, std) batch = flat_hidden.shape[0] @@ -893,7 +893,7 @@ def decode_actions(self, hidden): logits = self.decoder(hidden).split(self.action_nvec, dim=1) elif self.is_continuous: mean = self.decoder_mean(hidden) - logstd = self.decoder_logstd.expand_as(mean) + logstd = self.decoder_logstd.expand_as(mean).clamp(min=-20, max=2) std = torch.exp(logstd) logits = torch.distributions.Normal(mean, std) else: diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 3972e722f..16facb7bd 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -939,6 +939,14 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None, should_sto logger = WandbLogger(args) train_config = { **args['train'], 'env': env_name } + + # Print training hyperparameters for debugging + print(f"=== TRAIN CONFIG ===") + print(f" clip_coef={train_config.get('clip_coef', 'N/A'):.4f}, gae_lambda={train_config.get('gae_lambda', 'N/A'):.4f}") + print(f" learning_rate={train_config.get('learning_rate', 'N/A'):.6f}, max_grad_norm={train_config.get('max_grad_norm', 'N/A'):.4f}") + print(f" gamma={train_config.get('gamma', 'N/A'):.6f}, ent_coef={train_config.get('ent_coef', 'N/A'):.6f}") + print(f" adam_eps={train_config.get('adam_eps', 'N/A'):.2e}") + pufferl = PuffeRL(train_config, vecenv, policy, logger) all_logs = [] @@ -1041,6 +1049,11 @@ def sweep(args=None, env_name=None): raise pufferlib.APIUsageError('Sweeps require either wandb or neptune') method = args['sweep'].pop('method') + + project = args.get('wandb_project', args.get('neptune_project', 'sweep')) + args['sweep'].setdefault('state_file', f'{project}_sweep.json') + args['sweep'].setdefault('override_file', f'{project}_override.json') + try: sweep_cls = getattr(pufferlib.sweep, method) except: diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py index 41401b6f6..b1ab2bfbd 100644 --- a/pufferlib/sweep.py +++ b/pufferlib/sweep.py @@ -1,6 +1,8 @@ import random import math import warnings +import os +import json from copy import deepcopy from contextlib import contextmanager @@ -129,7 +131,8 @@ def unnormalize(self, value): def _params_from_puffer_sweep(sweep_config): param_spaces = {} for name, param in sweep_config.items(): - if name in ('method', 'metric', 'goal', 'downsample', 'use_gpu', 'prune_pareto'): + if name in ('method', 'metric', 'goal', 'downsample', 'use_gpu', 'prune_pareto', + 'state_file', 'override_file'): continue assert isinstance(param, dict) @@ -467,6 +470,9 @@ def __init__(self, self.gp_max_obs = gp_max_obs # train time bumps after 800? 
self.infer_batch_size = infer_batch_size + self.state_file = sweep_config.get('state_file', 'sweep_state.json') + self.override_file = sweep_config.get('override_file', 'override.json') + # Use 64 bit for GP regression with default_tensor_dtype(torch.float64): # Params taken from HEBO: https://arxiv.org/abs/2012.03826 @@ -493,6 +499,110 @@ def __init__(self, self.gp_cost_buffer = torch.empty(self.gp_max_obs, device=self.device) self.infer_batch_buffer = torch.empty(self.infer_batch_size, self.hyperparameters.num, device=self.device) + self._load_state_if_exists() + + @staticmethod + def _json_default(obj): + """JSON serializer for numpy types.""" + if isinstance(obj, np.ndarray): + return obj.tolist() + if isinstance(obj, (np.floating, np.integer)): + return obj.item() + raise TypeError(f'Not JSON serializable: {type(obj)}') + + def _save_state(self): + """Save sweep state to JSON for crash recovery.""" + state = { + 'suggestion_idx': self.suggestion_idx, + 'success_observations': self.success_observations, + 'failure_observations': self.failure_observations, + 'min_score': self.min_score if self.min_score != math.inf else None, + 'max_score': self.max_score if self.max_score != -math.inf else None, + 'log_c_min': self.log_c_min if self.log_c_min != math.inf else None, + 'log_c_max': self.log_c_max if self.log_c_max != -math.inf else None, + } + tmp = f'{self.state_file}.tmp' + try: + with open(tmp, 'w') as f: + json.dump(state, f, indent=2, default=self._json_default) + os.replace(tmp, self.state_file) + except OSError as e: + print(f'[Protein] Failed to save state: {e}') + if os.path.exists(tmp): + os.remove(tmp) + + def _load_state_if_exists(self): + """Load state from previous run if exists (crash recovery).""" + tmp = f'{self.state_file}.tmp' + if os.path.exists(tmp): + os.remove(tmp) + if not os.path.exists(self.state_file): + return + try: + with open(self.state_file) as f: + state = json.load(f) + self.suggestion_idx = state.get('suggestion_idx', 0) + self.success_observations = state.get('success_observations', []) + self.failure_observations = state.get('failure_observations', []) + if state.get('min_score') is not None: + self.min_score = state['min_score'] + if state.get('max_score') is not None: + self.max_score = state['max_score'] + if state.get('log_c_min') is not None: + self.log_c_min = state['log_c_min'] + if state.get('log_c_max') is not None: + self.log_c_max = state['log_c_max'] + for obs in self.success_observations + self.failure_observations: + if isinstance(obs['input'], list): + obs['input'] = np.array(obs['input']) + print(f'[Protein] Resumed from {self.state_file}: {len(self.success_observations)} obs, idx={self.suggestion_idx}') + except (json.JSONDecodeError, KeyError, FileNotFoundError, OSError) as e: + print(f'[Protein] Failed to load state: {e}') + + def _check_override(self): + """Check for user/agent override hyperparams. 
Returns params dict or None.""" + if not os.path.exists(self.override_file): + return None + tmp = f'{self.override_file}.tmp' + try: + with open(self.override_file) as f: + data = json.load(f) + if 'suggestions' not in data or not data['suggestions']: + os.remove(self.override_file) + return None + suggestion = data['suggestions'].pop(0) + if data['suggestions']: + with open(tmp, 'w') as f: + json.dump(data, f, indent=2) + os.replace(tmp, self.override_file) + else: + os.remove(self.override_file) + reason = suggestion.get('reason', 'No reason provided') + print(f'[Protein] OVERRIDE: {reason}') + return suggestion.get('params', suggestion) + except (json.JSONDecodeError, KeyError) as e: + print(f'[Protein] Invalid override file: {e}') + if os.path.exists(self.override_file): + os.remove(self.override_file) + return None + except OSError as e: + print(f'[Protein] Failed to update override file: {e}') + if os.path.exists(tmp): + os.remove(tmp) + return None + + def _apply_params_to_fill(self, fill, params): + """Apply param dict to fill in place. Modifies fill directly.""" + for key, value in pufferlib.unroll_nested_dict(params): + parts = key.split('/') + try: + target = fill + for part in parts[:-1]: + target = target[part] + target[parts[-1]] = value + except KeyError: + print(f'[Protein] Override key not found: {key}') + def _filter_near_duplicates(self, inputs, duplicate_threshold=EPSILON): if len(inputs) < 2: return np.arange(len(inputs)) @@ -574,6 +684,13 @@ def _train_gp_models(self): def suggest(self, fill): info = {} self.suggestion_idx += 1 + + override = self._check_override() + if override: + self._apply_params_to_fill(fill, override) + info['override'] = True + return fill, info + if len(self.success_observations) == 0 and self.seed_with_search_center: suggestion = self.hyperparameters.search_centers return self.hyperparameters.to_dict(suggestion, fill), info @@ -697,6 +814,7 @@ def observe(self, hypers, score, cost, is_failure=False): if is_failure or not np.isfinite(score) or np.isnan(score): new_observation['is_failure'] = True self.failure_observations.append(new_observation) + self._save_state() return if self.success_observations: @@ -705,6 +823,7 @@ def observe(self, hypers, score, cost, is_failure=False): same = np.where(dist < EPSILON)[0] if len(same) > 0: self.success_observations[same[0]] = new_observation + self._save_state() return # Ignore obs that are below the minimum cost @@ -712,3 +831,83 @@ def observe(self, hypers, score, cost, is_failure=False): return self.success_observations.append(new_observation) + self._save_state() + + +def read_sweep_results(state_file, sweep_config, sort_by='score'): + """ + Load sweep results as user/agent-readable dicts. + + Args: + state_file: Path to {project}_sweep.json + sweep_config: The 'sweep' section from load_config() + sort_by: 'score' (descending), 'cost' (ascending), or None + + Returns: + List of dicts: [{'params': {...}, 'score': float, 'cost': float}, ...] + """ + with open(state_file) as f: + state = json.load(f) + + hyperparams = Hyperparameters(sweep_config, verbose=False) + + results = [] + for obs in state.get('success_observations', []): + input_vec = np.array(obs['input']) + + if len(input_vec) != hyperparams.num: + raise ValueError( + f"State file has {len(input_vec)} dimensions but config has {hyperparams.num}. " + f"Config may have changed since sweep started." 
+ ) + + params = hyperparams.to_dict(input_vec) + flat_params = dict(pufferlib.unroll_nested_dict(params)) + + results.append({ + 'params': flat_params, + 'score': obs['output'], + 'cost': obs['cost'], + }) + + if sort_by == 'score': + results.sort(key=lambda x: x['score'], reverse=True) + elif sort_by == 'cost': + results.sort(key=lambda x: x['cost']) + + return results + + +def create_override(override_file, suggestions, reason=None): + """ + Inject hyperparams into next sweep run. Use real values, not normalized. + + Args: + override_file: Path to write + suggestions: List of dicts, e.g. [{'train/learning_rate': 0.001}] + reason: List of strings (same length as suggestions), or None + """ + if reason is None: + reason = [None] * len(suggestions) + if len(reason) != len(suggestions): + raise ValueError(f"Got {len(suggestions)} suggestions but {len(reason)} reasons") + + data = { + 'suggestions': [ + { + 'params': s, + 'reason': r or 'Programmatic override' + } + for s, r in zip(suggestions, reason) + ] + } + + tmp = f'{override_file}.tmp' + try: + with open(tmp, 'w') as f: + json.dump(data, f, indent=2) + os.replace(tmp, override_file) + except OSError: + if os.path.exists(tmp): + os.remove(tmp) + raise diff --git a/setup.py b/setup.py index 552cb00e8..9fd3bf9fc 100644 --- a/setup.py +++ b/setup.py @@ -189,14 +189,15 @@ def run(self): # Find C extensions c_extensions = [] if not NO_OCEAN: - c_extension_paths = glob.glob('pufferlib/ocean/**/binding.c', recursive=True) + #c_extension_paths = glob.glob('pufferlib/ocean/**/binding.c', recursive=True) + c_extension_paths = ['pufferlib/ocean/dogfight/binding.c'] c_extensions = [ Extension( path.rstrip('.c').replace('/', '.'), sources=[path], **extension_kwargs, ) - for path in c_extension_paths if 'matsci' not in path + for path in c_extension_paths# if 'matsci' not in path ] c_extension_paths = [os.path.join(*path.split('/')[:-1]) for path in c_extension_paths] diff --git a/tests/test_sweep_persistence_and_override.py b/tests/test_sweep_persistence_and_override.py new file mode 100644 index 000000000..ca1c61ef1 --- /dev/null +++ b/tests/test_sweep_persistence_and_override.py @@ -0,0 +1,816 @@ +"""Tests for Protein sweep persistence and override. 
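+
+A quick sketch of the analysis helpers exercised here (file names are
+hypothetical; sweep_config is the 'sweep' section from load_config()):
+
+    from pufferlib.sweep import read_sweep_results, create_override
+    results = read_sweep_results('myproj_sweep.json', sweep_config)  # sorted by score
+    create_override('myproj_override.json',
+                    [{'train/learning_rate': 1e-3}], reason=['sanity check'])
+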
+Run: python tests/test_sweep_persistence_and_override.py + +Tests cover: +- State persistence (save/load, crash recovery, atomic writes) +- Override injection (single/multiple, partial params, consumption) +- Analysis helpers (read_sweep_results, create_override) +""" +import os +import json +import tempfile +import numpy as np + + +def _minimal_sweep_config(): + """Minimal config for testing.""" + return { + 'method': 'Protein', + 'metric': 'score', + 'goal': 'maximize', + 'downsample': 1, + 'train': { + 'learning_rate': { + 'distribution': 'log_normal', + 'min': 0.0001, 'max': 0.01, 'mean': 0.001, 'scale': 0.5 + }, + 'total_timesteps': { + 'distribution': 'log_normal', + 'min': 1e7, 'max': 1e9, 'mean': 1e8, 'scale': 'time' + }, + } + } + + +# ============================================================================= +# Persistence Tests +# ============================================================================= + +def test_json_default(): + from pufferlib.sweep import Protein + arr = np.array([1.0, 2.0]) + assert Protein._json_default(arr) == [1.0, 2.0] + assert Protein._json_default(np.float64(1.5)) == 1.5 + print("PASS test_json_default") + + +def test_save_and_load_state(): + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['state_file'] = os.path.join(tmpdir, "test.json") + cfg['override_file'] = os.path.join(tmpdir, "int.json") + + p1 = Protein(cfg, use_gpu=False) + p1.success_observations = [{'input': np.array([0.1, 0.2]), 'output': 0.8, 'cost': 100}] + p1.suggestion_idx = 5 + p1._save_state() + + cfg2 = _minimal_sweep_config() + cfg2['state_file'] = os.path.join(tmpdir, "test.json") + cfg2['override_file'] = os.path.join(tmpdir, "int2.json") + p2 = Protein(cfg2, use_gpu=False) + assert p2.suggestion_idx == 5 + assert len(p2.success_observations) == 1 + print("PASS test_save_and_load_state") + + +def test_override(): + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['override_file'] = os.path.join(tmpdir, "int.json") + cfg['state_file'] = os.path.join(tmpdir, "state.json") + + # Create override with 2 suggestions + with open(cfg['override_file'], 'w') as f: + json.dump({'suggestions': [ + {'params': {'train/learning_rate': 0.005}, 'reason': 'test1'}, + {'params': {'train/learning_rate': 0.006}, 'reason': 'test2'}, + ]}, f) + + p = Protein(cfg, use_gpu=False) + + # First call consumes first suggestion + result = p._check_override() + assert result == {'train/learning_rate': 0.005} + assert os.path.exists(cfg['override_file']) # still has one left + + # Second call consumes second and deletes file + result = p._check_override() + assert result == {'train/learning_rate': 0.006} + assert not os.path.exists(cfg['override_file']) # consumed + print("PASS test_override") + + +def test_override_in_suggest(): + """Test that override params are used in suggest().""" + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['override_file'] = os.path.join(tmpdir, "int.json") + cfg['state_file'] = os.path.join(tmpdir, "state.json") + + # Create override + with open(cfg['override_file'], 'w') as f: + json.dump({'suggestions': [ + {'params': {'train/learning_rate': 0.005, 'train/total_timesteps': 5e7}, 'reason': 'test'}, + ]}, f) + + p = Protein(cfg, use_gpu=False) + fill = {'train': {'learning_rate': 0.001, 'total_timesteps': 1e8}} + result, info = p.suggest(fill) + + assert 
info.get('override') is True + assert abs(result['train']['learning_rate'] - 0.005) < 1e-6 + assert not os.path.exists(cfg['override_file']) # consumed + print("PASS test_override_in_suggest") + + +def test_atomic_write(): + """Test that _save_state uses atomic write.""" + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['state_file'] = os.path.join(tmpdir, "test.json") + cfg['override_file'] = os.path.join(tmpdir, "int.json") + + p = Protein(cfg, use_gpu=False) + p.success_observations = [{'input': np.array([0.1, 0.2]), 'output': 0.8, 'cost': 100}] + p._save_state() + + # Verify file exists and is valid JSON + assert os.path.exists(cfg['state_file']) + with open(cfg['state_file']) as f: + state = json.load(f) + assert len(state['success_observations']) == 1 + print("PASS test_atomic_write") + + +def test_partial_override(): + """Test that override with only some params merges with fill dict.""" + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['override_file'] = os.path.join(tmpdir, "int.json") + cfg['state_file'] = os.path.join(tmpdir, "state.json") + + # Create override with only learning_rate (not total_timesteps) + with open(cfg['override_file'], 'w') as f: + json.dump({'suggestions': [ + {'params': {'train/learning_rate': 0.0069}, 'reason': 'partial test'}, + ]}, f) + + p = Protein(cfg, use_gpu=False) + # Fill has both params with default values + fill = {'train': {'learning_rate': 0.001, 'total_timesteps': 1e8, 'extra_param': 42}} + result, info = p.suggest(fill) + + assert info.get('override') is True + # Override param should be overwritten + assert abs(result['train']['learning_rate'] - 0.0069) < 1e-9 + # Non-override param should be preserved from fill + assert result['train']['total_timesteps'] == 1e8 + assert result['train']['extra_param'] == 42 + # CRITICAL: fill must be modified in place (pufferl.py expects this) + assert fill is result, "suggest() must modify fill in place, not return a copy" + assert abs(fill['train']['learning_rate'] - 0.0069) < 1e-9 + print("PASS test_partial_override") + + +def test_observe_saves_state(): + """Test that observe() automatically saves state.""" + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['state_file'] = os.path.join(tmpdir, "test.json") + cfg['override_file'] = os.path.join(tmpdir, "int.json") + + p = Protein(cfg, use_gpu=False) + fill = {'train': {'learning_rate': 0.001, 'total_timesteps': 1e8}} + + # Get first suggestion + result, _ = p.suggest(fill) + + # Observe result + p.observe(result, score=0.75, cost=100) + + # Check state was saved + assert os.path.exists(cfg['state_file']) + with open(cfg['state_file']) as f: + state = json.load(f) + assert len(state['success_observations']) == 1 + assert state['success_observations'][0]['output'] == 0.75 + print("PASS test_observe_saves_state") + + +def test_failure_observation(): + """Test that failure observations are recorded.""" + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['state_file'] = os.path.join(tmpdir, "test.json") + cfg['override_file'] = os.path.join(tmpdir, "int.json") + + p = Protein(cfg, use_gpu=False) + fill = {'train': {'learning_rate': 0.001, 'total_timesteps': 1e8}} + result, _ = p.suggest(fill) + + # Observe failure + p.observe(result, score=float('nan'), cost=100) + + 
with open(cfg['state_file']) as f: + state = json.load(f) + assert len(state['failure_observations']) == 1 + assert state['failure_observations'][0]['is_failure'] is True + print("PASS test_failure_observation") + + +def test_crash_recovery_preserves_bounds(): + """Test that min/max score bounds are preserved across crash recovery.""" + from pufferlib.sweep import Protein + import math + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['state_file'] = os.path.join(tmpdir, "test.json") + cfg['override_file'] = os.path.join(tmpdir, "int.json") + + # First session - add observations + p1 = Protein(cfg, use_gpu=False) + p1.success_observations = [ + {'input': np.array([0.1, 0.2]), 'output': 0.3, 'cost': 50}, + {'input': np.array([0.2, 0.3]), 'output': 0.9, 'cost': 100}, + ] + p1.min_score = 0.3 + p1.max_score = 0.9 + p1.log_c_min = np.log(50) + p1.log_c_max = np.log(100) + p1._save_state() + + # Second session - recover + cfg2 = _minimal_sweep_config() + cfg2['state_file'] = os.path.join(tmpdir, "test.json") + cfg2['override_file'] = os.path.join(tmpdir, "int2.json") + p2 = Protein(cfg2, use_gpu=False) + + assert p2.min_score == 0.3 + assert p2.max_score == 0.9 + assert abs(p2.log_c_min - np.log(50)) < 1e-9 + assert abs(p2.log_c_max - np.log(100)) < 1e-9 + print("PASS test_crash_recovery_preserves_bounds") + + +def test_invalid_override_file(): + """Test that invalid override files are handled gracefully.""" + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['override_file'] = os.path.join(tmpdir, "int.json") + cfg['state_file'] = os.path.join(tmpdir, "state.json") + + # Create invalid JSON + with open(cfg['override_file'], 'w') as f: + f.write("not valid json {{{") + + p = Protein(cfg, use_gpu=False) + result = p._check_override() + + # Should return None and delete the invalid file + assert result is None + assert not os.path.exists(cfg['override_file']) + print("PASS test_invalid_override_file") + + +def test_sweep_continues_after_crash(): + """Test that a sweep can be stopped and resumed from where it left off. + + Simulates: run 2 iterations -> crash -> resume -> run 2 more iterations + Verifies: suggestion_idx continues, observations accumulate, no duplicates, + AND that loaded observations are numpy arrays with correct values. 
+ """ + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + state_file = os.path.join(tmpdir, "sweep.json") + int_file = os.path.join(tmpdir, "int.json") + + # --- Session 1: Run 2 iterations then "crash" --- + cfg1 = _minimal_sweep_config() + cfg1['state_file'] = state_file + cfg1['override_file'] = int_file + + p1 = Protein(cfg1, use_gpu=False) + fill = {'train': {'learning_rate': 0.001, 'total_timesteps': 1e8}} + + # Iteration 1 + result1, _ = p1.suggest(fill.copy()) + p1.observe(result1, score=0.5, cost=100) + + # Iteration 2 + result2, _ = p1.suggest(fill.copy()) + p1.observe(result2, score=0.6, cost=150) + + assert p1.suggestion_idx == 2 + assert len(p1.success_observations) == 2 + + # Capture state for deep verification after resume + p1_inputs = [obs['input'].copy() for obs in p1.success_observations] + p1_outputs = [obs['output'] for obs in p1.success_observations] + p1_costs = [obs['cost'] for obs in p1.success_observations] + p1_min_score = p1.min_score + p1_max_score = p1.max_score + p1_log_c_min = p1.log_c_min + p1_log_c_max = p1.log_c_max + + # Verify state file exists + assert os.path.exists(state_file) + + # --- "Crash" - delete p1, create new instance --- + del p1 + + # --- Session 2: Resume and run 2 more iterations --- + cfg2 = _minimal_sweep_config() + cfg2['state_file'] = state_file + cfg2['override_file'] = int_file + + p2 = Protein(cfg2, use_gpu=False) + + # Verify state was recovered + assert p2.suggestion_idx == 2, f"Expected idx=2, got {p2.suggestion_idx}" + assert len(p2.success_observations) == 2, f"Expected 2 obs, got {len(p2.success_observations)}" + + # Deep verification: observations are numpy arrays (not JSON lists) + for i, obs in enumerate(p2.success_observations): + assert isinstance(obs['input'], np.ndarray), \ + f"Obs {i} input should be np.ndarray, got {type(obs['input'])}" + + # Deep verification: observation values match exactly + for i, obs in enumerate(p2.success_observations): + assert np.allclose(obs['input'], p1_inputs[i]), f"Obs {i} input mismatch" + assert obs['output'] == p1_outputs[i], f"Obs {i} output mismatch" + assert obs['cost'] == p1_costs[i], f"Obs {i} cost mismatch" + + # Deep verification: bounds restored correctly (handle inf case) + import math + if math.isinf(p1_min_score): + assert math.isinf(p2.min_score), f"min_score: expected inf, got {p2.min_score}" + else: + assert p2.min_score == p1_min_score, f"min_score mismatch" + if math.isinf(p1_max_score): + assert math.isinf(p2.max_score), f"max_score: expected -inf, got {p2.max_score}" + else: + assert p2.max_score == p1_max_score, f"max_score mismatch" + if math.isinf(p1_log_c_min): + assert math.isinf(p2.log_c_min), f"log_c_min: expected inf, got {p2.log_c_min}" + else: + assert abs(p2.log_c_min - p1_log_c_min) < 1e-9, f"log_c_min mismatch" + if math.isinf(p1_log_c_max): + assert math.isinf(p2.log_c_max), f"log_c_max: expected -inf, got {p2.log_c_max}" + else: + assert abs(p2.log_c_max - p1_log_c_max) < 1e-9, f"log_c_max mismatch" + + # Iteration 3 + result3, _ = p2.suggest(fill.copy()) + p2.observe(result3, score=0.7, cost=200) + + # Iteration 4 + result4, _ = p2.suggest(fill.copy()) + p2.observe(result4, score=0.8, cost=250) + + assert p2.suggestion_idx == 4 + assert len(p2.success_observations) == 4 + + # Verify all scores are present + scores = [obs['output'] for obs in p2.success_observations] + assert 0.5 in scores + assert 0.6 in scores + assert 0.7 in scores + assert 0.8 in scores + + print("PASS test_sweep_continues_after_crash") + + 
+def test_corrupted_state_file(): + """Truncated/corrupted JSON should reset state, not crash.""" + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['state_file'] = os.path.join(tmpdir, "test.json") + cfg['override_file'] = os.path.join(tmpdir, "int.json") + + # Write truncated JSON (missing closing brace) + with open(cfg['state_file'], 'w') as f: + f.write('{"suggestion_idx": 5') + + # Should not crash - should start fresh + p = Protein(cfg, use_gpu=False) + assert p.suggestion_idx == 0 + assert len(p.success_observations) == 0 + print("PASS test_corrupted_state_file") + + +def test_empty_state_file(): + """Empty JSON object {} should use defaults, not crash.""" + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['state_file'] = os.path.join(tmpdir, "test.json") + cfg['override_file'] = os.path.join(tmpdir, "int.json") + + # Write empty JSON object + with open(cfg['state_file'], 'w') as f: + f.write('{}') + + # Should use defaults + p = Protein(cfg, use_gpu=False) + assert p.suggestion_idx == 0 + assert len(p.success_observations) == 0 + print("PASS test_empty_state_file") + + +def test_state_file_deleted_during_load(): + """FileNotFoundError during load should not crash.""" + from pufferlib.sweep import Protein + from unittest.mock import patch + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['state_file'] = os.path.join(tmpdir, "test.json") + cfg['override_file'] = os.path.join(tmpdir, "int.json") + + # Patch exists() to return True even though file doesn't exist + # This simulates the race condition + original_exists = os.path.exists + def mock_exists(path): + if path == cfg['state_file']: + return True + return original_exists(path) + + with patch('os.path.exists', side_effect=mock_exists): + p = Protein(cfg, use_gpu=False) + + assert p.suggestion_idx == 0 # Started fresh despite "existing" file + print("PASS test_state_file_deleted_during_load") + + +def test_save_state_cleans_up_tmp_on_failure(): + """_save_state() should clean up .tmp file if write fails.""" + from pufferlib.sweep import Protein + from unittest.mock import patch + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['state_file'] = os.path.join(tmpdir, "test.json") + cfg['override_file'] = os.path.join(tmpdir, "int.json") + + p = Protein(cfg, use_gpu=False) + tmp_file = f"{cfg['state_file']}.tmp" + + # Patch os.replace to fail + with patch('os.replace', side_effect=OSError("Simulated disk full")): + p._save_state() + + # .tmp file should NOT exist (cleaned up) + assert not os.path.exists(tmp_file), ".tmp file should be cleaned up on failure" + print("PASS test_save_state_cleans_up_tmp_on_failure") + + +def test_orphaned_tmp_cleaned_on_load(): + """Orphaned .tmp files from previous crash should be cleaned up on load.""" + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['state_file'] = os.path.join(tmpdir, "test.json") + cfg['override_file'] = os.path.join(tmpdir, "int.json") + + tmp_file = f"{cfg['state_file']}.tmp" + + # Create orphaned .tmp file (simulates crash during previous save) + with open(tmp_file, 'w') as f: + f.write('orphaned tmp data') + + assert os.path.exists(tmp_file) + + # Initialize Protein - should clean up orphan + p = Protein(cfg, use_gpu=False) + + assert not os.path.exists(tmp_file), "Orphaned .tmp should be 
cleaned up" + print("PASS test_orphaned_tmp_cleaned_on_load") + + +def test_override_invalid_path(): + """Override with non-existent nested path should skip, not crash.""" + from pufferlib.sweep import Protein + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['override_file'] = os.path.join(tmpdir, "int.json") + cfg['state_file'] = os.path.join(tmpdir, "state.json") + + # Override has valid keys and invalid path + with open(cfg['override_file'], 'w') as f: + json.dump({'suggestions': [{ + 'params': { + 'train/learning_rate': 0.005, + 'nonexistent/deeply/nested': 0.2, + }, + 'reason': 'test invalid path' + }]}, f) + + p = Protein(cfg, use_gpu=False) + fill = {'train': {'learning_rate': 0.001, 'total_timesteps': 1e8}} + + # Should not crash, should apply valid keys, skip invalid paths + result, info = p.suggest(fill) + + assert info.get('override') is True + assert abs(result['train']['learning_rate'] - 0.005) < 1e-6 + assert 'nonexistent' not in result + print("PASS test_override_invalid_path") + + +def test_override_atomic_write(): + """Override update should use atomic write pattern.""" + from pufferlib.sweep import Protein + from unittest.mock import patch + with tempfile.TemporaryDirectory() as tmpdir: + cfg = _minimal_sweep_config() + cfg['override_file'] = os.path.join(tmpdir, "int.json") + cfg['state_file'] = os.path.join(tmpdir, "state.json") + tmp_file = f"{cfg['override_file']}.tmp" + + # Override with 2 suggestions + with open(cfg['override_file'], 'w') as f: + json.dump({'suggestions': [ + {'params': {'train/learning_rate': 0.005}, 'reason': 'first'}, + {'params': {'train/learning_rate': 0.006}, 'reason': 'second'}, + ]}, f) + + # Patch os.replace to fail (simulating crash during atomic write) + p = Protein(cfg, use_gpu=False) + + with patch('os.replace', side_effect=OSError("Simulated crash")): + result = p._check_override() + + # Original file should still be intact (no corruption) + with open(cfg['override_file']) as f: + data = json.load(f) + + # Should still have both suggestions (first wasn't consumed due to crash) + assert len(data['suggestions']) == 2, "Original file should be intact after crash" + # .tmp should be cleaned up + assert not os.path.exists(tmp_file), ".tmp should be cleaned up on failure" + print("PASS test_override_atomic_write") + + +# ============================================================================= +# Analysis Helper Tests +# ============================================================================= + +def test_read_sweep_results(): + """Read state file and return denormalized observations.""" + from pufferlib.sweep import read_sweep_results, Hyperparameters + with tempfile.TemporaryDirectory() as tmpdir: + state_file = os.path.join(tmpdir, 'test_sweep.json') + config = _minimal_sweep_config() + + # Create Hyperparameters to get normalized values + hyperparams = Hyperparameters(config, verbose=False) + + # Create a known observation with specific real values + real_params = {'train': {'learning_rate': 0.001, 'total_timesteps': 1e8}} + normalized = hyperparams.from_dict(real_params) + + # Write state file with this observation + state = { + 'success_observations': [ + {'input': normalized.tolist(), 'output': 0.75, 'cost': 100.0} + ], + 'failure_observations': [] + } + with open(state_file, 'w') as f: + json.dump(state, f) + + # Read and denormalize + results = read_sweep_results(state_file, config) + + assert len(results) == 1 + assert results[0]['score'] == 0.75 + assert results[0]['cost'] == 100.0 + + 
# Check params are denormalized and flattened + params = results[0]['params'] + assert 'train/learning_rate' in params + assert 'train/total_timesteps' in params + + # Values should be close to originals (some numerical precision loss) + assert abs(params['train/learning_rate'] - 0.001) < 1e-6 + assert abs(params['train/total_timesteps'] - 1e8) / 1e8 < 0.01 + + print("PASS test_read_sweep_results") + + +def test_read_sweep_results_sorted(): + """Results sorted by score (descending) by default.""" + from pufferlib.sweep import read_sweep_results, Hyperparameters + with tempfile.TemporaryDirectory() as tmpdir: + state_file = os.path.join(tmpdir, 'test_sweep.json') + config = _minimal_sweep_config() + + hyperparams = Hyperparameters(config, verbose=False) + normalized = hyperparams.from_dict({'train': {'learning_rate': 0.001, 'total_timesteps': 1e8}}) + + # Create observations with different scores + state = { + 'success_observations': [ + {'input': normalized.tolist(), 'output': 0.5, 'cost': 100.0}, + {'input': normalized.tolist(), 'output': 0.9, 'cost': 200.0}, + {'input': normalized.tolist(), 'output': 0.3, 'cost': 50.0}, + ], + 'failure_observations': [] + } + with open(state_file, 'w') as f: + json.dump(state, f) + + # Default sort by score descending + results = read_sweep_results(state_file, config) + assert results[0]['score'] == 0.9 + assert results[1]['score'] == 0.5 + assert results[2]['score'] == 0.3 + + # Sort by cost ascending + results = read_sweep_results(state_file, config, sort_by='cost') + assert results[0]['cost'] == 50.0 + assert results[1]['cost'] == 100.0 + assert results[2]['cost'] == 200.0 + + # No sort + results = read_sweep_results(state_file, config, sort_by=None) + assert results[0]['score'] == 0.5 # Original order + + print("PASS test_read_sweep_results_sorted") + + +def test_read_sweep_results_empty(): + """Empty state file returns empty list.""" + from pufferlib.sweep import read_sweep_results + with tempfile.TemporaryDirectory() as tmpdir: + state_file = os.path.join(tmpdir, 'test_sweep.json') + config = _minimal_sweep_config() + + # Empty state + state = {'success_observations': [], 'failure_observations': []} + with open(state_file, 'w') as f: + json.dump(state, f) + + results = read_sweep_results(state_file, config) + assert results == [] + + print("PASS test_read_sweep_results_empty") + + +def test_config_mismatch_error(): + """Clear error when state file doesn't match config dimensions.""" + from pufferlib.sweep import read_sweep_results + with tempfile.TemporaryDirectory() as tmpdir: + state_file = os.path.join(tmpdir, 'test_sweep.json') + + # Config with 2 params + config = _minimal_sweep_config() + + # State with 3 dimensions (wrong!) 
+ state = { + 'success_observations': [ + {'input': [0.1, 0.2, 0.3], 'output': 0.75, 'cost': 100.0} + ], + 'failure_observations': [] + } + with open(state_file, 'w') as f: + json.dump(state, f) + + # Should raise ValueError with helpful message + try: + read_sweep_results(state_file, config) + assert False, "Should have raised ValueError" + except ValueError as e: + assert "3 dimensions" in str(e) + assert "2" in str(e) + assert "config" in str(e).lower() + + print("PASS test_config_mismatch_error") + + +def test_create_override_single(): + """Create override file with one suggestion.""" + from pufferlib.sweep import create_override, Protein + with tempfile.TemporaryDirectory() as tmpdir: + int_file = os.path.join(tmpdir, 'override.json') + + # Create override + create_override(int_file, [{'train/learning_rate': 0.00069}], reason=['test single']) + + # Verify file format + with open(int_file) as f: + data = json.load(f) + + assert 'suggestions' in data + assert len(data['suggestions']) == 1 + assert data['suggestions'][0]['params'] == {'train/learning_rate': 0.00069} + assert data['suggestions'][0]['reason'] == 'test single' + + # Verify it can be consumed by Protein._check_override() + config = _minimal_sweep_config() + config['override_file'] = int_file + config['state_file'] = os.path.join(tmpdir, 'state.json') + p = Protein(config, use_gpu=False) + result = p._check_override() + assert result == {'train/learning_rate': 0.00069} + assert not os.path.exists(int_file) # Consumed and deleted + + print("PASS test_create_override_single") + + +def test_create_override_multiple(): + """Create override with multiple suggestions.""" + from pufferlib.sweep import create_override + with tempfile.TemporaryDirectory() as tmpdir: + int_file = os.path.join(tmpdir, 'override.json') + + # Multiple suggestions with different reasons + suggestions = [ + {'train/learning_rate': 0.001}, + {'train/learning_rate': 0.002}, + {'train/learning_rate': 0.003}, + ] + reasons = ['reason1', 'reason2', 'reason3'] + + create_override(int_file, suggestions, reason=reasons) + + with open(int_file) as f: + data = json.load(f) + + assert len(data['suggestions']) == 3 + assert data['suggestions'][0]['params'] == {'train/learning_rate': 0.001} + assert data['suggestions'][0]['reason'] == 'reason1' + assert data['suggestions'][1]['params'] == {'train/learning_rate': 0.002} + assert data['suggestions'][1]['reason'] == 'reason2' + assert data['suggestions'][2]['params'] == {'train/learning_rate': 0.003} + assert data['suggestions'][2]['reason'] == 'reason3' + + print("PASS test_create_override_multiple") + + +def test_create_override_reasons(): + """Reasons list must match suggestions length, or be None.""" + from pufferlib.sweep import create_override + with tempfile.TemporaryDirectory() as tmpdir: + # Reasons list matches suggestions + int_file = os.path.join(tmpdir, 'int1.json') + create_override(int_file, [{'a': 1}, {'b': 2}], reason=['reason1', 'reason2']) + with open(int_file) as f: + data = json.load(f) + assert data['suggestions'][0]['reason'] == 'reason1' + assert data['suggestions'][1]['reason'] == 'reason2' + + # No reason - should use default + int_file = os.path.join(tmpdir, 'int2.json') + create_override(int_file, [{'a': 1}]) + with open(int_file) as f: + data = json.load(f) + assert 'reason' in data['suggestions'][0] + assert data['suggestions'][0]['reason'] # Non-empty default + + print("PASS test_create_override_reasons") + + +def test_create_override_mismatched_reasons(): + """Mismatched list lengths raise 
ValueError.""" + from pufferlib.sweep import create_override + with tempfile.TemporaryDirectory() as tmpdir: + int_file = os.path.join(tmpdir, 'int.json') + try: + create_override(int_file, [{'a': 1}, {'b': 2}], reason=['only one']) + assert False, "Should have raised ValueError" + except ValueError as e: + assert "2 suggestions" in str(e) + assert "1 reasons" in str(e) + + print("PASS test_create_override_mismatched_reasons") + + +if __name__ == '__main__': + # Persistence tests + test_json_default() + test_save_and_load_state() + test_override() + test_override_in_suggest() + test_atomic_write() + test_partial_override() + test_observe_saves_state() + test_failure_observation() + test_crash_recovery_preserves_bounds() + test_invalid_override_file() + test_sweep_continues_after_crash() + test_corrupted_state_file() + test_empty_state_file() + test_state_file_deleted_during_load() + test_save_state_cleans_up_tmp_on_failure() + test_orphaned_tmp_cleaned_on_load() + test_override_invalid_path() + test_override_atomic_write() + # Analysis helper tests + test_read_sweep_results() + test_read_sweep_results_sorted() + test_read_sweep_results_empty() + test_config_mismatch_error() + test_create_override_single() + test_create_override_multiple() + test_create_override_reasons() + test_create_override_mismatched_reasons() + print("\nOK: All 26 tests passed!")