diff --git a/benchmarking/README.md b/benchmarking/README.md index 80cbc0247..ded1a2c8d 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -4,14 +4,25 @@ The `src/imitation/scripts/config/tuned_hps` directory provides the tuned hyperp Configuration files can be loaded either from the CLI or from the Python API. -## CLI +## Single benchmark + +To run a single benchmark from the command line: ```bash python -m imitation.scripts. with _ ``` `train_script` can be either 1) `train_imitation` with `algo` as `bc` or `dagger` or 2) `train_adversarial` with `algo` as `gail` or `airl`. The `env` can be either of `seals_ant`, `seals_half_cheetah`, `seals_hopper`, `seals_swimmer`, or `seals_walker`. The hyperparameters for other environments are not tuned yet. You may be able to get reasonable performance by using hyperparameters tuned for a similar environment; alternatively, you can tune the hyperparameters using the `tuning` script. -## Python +To view the results: + +```bash +python -m imitation.scripts.analyze analyze_imitation with \ + source_dir_str="output/sacred" table_verbosity=0 \ + csv_output_path=results.csv \ + run_name="" +``` + +To run a single benchmark from Python, add the config to your Sacred experiment `ex`: ```python ... @@ -19,6 +30,72 @@ from imitation.scripts. import .run(command_name="", named_configs=["_"]) ``` +## Entire benchmark suite + +### Running locally + +To generate the commands to run the entire benchmarking suite with multiple random seeds: + +```bash +python experiments/commands.py \ + --name= \ + --cfg_pattern "benchmarking/example_*.json" \ + --seeds 0 1 2 \ + --output_dir=output +``` + +To run those commands in parallel: + +```bash +python experiments/commands.py \ + --name= \ + --cfg_pattern "benchmarking/example_*.json" \ + --seeds 0 1 2 \ + --output_dir=output | parallel -j 8 +``` + +(You may need to `brew install parallel` to get this to work on Mac.) 
+ +### Running on Hofvarpnir + +To generate the commands for the Hofvarpnir cluster: + +```bash +python experiments/commands.py \ + --name= \ + --cfg_pattern "benchmarking/example_*.json" \ + --seeds 0 1 2 \ + --output_dir=/data/output \ + --remote +``` + +To run those commands, pipe them into bash: + +```bash +python experiments/commands.py \ + --name= \ + --cfg_pattern "benchmarking/example_*.json" \ + --seeds 0 1 2 \ + --output_dir=/data/output \ + --remote | bash +``` + +### Results + +To produce a table with all the results: + +```bash +python -m imitation.scripts.analyze analyze_imitation with \ + source_dir_str="output/sacred" table_verbosity=0 \ + csv_output_path=results.csv \ + run_name="" +``` + +To compute a p-value to test whether the differences from the paper are statistically significant: + +```bash +python -m imitation.scripts.compare_to_baseline results.csv +``` # Tuning Hyperparameters The hyperparameters of any algorithm in imitation can be tuned using `src/imitation/scripts/tuning.py`. 
diff --git a/benchmarking/results/logs_example_airl_seals_ant_bhp.csv b/benchmarking/results/logs_example_airl_seals_ant_bhp.csv new file mode 100644 index 000000000..7c0e3ba8b --- /dev/null +++ b/benchmarking/results/logs_example_airl_seals_ant_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,0,101,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_711915,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),123.476 ± 2.16606 (n=56) +,0,100,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082120_c540b2,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),-378.377 ± 60.6063 (n=56) +,0,102,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_ba94a1,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),-314.108 ± 19.2371 (n=56) +,0,104,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_8c6aba,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),-0.402349 ± 19.7147 (n=56) +,0,103,False,10000000.0,8192,8192,16,seals/Ant-v0,output/airl/seals_Ant-v0/20221024_082122_47f04c,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,16,0.3,3.27750078482474e-06,0.8,0.995,3.249429831179079e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/Ant-v0,2408.22 ± 665.201 (n=104),18.9413 ± 1.1345 (n=56) diff --git a/benchmarking/results/logs_example_airl_seals_half_cheetah_bhp.csv b/benchmarking/results/logs_example_airl_seals_half_cheetah_bhp.csv new file mode 100644 index 000000000..012bcfaaa --- /dev/null +++ b/benchmarking/results/logs_example_airl_seals_half_cheetah_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_ex
tractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,0,100,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115006_924cb4,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),1674.29 ± 581.622 (n=56) +,0,104,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_b838f5,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3652.14 ± 648.766 (n=56) +,0,102,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_23f6ee,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3491.62 ± 368.717 (n=56) +,0,101,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_ae2f97,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),4441.25 ± 87.8795 (n=56) +,0,103,False,10000000.0,2048,512,16,seals/HalfCheetah-v0,output/airl/seals_HalfCheetah-v0/20221021_115008_1ae278,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,64,0.1,0.0005544771755195421,0.95,0.95,0.00047248619386801587,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,AIRL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3960.15 ± 108.134 (n=56) diff --git a/benchmarking/results/logs_example_airl_seals_hopper_bhp.csv b/benchmarking/results/logs_example_airl_seals_hopper_bhp.csv new file mode 100644 index 000000000..e52fb44e9 --- /dev/null +++ b/benchmarking/results/logs_example_airl_seals_hopper_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls,train.policy_kwargs.activation_fn.py/
type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,train.policy_kwargs.net_arch,algo,env_name,expert_return_summary,imit_return_summary +,0,103,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223308_a8cbd6,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2600.12 ± 155.143 (n=56) +,0,101,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223308_299f28,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2663.1 ± 121.83 (n=56) +,0,104,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223307_1607e3,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2740.77 ± 107.306 (n=56) +,0,100,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223305_7116b9,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2758.67 ± 121.298 (n=56) +,0,102,False,10000000.0,2048,8192,16,seals/Hopper-v0,output/airl/seals_Hopper-v0/20221022_223307_23fde3,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,8192,stable_baselines3.ppo.ppo.PPO,512,0.1,0.009709494745755033,0.98,0.995,0.0005807211840258373,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2613.26 ± 128.037 (n=56) diff --git a/benchmarking/results/logs_example_airl_seals_swimmer_bhp.csv b/benchmarking/results/logs_example_airl_seals_swimmer_bhp.csv new file mode 100644 index 000000000..c4afcb6af --- /dev/null +++ b/benchmarking/results/logs_example_airl_seals_swimmer_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.env_name,expert.loader_kwargs.organization,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_e
pochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls,train.policy_kwargs.activation_fn.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,train.policy_kwargs.net_arch,algo,env_name,expert_return_summary,imit_return_summary +,0,101,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00001_1_seed=101_2022-11-09_06-28-20', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062825_03facf']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),234.52 ± 7.61457 (n=50) +,0,102,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00002_2_seed=102_2022-11-09_06-28-21', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062833_74ab85']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),302.529 ± 7.31652 (n=50) +,0,100,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00000_0_seed=100_2022-11-09_06-28-14', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062824_6fee49']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),248.793 ± 2.30907 (n=50) +,0,103,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00003_3_seed=103_2022-11-09_06-28-21', 'output', 'airl', 
'seals_Swimmer-v0', '20221109_062833_72d6bf']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),299.295 ± 4.40014 (n=50) +,0,104,False,10000000.0,128,16384,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_swimmer_best_hp_eval', 'inner_be60f_00004_4_seed=104_2022-11-09_06-28-21', 'output', 'airl', 'seals_Swimmer-v0', '20221109_062833_1570e5']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,64,0.1,0.006137718463434523,0.95,0.999,0.0013390060486393868,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),295.572 ± 9.13404 (n=50) diff --git a/benchmarking/results/logs_example_airl_seals_walker_bhp.csv b/benchmarking/results/logs_example_airl_seals_walker_bhp.csv new file mode 100644 index 000000000..cb8ffbf5b --- /dev/null 
+++ b/benchmarking/results/logs_example_airl_seals_walker_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.env_name,expert.loader_kwargs.organization,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls,train.policy_kwargs.activation_fn.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,train.policy_kwargs.net_arch,algo,env_name,expert_return_summary,imit_return_summary +,0,101,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00001_1_seed=101_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_bb5442']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),1044.57 ± 1.01596 (n=50) +,0,100,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00000_0_seed=100_2022-11-09_06-28-18', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062827_6b454c']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),651.678 ± 12.0014 (n=50) +,0,103,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00003_3_seed=103_2022-11-09_06-28-23', 'output', 'airl', 
'seals_Walker2d-v0', '20221109_062829_c4eb91']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),1021 ± 68.6611 (n=50) +,0,102,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 'inner_c0abc_00002_2_seed=102_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_cfc95c']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),-8.05116 ± 5.70636 (n=50) +,0,104,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_airl_seals_walker_best_hp_eval', 
'inner_c0abc_00004_4_seed=104_2022-11-09_06-28-23', 'output', 'airl', 'seals_Walker2d-v0', '20221109_062829_1fdf14']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicShapedRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.002003867232707145,0.92,0.98,3.052170958603811e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",AIRL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),898.306 ± 320.022 (n=50) diff --git a/benchmarking/results/logs_example_bc_seals_ant_bhp.csv b/benchmarking/results/logs_example_bc_seals_ant_bhp.csv new file mode 100644 index 000000000..bafa893de --- /dev/null +++ b/benchmarking/results/logs_example_bc_seals_ant_bhp.csv @@ -0,0 +1,6 @@ 
+agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,100,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115141_f4ca8b,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Ant-v0,2408.22 ± 665.201 (n=104),1499.7 ± 600.606 (n=56) +,101,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_e1e72c,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Ant-v0,2408.22 ± 665.201 (n=104),2253.66 ± 633.442 (n=56) +,103,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_259744,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Ant-v0,2408.22 ± 665.201 (n=104),2079.42 ± 731.222 (n=56) +,104,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_245d4a,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Ant-v0,2408.22 ± 665.201 (n=104),2059.98 ± 699.001 (n=56) +,102,16,2.350251568550711e-05,torch.optim.adam.Adam,0.0017601048183920826,500,,5,seals/Ant-v0,output/bc/seals_Ant-v0/20221021_115143_2e76df,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Ant-v0,2408.22 ± 665.201 (n=104),1870.14 ± 737.307 (n=56) diff --git a/benchmarking/results/logs_example_bc_seals_half_cheetah_bhp.csv b/benchmarking/results/logs_example_bc_seals_half_cheetah_bhp.csv new file mode 100644 index 000000000..4a8f116ec --- /dev/null +++ b/benchmarking/results/logs_example_bc_seals_half_cheetah_bhp.csv @@ -0,0 +1,6 @@ +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,104,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00004_4_seed=104_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', 
'20221114_150328_52931f']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3540.47 ± 777.394 (n=50) +,102,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00002_2_seed=102_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150328_0631bc']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3308.07 ± 833.261 (n=50) +,101,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00001_1_seed=101_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150327_909529']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3517.64 ± 766.922 (n=50) +,103,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00003_3_seed=103_2022-11-14_15-03-22', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150328_50e30b']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3405.94 ± 584.32 (n=50) +,100,64,0.005728455628518169,torch.optim.adam.Adam,0.008056922426724927,500,,20,seals/HalfCheetah-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_half_cheetah_best_hp_eval', 'inner_85d2c_00000_0_seed=100_2022-11-14_15-03-17', 'output', 'bc', 'seals_HalfCheetah-v0', '20221114_150326_6096c1']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3458.01 ± 693.419 (n=50) diff --git a/benchmarking/results/logs_example_bc_seals_hopper_bhp.csv b/benchmarking/results/logs_example_bc_seals_hopper_bhp.csv new file mode 100644 index 000000000..6634655ea --- /dev/null +++ b/benchmarking/results/logs_example_bc_seals_hopper_bhp.csv @@ -0,0 +1,6 @@ +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,103,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_43f19c,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2239.95 ± 111.425 (n=56) +,100,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184630_ecee3d,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2241.96 ± 133.666 (n=56) +,102,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_35a53f,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2194.95 ± 129.698 (n=56) +,101,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_bbc6dd,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2253.88 ± 120.151 (n=56) +,104,64,1.3610189916104634e-06,torch.optim.adam.Adam,0.0007172435323620212,500,,20,seals/Hopper-v0,output/bc/seals_Hopper-v0/20221021_184632_f55a65,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2283.27 ± 95.716 (n=56) diff --git a/benchmarking/results/logs_example_bc_seals_swimmer_bhp.csv b/benchmarking/results/logs_example_bc_seals_swimmer_bhp.csv new file mode 100644 index 000000000..62721aead --- /dev/null +++ b/benchmarking/results/logs_example_bc_seals_swimmer_bhp.csv @@ -0,0 +1,6 @@ 
+agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,101,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00001_1_seed=101_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_bfbf99']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),282.727 ± 6.70404 (n=50) +,103,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 
'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00003_3_seed=103_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_c2bdfa']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),280.162 ± 5.94572 (n=50) +,104,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00004_4_seed=104_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_56ab19']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),283.323 ± 5.90963 (n=50) +,102,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00002_2_seed=102_2022-11-02_15-25-53', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152557_2c49ca']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),280.825 ± 7.64087 (n=50) +,100,16,4.37857842825771e-05,torch.optim.adam.Adam,0.0016370547173923296,500,,10,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc\\_seals_swimmer_best_hp_eval', 'inner_4cc91_00000_0_seed=100_2022-11-02_15-25-48', 'output', 'bc', 'seals_Swimmer-v0', '20221102_152556_144f05']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),287.85 ± 5.57297 (n=50) diff --git a/benchmarking/results/logs_example_bc_seals_walker_bhp.csv b/benchmarking/results/logs_example_bc_seals_walker_bhp.csv new file mode 100644 index 000000000..17adb911d --- /dev/null +++ b/benchmarking/results/logs_example_bc_seals_walker_bhp.csv @@ -0,0 +1,6 @@ 
+agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,101,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00001_1_seed=101_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_4b9bf0']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2493.52 ± 505.612 (n=50) +,103,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 
'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00003_3_seed=103_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_ec9b99']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2546.03 ± 503.795 (n=50) +,100,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00000_0_seed=100_2022-11-03_09-52-11', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095218_db6ebb']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2431.11 ± 561.489 (n=50) +,102,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00002_2_seed=102_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_37dfa7']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2497.32 ± 432.525 (n=50) +,104,32,0.0014680228143404998,torch.optim.adam.Adam,0.0003034620018780926,500,,20,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_bc_seals_walker_best_hp_eval', 'inner_dc973_00004_4_seed=104_2022-11-03_09-52-14', 'output', 'bc', 'seals_Walker2d-v0', '20221103_095220_46e144']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,BC,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2593.77 ± 424.954 (n=50) diff --git a/benchmarking/results/logs_example_dagger_seals_ant_bhp.csv b/benchmarking/results/logs_example_dagger_seals_ant_bhp.csv new file mode 100644 index 000000000..a3df80cc5 --- /dev/null +++ b/benchmarking/results/logs_example_dagger_seals_ant_bhp.csv @@ -0,0 +1,6 @@ 
+agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.py/object,dagger.beta_schedule.rampdown_rounds,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_04cd1f,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Ant-v0,2578.98 ± 683.531 (n=104),2153.93 ± 859.53 (n=56) +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_769813,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Ant-v0,2536.22 ± 697.961 (n=104),2714.79 ± 537.801 (n=56) +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_26539c,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Ant-v0,2497.44 ± 707.042 (n=104),2167.93 ± 788.897 (n=56) +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173716_4a49f4,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Ant-v0,2573.21 ± 620.486 (n=104),2392.39 ± 680.058 (n=56) 
+,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,10,seals/Ant-v0,output/dagger/seals_Ant-v0/20221024_173721_99afba,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Ant-v0,2557.62 ± 702.379 (n=104),2173.82 ± 730.654 (n=56) diff --git a/benchmarking/results/logs_example_dagger_seals_half_cheetah_bhp.csv b/benchmarking/results/logs_example_dagger_seals_half_cheetah_bhp.csv new file mode 100644 index 000000000..927af4e98 --- /dev/null +++ b/benchmarking/results/logs_example_dagger_seals_half_cheetah_bhp.csv @@ -0,0 +1,6 @@ +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.decay_probability,dagger.beta_schedule.py/object,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_re
turn_summary +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_82aa93,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/HalfCheetah-v0,4213.44 ± 631.818 (n=64),4080.58 ± 631.88 (n=56) +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_ea6184,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/HalfCheetah-v0,4030.78 ± 842.851 (n=64),4261.82 ± 624.333 (n=56) +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_45b32a,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/HalfCheetah-v0,4233.26 ± 608.398 (n=64),4107.19 ± 692.207 (n=56) +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192909_39894f,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/HalfCheetah-v0,4051.72 ± 822.611 (n=64),4342.39 ± 443.082 (n=56) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/HalfCheetah-v0,output/dagger/seals_HalfCheetah-v0/20221024_192912_67ef85,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,60000,False,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/HalfCheetah-v0,4129.05 ± 746.065 (n=64),4068.88 ± 
645.629 (n=56) diff --git a/benchmarking/results/logs_example_dagger_seals_hopper_bhp.csv b/benchmarking/results/logs_example_dagger_seals_hopper_bhp.csv new file mode 100644 index 000000000..be56fe927 --- /dev/null +++ b/benchmarking/results/logs_example_dagger_seals_hopper_bhp.csv @@ -0,0 +1,6 @@ +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.decay_probability,dagger.beta_schedule.py/object,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204020_dd6a68,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Hopper-v0,413.908 ± 56.9172 (n=112),477.137 ± 42.7627 (n=56) 
+,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_79244e,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Hopper-v0,444.837 ± 61.4541 (n=112),434.321 ± 37.8565 (n=56) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_525a87,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Hopper-v0,443.562 ± 67.9984 (n=112),385.186 ± 34.7564 (n=56) +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_69c197,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Hopper-v0,456.621 ± 46.3077 (n=112),453.07 ± 31.3048 (n=56) +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Hopper-v0,output/dagger/seals_Hopper-v0/20221021_204023_6a8cab,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,0.3,imitation.algorithms.dagger.ExponentialBetaSchedule,10,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Hopper-v0,427.62 ± 67.3483 (n=112),462.591 ± 43.0062 (n=56) diff --git a/benchmarking/results/logs_example_dagger_seals_swimmer_bhp.csv b/benchmarking/results/logs_example_dagger_seals_swimmer_bhp.csv new file mode 100644 index 000000000..3fd3e1d50 --- /dev/null +++ b/benchmarking/results/logs_example_dagger_seals_swimmer_bhp.csv @@ -0,0 +1,6 @@ +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.py/object,dagger.beta_schedule.rampdown_rounds,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 
'inner_239d1_00000_0_seed=100_2022-11-14_01-38-47', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013855_ec142d']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Swimmer-v0,290.223 ± 10.3715 (n=102),288.126 ± 8.34982 (n=50) +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00002_2_seed=102_2022-11-14_01-39-00', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013910_914b23']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Swimmer-v0,289.066 ± 10.4919 (n=102),291.563 ± 9.90896 (n=50) +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00003_3_seed=103_2022-11-14_01-39-08', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013916_06a767']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Swimmer-v0,289.483 ± 9.59645 (n=102),289.95 ± 10.0327 (n=50) +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00001_1_seed=101_2022-11-14_01-38-53', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013902_596a0a']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Swimmer-v0,287.93 ± 8.76716 (n=102),285.815 ± 10.1058 (n=50) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,1,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_swimmer_best_hp_eval', 'inner_239d1_00004_4_seed=104_2022-11-14_01-39-14', 'output', 'dagger', 'seals_Swimmer-v0', '20221114_013924_fd5c0e']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,imitation.algorithms.dagger.LinearBetaSchedule,15,3,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Swimmer-v0,288.178 ± 8.95568 (n=102),289.018 ± 9.15658 (n=50) diff --git a/benchmarking/results/logs_example_dagger_seals_walker_bhp.csv b/benchmarking/results/logs_example_dagger_seals_walker_bhp.csv new file mode 100644 index 000000000..305b56898 --- /dev/null +++ b/benchmarking/results/logs_example_dagger_seals_walker_bhp.csv @@ -0,0 +1,6 @@ +agent_path,seed,bc_kwargs.batch_size,bc_kwargs.l2_weight,bc_kwargs.optimizer_cls.py/type,bc_kwargs.optimizer_kwargs.lr,bc_train_kwargs.log_interval,bc_train_kwargs.n_batches,bc_train_kwargs.n_epochs,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,dagger.beta_schedule.decay_probability,dagger.beta_schedule.py/object,dagger.rollout_round_min_episodes,dagger.total_timesteps,dagger.use_offline_rollouts,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.path,expert.policy_type,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,101,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 
'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00001_1_seed=101_2022-11-14_00-45-42', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_010211_f3e6f1']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Walker2d-v0,2617.43 ± 644.979 (n=100),2603.29 ± 615.705 (n=50) +,100,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00000_0_seed=100_2022-11-14_00-09-05', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_004544_4d5105']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Walker2d-v0,2603.97 ± 698.661 (n=100),2696.42 ± 584.967 (n=50) +,102,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00002_2_seed=102_2022-11-14_01-02-09', 'output', 'dagger', 'seals_Walker2d-v0', 
'20221114_010930_87aa1e']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Walker2d-v0,2703.58 ± 755.159 (n=100),2643.04 ± 621.008 (n=50) +,104,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00004_4_seed=104_2022-11-14_01-12-44', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_011416_4f858c']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Walker2d-v0,2750.23 ± 552.364 (n=100),2656.56 ± 683.49 (n=50) +,103,16,0.0001,torch.optim.adam.Adam,0.001,500,,5,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_dagger_seals_walker_best_hp_eval', 'inner_9b366_00003_3_seed=103_2022-11-14_01-09-28', 'output', 'dagger', 'seals_Walker2d-v0', '20221114_011246_b62527']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,0.7,imitation.algorithms.dagger.ExponentialBetaSchedule,5,100000.0,False,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/policies/final/,ppo,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,DAgger,seals/Walker2d-v0,2672.75 ± 510.027 (n=100),2744.53 ± 607.177 (n=50) diff --git a/benchmarking/results/logs_example_gail_seals_ant_bhp.csv b/benchmarking/results/logs_example_gail_seals_ant_bhp.csv new file mode 100644 index 000000000..0c538e889 --- /dev/null +++ b/benchmarking/results/logs_example_gail_seals_ant_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,e
xpert_return_summary,imit_return_summary +,0,102,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_170527_c06945,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),1649.79 ± 447.102 (n=56) +,0,104,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_171143_0c5a14,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),2377.56 ± 615.104 (n=56) +,0,101,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_164822_bf165a,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),2494.22 ± 494.241 (n=56) +,0,103,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_171019_da32dd,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),1789.58 ± 825.401 (n=56) +,0,100,False,10000000.0,32,16384,8,seals/Ant-v0,output/gail/seals_Ant-v0/20221022_154828_32c746,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_ant_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,16,0.3,0.008871887607426377,0.8,0.995,2.428297806883194e-05,0.9,10,0.4351450387648799,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/Ant-v0,2408.22 ± 665.201 (n=104),2320.07 ± 571.159 (n=56) diff --git a/benchmarking/results/logs_example_gail_seals_half_cheetah_bhp.csv b/benchmarking/results/logs_example_gail_seals_half_cheetah_bhp.csv new file mode 100644 index 000000000..0a1f3e7c6 --- /dev/null +++ b/benchmarking/results/logs_example_gail_seals_half_cheetah_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls.py/type,train.policy_kwargs.features_extrac
tor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,algo,env_name,expert_return_summary,imit_return_summary +,0,102,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_002918_4b9b24,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3174.02 ± 940.62 (n=56) +,0,103,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_002959_f202b3,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),3165.06 ± 819.894 (n=56) +,0,101,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_002637_97ec09,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),2917.02 ± 998.621 (n=56) +,0,104,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_003011_9f8d5c,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),2840.81 ± 1171.5 (n=56) +,0,100,False,10000000.0,8192,512,8,seals/HalfCheetah-v0,output/gail/seals_HalfCheetah-v0/20221021_001643_0ab3dd,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-09-05T18:27:27-07:00/seals_half_cheetah_1/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,3.992371122209408e-06,0.95,0.95,0.00026250519057717037,0.8,5,0.11483689492120866,50,imitation.policies.base.FeedForward32Policy,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,GAIL,seals/HalfCheetah-v0,3465.42 ± 976.462 (n=104),2952.95 ± 650.494 (n=56) diff --git a/benchmarking/results/logs_example_gail_seals_hopper_bhp.csv b/benchmarking/results/logs_example_gail_seals_hopper_bhp.csv new file mode 100644 index 000000000..1674d6508 --- /dev/null +++ b/benchmarking/results/logs_example_gail_seals_hopper_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls,train.policy_kwargs.activation_fn.py/type,t
rain.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,train.policy_kwargs.net_arch,algo,env_name,expert_return_summary,imit_return_summary +,0,100,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081326_aaa4d4,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2539.6 ± 160.69 (n=56) +,0,101,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081328_1544bd,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2681.16 ± 121.442 (n=56) +,0,102,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081327_67142d,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2765.27 ± 134.75 (n=56) +,0,103,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081327_5c0a51,"['tensorboard', 'stdout', 'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2679.83 ± 133.841 (n=56) +,0,104,False,10000000.0,128,4096,8,seals/Hopper-v0,output/gail/seals_Hopper-v0/20221022_081327_641c89,"['tensorboard', 'stdout', 
'wandb']",,20,,,8,True,False,algorithm-benchmark,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_hopper_2/rollouts/final.pkl,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,512,0.1,0.001255299425412744,0.98,0.995,4.3984856156897565e-05,0.9,20,0.20315938606555833,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Hopper-v0,2630.92 ± 112.582 (n=104),2691.2 ± 148.312 (n=56) diff --git a/benchmarking/results/logs_example_gail_seals_swimmer_bhp.csv b/benchmarking/results/logs_example_gail_seals_swimmer_bhp.csv new file mode 100644 index 000000000..15119028d --- /dev/null +++ b/benchmarking/results/logs_example_gail_seals_swimmer_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.env_name,expert.loader_kwargs.organization,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,
rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls,train.policy_kwargs.activation_fn.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,train.policy_kwargs.net_arch,algo,env_name,expert_return_summary,imit_return_summary +,0,103,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00003_3_seed=103_2022-11-03_07-52-12', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075221_8d85d9']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),295.837 ± 7.8336 (n=50) +,0,100,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00000_0_seed=100_2022-11-03_07-52-05', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075214_8ceb71']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),292.676 ± 11.1014 (n=50) +,0,102,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00002_2_seed=102_2022-11-03_07-52-12', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075221_d6a329']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),291.416 ± 10.8008 (n=50) +,0,101,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00001_1_seed=101_2022-11-03_07-52-10', 'output', 'gail', 'seals_Swimmer-v0', 
'20221103_075215_2cc4e0']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),297.635 ± 8.87094 (n=50) +,0,104,False,10000000.0,32,4096,16,seals/Swimmer-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_swimmer_best_hp_eval', 'inner_14cf1_00004_4_seed=104_2022-11-03_07-52-12', 'output', 'gail', 'seals_Swimmer-v0', '20221103_075221_65562a']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_swimmer_0/rollouts/final.pkl,seals/Swimmer-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,4096,stable_baselines3.ppo.ppo.PPO,64,0.1,2.257758693006348e-06,0.95,0.999,2.0190030388504567e-05,2,5,0.6162112311062333,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Swimmer-v0,298.247 ± 7.80207 (n=104),293.427 ± 10.7178 (n=50) diff --git a/benchmarking/results/logs_example_gail_seals_walker_bhp.csv b/benchmarking/results/logs_example_gail_seals_walker_bhp.csv new file mode 100644 index 000000000..818a6a7b0 --- /dev/null +++ 
b/benchmarking/results/logs_example_gail_seals_walker_bhp.csv @@ -0,0 +1,6 @@ +agent_path,checkpoint_interval,seed,show_config,total_timesteps,algorithm_kwargs.demo_batch_size,algorithm_kwargs.gen_replay_buffer_capacity,algorithm_kwargs.n_disc_updates_per_round,common.env_name,common.log_dir.py/reduce,common.log_format_strs,common.log_format_strs_additional.wandb,common.log_level,common.log_root,common.max_episode_steps,common.num_vec,common.parallel,common.wandb.wandb_kwargs.monitor_gym,common.wandb.wandb_kwargs.project,common.wandb.wandb_kwargs.save_code,common.wandb.wandb_name_prefix,common.wandb.wandb_tag,demonstrations.n_expert_demos,demonstrations.rollout_path,expert.loader_kwargs.env_name,expert.loader_kwargs.organization,expert.policy_type,reward.add_std_alpha,reward.ensemble_size,reward.net_cls.py/type,reward.net_kwargs.normalize_input_layer.py/type,reward.normalize_output_layer.py/type,rl.batch_size,rl.rl_cls.py/type,rl.rl_kwargs.batch_size,rl.rl_kwargs.clip_range,rl.rl_kwargs.ent_coef,rl.rl_kwargs.gae_lambda,rl.rl_kwargs.gamma,rl.rl_kwargs.learning_rate,rl.rl_kwargs.max_grad_norm,rl.rl_kwargs.n_epochs,rl.rl_kwargs.vf_coef,train.n_episodes_eval,train.policy_cls,train.policy_kwargs.activation_fn.py/type,train.policy_kwargs.features_extractor_class.py/type,train.policy_kwargs.features_extractor_kwargs.normalize_class.py/type,train.policy_kwargs.net_arch,algo,env_name,expert_return_summary,imit_return_summary +,0,104,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00004_4_seed=104_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062130_84fd94']}]","['tensorboard', 'stdout', 
'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2720.91 ± 466.367 (n=50) +,0,102,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00002_2_seed=102_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062130_471aeb']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2600.9 ± 565.618 (n=50) +,0,100,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00000_0_seed=100_2022-11-09_06-21-22', 'output', 'gail', 
'seals_Walker2d-v0', '20221109_062128_c33939']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2539.56 ± 651.114 (n=50) +,0,103,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 'inner_c8027_00003_3_seed=103_2022-11-09_06-21-24', 'output', 'gail', 'seals_Walker2d-v0', '20221109_062130_1ac751']}]","['tensorboard', 'stdout', 'wandb']",,20,,,1,True,False,imitation,False,,,,/home/taufeeque/imitation/output/train_experts/2022-10-11T06:27:42-07:00/seals_walker_3/rollouts/final.pkl,seals/Walker2d-v0,HumanCompatibleAI,ppo-huggingface,,,imitation.rewards.reward_nets.BasicRewardNet,imitation.util.networks.RunningNorm,imitation.util.networks.RunningNorm,16384,stable_baselines3.ppo.ppo.PPO,128,0.4,0.0007566389899529574,0.92,0.98,1.943992487657563e-05,0.6,20,0.6167177795726859,50,MlpPolicy,torch.nn.modules.activation.ReLU,imitation.policies.base.NormalizeFeaturesExtractor,imitation.util.networks.RunningNorm,"[{'pi': [64, 64], 'vf': [64, 64]}]",GAIL,seals/Walker2d-v0,2672.96 ± 639.375 (n=104),2712.06 ± 608.339 (n=50) +,0,101,False,10000000.0,512,16384,16,seals/Walker2d-v0,"[{'py/type': 'pathlib.PosixPath'}, {'py/tuple': ['/', 'home', 'taufeeque', 'ray_results', 'example_gail_seals_walker_best_hp_eval', 
def _get_algo_name(sd: sacred_util.SacredDicts) -> str:
    """Return the display name of the algorithm behind a Sacred run.

    The experiment command (as returned by `_get_exp_command`) is matched
    case-insensitively against the known training commands.

    Args:
        sd: Sacred dictionaries describing the run; forwarded unchanged to
            `_get_exp_command`.

    Returns:
        The human-readable algorithm name, e.g. ``"GAIL"`` or ``"DAgger"``.

    Raises:
        ValueError: If the experiment command does not correspond to any
            known algorithm.
    """
    exp_command = _get_exp_command(sd)

    # Both the script-style ("train_bc") and the short ("bc") spellings of a
    # command map to the same algorithm name.
    command_to_algo = {
        "train_bc": "BC",
        "bc": "BC",
        "train_dagger": "DAgger",
        "dagger": "DAgger",
        "gail": "GAIL",
        "airl": "AIRL",
        "preference_comparisons": "Preference Comparisons",
    }

    # Single lookup: normalise the command once and let the dict decide.
    try:
        return command_to_algo[exp_command.lower()]
    except KeyError:
        # Report the command exactly as it was recorded, not the lowered key.
        raise ValueError(f"Unknown command: {exp_command}") from None
"""Compare experiment results to baseline results.

This script compares experiment results to the results reported in the
[paper](https://arxiv.org/pdf/2211.11972.pdf). It takes as input a CSV file
containing experiment results, and outputs a table of p-values comparing the
experiment results to the baseline results.

Usage:
    $ python compare_to_baseline.py results.csv

The results file should be a CSV file containing the following columns:
    * algo: The name of the imitation algorithm.
    * env_name: The name of the environment.
    * imit_return_summary: A string whose first whitespace-separated token is
      the mean return of one run, as reported by `imitation.scripts.analyze`.
"""

import glob
import sys
from typing import TYPE_CHECKING

import pandas as pd

# `import scipy` alone does not guarantee that the `stats` subpackage has been
# loaded; import the subpackage explicitly.
import scipy.stats

if TYPE_CHECKING:
    # Only needed for annotations, so the import is skipped at runtime.
    from imitation.data import types

# Relative pattern: it is resolved against the current working directory, so
# the default only finds the baselines when run from the repository root.
_BASELINE_PATTERN = "benchmarking/results/*.csv"


def compare_results_to_baseline(
    results_filename: "types.AnyPath",
    baseline_pattern: str = _BASELINE_PATTERN,
) -> pd.DataFrame:
    """Compare benchmark results to baseline results.

    Both the experiment results and every baseline file are reduced to
    per-(algorithm, environment) summary statistics; matching groups are then
    compared with a two-sample t-test computed from those statistics.

    Args:
        results_filename: Path to a CSV file containing experiment results.
        baseline_pattern: Glob pattern selecting the baseline CSV files.

    Returns:
        A DataFrame with columns ``algo``, ``env_name`` and ``pvalue``, one row
        per (algorithm, environment) pair present in both the experiment
        results and the baselines.

    Raises:
        ValueError: If `baseline_pattern` does not match any file.
    """
    results_summary = load_and_summarize_csv(results_filename)

    # Sorted so that the row order does not depend on directory listing order.
    baseline_filenames = sorted(glob.glob(baseline_pattern))
    if not baseline_filenames:
        raise ValueError(
            f"No baseline result files match {baseline_pattern!r}; "
            "run from the repository root or pass `baseline_pattern`.",
        )
    baseline_summary = pd.concat(
        [load_and_summarize_csv(filename) for filename in baseline_filenames],
    )

    # Inner join: `_x` columns are the experiment, `_y` columns the baseline.
    comparison = pd.merge(results_summary, baseline_summary, on=["algo", "env_name"])

    comparison["pvalue"] = scipy.stats.ttest_ind_from_stats(
        comparison["mean_x"],
        comparison["std_x"],
        comparison["count_x"],
        comparison["mean_y"],
        comparison["std_y"],
        comparison["count_y"],
    ).pvalue

    return comparison[["algo", "env_name", "pvalue"]]


def load_and_summarize_csv(results_filename: "types.AnyPath") -> pd.DataFrame:
    """Load a results CSV file and summarize the statistics.

    Args:
        results_filename: Path to a CSV file containing experiment results.

    Returns:
        A DataFrame with one row per (algo, env_name) group, holding the
        `describe()` statistics (count, mean, std, quartiles, ...) of the
        per-run mean returns.
    """
    data = pd.read_csv(results_filename)
    # The summary string starts with the mean return; `split()` without an
    # argument also tolerates leading or repeated whitespace.
    data["imit_return"] = data["imit_return_summary"].apply(
        lambda summary: float(summary.split()[0]),
    )
    summary = (
        data[["algo", "env_name", "imit_return"]]
        .groupby(["algo", "env_name"])
        .describe()
    )
    # `describe()` yields ("imit_return", <statistic>) column pairs; keep only
    # the statistic names.
    summary.columns = summary.columns.get_level_values(1)
    return summary.reset_index()


def main() -> None:  # pragma: no cover
    """Run the script on the results file given as the only CLI argument."""
    if len(sys.argv) != 2:
        # Report misuse on stderr with a non-zero exit status.
        sys.exit("Supply a path to a results file")
    print(compare_results_to_baseline(sys.argv[1]).to_string())


if __name__ == "__main__":
    main()
][0] + < p_value + )