diff --git a/.cache/v/cache/lastfailed b/.cache/v/cache/lastfailed
new file mode 100644
index 0000000..6effa1d
--- /dev/null
+++ b/.cache/v/cache/lastfailed
@@ -0,0 +1,3 @@
+{
+  "tests/test_multiagent_representations.py": true
+}
\ No newline at end of file
diff --git a/control_pcgrl/configs/config.py b/control_pcgrl/configs/config.py
index cb1883e..6ecd1ea 100644
--- a/control_pcgrl/configs/config.py
+++ b/control_pcgrl/configs/config.py
@@ -32,6 +32,15 @@ class BinaryPathConfig(ProblemConfig):
     name: str = 'binary'
     # Regions weight will be 0 by default.
     weights: Dict[str, int] = field(default_factory = lambda: ({
+        # 'player': 1,
+        # 'create': 1,
+        # 'target': 1,
+        # 'regions': 1,
+        # 'ratio': 1,
+        # 'dist-win': 1,
+        # 'sol-length': 2
+
+        'path-length': 100,
     }))
 
 
@@ -51,6 +60,8 @@ class BinaryControlConfig(ProblemConfig):
 @dataclass
 class MultiagentConfig:
     n_agents: int = MISSING
+    # valid values: (shared, independent, JSON string)
+    policies: str = "centralized"  # use shared weights by default
 
 
 @dataclass
@@ -107,6 +118,7 @@ class ControlPCGRLConfig:
     multiagent: MultiagentConfig = MISSING
     problem: ProblemConfig = MISSING
 
+    algorithm: str = 'PPO'
     debug: bool = False
     render: bool = False
     infer: bool = False
@@ -117,6 +129,7 @@ class ControlPCGRLConfig:
     exp_id: str = '0'
     representation: str = 'turtle'
+    show_agents: bool = False
     learning_rate: float = 5e-6
     gamma: float = 0.99
     map_shape: List[Any] = field(default_factory=lambda:
@@ -158,4 +171,4 @@ class ControlPCGRLConfig:
 cs.store(name="binary_path", group="problem", node=BinaryPathConfig)
 
 cs.store(name="default_model", group="model", node=ModelConfig)
-cs.store(name="seqnca", group="model", node=SeqNCAConfig)
\ No newline at end of file
+cs.store(name="seqnca", group="model", node=SeqNCAConfig)
diff --git a/control_pcgrl/configs/config.yaml b/control_pcgrl/configs/config.yaml
index 8d412bd..189ae22 100644
--- a/control_pcgrl/configs/config.yaml
+++ b/control_pcgrl/configs/config.yaml
@@ -3,22 +3,22 @@ defaults:
   - _self_
 
   # Why can't we override this on the command line?
-  # - override hydra/launcher: submitit_local
+  #- override hydra/launcher: submitit_local
   - override hydra/launcher: submitit_slurm
 
 hydra:
   sweeper:
     params:
-      exp_id: 0, 1, 2
-      learning_rate: 5e-4, 1e-4, 5e-5, 1e-5, 5e-6, 1e-6
+      #exp_id: 0, 1, 2
+      learning_rate: 5e-5
   launcher:
     tasks_per_node: 1
     #FIXME: Can't set this to 1 or 2 even when only asking from 1 ("0") worker from ray...
-    cpus_per_task: 10
+    cpus_per_task: 22
     gpus_per_node: 1
-    timeout_min: 1440
+    timeout_min: 1440  # 1 day of training
     mem_gb: 30
     # Emails maybe?
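A note on the new multiagent.policies option added to MultiagentConfig above: with the default value "centralized", all agents presumably share one set of policy weights, while "independent" (or a JSON string) would assign separate policies per agent. Below is a minimal, hypothetical sketch of how such a setting could be turned into an RLlib policy-mapping callback, using the newer policy_mapping_fn(agent_id, episode, worker, **kwargs) signature that the training logs further down flag as replacing the deprecated single-argument form. The helper name make_policy_mapping_fn and the policy IDs are illustrative assumptions, not identifiers from this repository.

# Hypothetical sketch only -- not the repository's actual implementation.
def make_policy_mapping_fn(policies_mode: str):
    """Build an RLlib-style policy_mapping_fn from the `multiagent.policies` setting."""
    if policies_mode == "centralized":
        # Shared weights: every agent is routed to the same policy ID.
        def policy_mapping_fn(agent_id, episode, worker, **kwargs):
            return "default_policy"
    else:
        # Independent policies: e.g. agent "agent_2" -> "policy_2".
        def policy_mapping_fn(agent_id, episode, worker, **kwargs):
            return f"policy_{str(agent_id).split('_')[-1]}"
    return policy_mapping_fn

With multiagent.n_agents=2 and the shared default, both agents would then train against a single policy, which is what the "use shared weights by default" comment above suggests.
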
diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696/27482696_submission.sh b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696/27482696_submission.sh
new file mode 100644
index 0000000..1ae510b
--- /dev/null
+++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696/27482696_submission.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Parameters
+#SBATCH --array=0-3%4
+#SBATCH --cpus-per-task=10
+#SBATCH --error=/scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/%A_%a/%A_%a_0_log.err
+#SBATCH --gpus-per-node=1
+#SBATCH --job-name=train_ctrl
+#SBATCH --mem=30GB
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --open-mode=append
+#SBATCH --output=/scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/%A_%a/%A_%a_0_log.out
+#SBATCH --signal=USR2@120
+#SBATCH --time=1440
+#SBATCH --wckey=submitit
+
+# command
+export SUBMITIT_EXECUTOR=slurm
+srun --unbuffered --output /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/%A_%a/%A_%a_%t_log.out --error /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/%A_%a/%A_%a_%t_log.err /scratch/rd2893/miniconda3/envs/pcgrl/bin/python -u -m submitit.core._submit /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/%j
diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_0/27482696_0_0_log.err b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_0/27482696_0_0_log.err
new file mode 100644
index 0000000..8e48be2
--- /dev/null
+++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_0/27482696_0_0_log.err
@@ -0,0 +1,70 @@
+/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16)
+  "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). "
+2022-11-29 15:05:25,106 WARNING env.py:236 -- Your MultiAgentEnv >>>>>>>> does not have some or all of the needed base-class attributes! Make sure you call `super().__init__` from within your MutiAgentEnv's constructor. This will raise an error in the future.
+2022-11-29 15:05:27,892 INFO worker.py:1518 -- Started a local Ray instance.
+(PPOTrainer pid=3593784) 2022-11-29 15:05:35,626 INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
+(PPOTrainer pid=3593784) 2022-11-29 15:05:35,628 INFO algorithm.py:358 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
+(PPOTrainer pid=3593784) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16)
+(PPOTrainer pid=3593784) "A Box observation space has an unconventional shape (neither an image, nor a 1D vector).
" +(PPOTrainer pid=3593784) 2022-11-29 15:05:42,133 WARNING deprecation.py:48 -- DeprecationWarning: `simple_optimizer` has been deprecated. This will raise an error in the future! +(PPOTrainer pid=3593784) 2022-11-29 15:05:42,337 WARNING util.py:66 -- Install gputil for GPU system monitoring. +(PPOTrainer pid=3593784) 2022-11-29 15:05:42,421 WARNING deprecation.py:48 -- DeprecationWarning: `policy_mapping_fn(agent_id)` has been deprecated. Use `policy_mapping_fn(agent_id, episode, worker, **kwargs)` instead. This will raise an error in the future! +(PPOTrainer pid=3593784) 2022-11-29 15:05:44,433 WARNING deprecation.py:48 -- DeprecationWarning: `concat_samples` has been deprecated. Use `concat_samples() from rllib.policy.sample_batch` instead. This will raise an error in the future! +(PPOTrainer pid=3593784) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/ray/rllib/utils/metrics/learner_info.py:110: RuntimeWarning: Mean of empty slice +(PPOTrainer pid=3593784) return np.nanmean(tower_data) +2022-11-30 01:46:43,745 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 7.095 s, which may be a performance bottleneck. +2022-11-30 01:46:43,747 WARNING util.py:244 -- The `process_trial_result` operation took 7.099 s, which may be a performance bottleneck. +2022-11-30 01:46:43,747 WARNING util.py:244 -- Processing trial results took 7.099 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:46:43,747 WARNING util.py:244 -- The `process_trial_result` operation took 7.100 s, which may be a performance bottleneck. +2022-11-30 01:48:00,324 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 9.173 s, which may be a performance bottleneck. +2022-11-30 01:48:00,327 WARNING util.py:244 -- The `process_trial_result` operation took 9.178 s, which may be a performance bottleneck. +2022-11-30 01:48:00,331 WARNING util.py:244 -- Processing trial results took 9.182 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:48:00,332 WARNING util.py:244 -- The `process_trial_result` operation took 9.184 s, which may be a performance bottleneck. +2022-11-30 01:50:26,182 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 6.208 s, which may be a performance bottleneck. +2022-11-30 01:50:26,184 WARNING util.py:244 -- The `process_trial_result` operation took 6.212 s, which may be a performance bottleneck. +2022-11-30 01:50:26,184 WARNING util.py:244 -- Processing trial results took 6.212 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:50:26,184 WARNING util.py:244 -- The `process_trial_result` operation took 6.214 s, which may be a performance bottleneck. +submitit WARNING (2022-11-30 15:02:50,873) - Caught signal SIGUSR2 on gv002.hpc.nyu.edu: this job is timed-out. +submitit WARNING (2022-11-30 15:02:51,153) - Bypassing signal SIGCONT +Traceback (most recent call last): + File "/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/submitit/core/job_environment.py", line 226, in checkpoint_and_try_requeue + raise utils.UncompletedJobError(message) +submitit.core.utils.UncompletedJobError: Job not requeued because: timed-out too many times. 
+Exception ignored in: 'ray._raylet.check_signals' +Traceback (most recent call last): + File "/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/submitit/core/job_environment.py", line 226, in checkpoint_and_try_requeue + raise utils.UncompletedJobError(message) +submitit.core.utils.UncompletedJobError: Job not requeued because: timed-out too many times. +slurmstepd: error: *** JOB 27482702 ON gv002 CANCELLED AT 2022-11-30T15:05:22 DUE TO TIME LIMIT *** +slurmstepd: error: *** STEP 27482702.0 ON gv002 CANCELLED AT 2022-11-30T15:05:22 DUE TO TIME LIMIT *** +*** SIGTERM received at time=1669838722 on cpu 6 *** +[failure_signal_handler.cc : 331] RAW: Signal 15 raised at PC=0x14df1d5017b0 while already in AbslFailureSignalHandler() +*** SIGTERM received at time=1669838722 on cpu 6 *** +PC: @ 0x14df1d5017b0 (unknown) absl::lts_20211102::debugging_internal::ParseMangledName() + @ 0x14df25374b20 3424 (unknown) + @ 0x14df1d4fbf78 32 absl::lts_20211102::debugging_internal::DemangleInplace() + @ 0x14df1d4fd4b2 1296 absl::lts_20211102::debugging_internal::(anonymous namespace)::Symbolizer::GetSymbol() + @ 0x14df1d4fd7fe 80 absl::lts_20211102::Symbolize() + @ 0x14df1d4d162d 2144 absl::lts_20211102::debugging_internal::DumpPCAndFrameSizeAndSymbol() + @ 0x14df1d4d1751 192 absl::lts_20211102::debugging_internal::DumpPCAndFrameSizesAndStackTrace() + @ 0x14df1d4d10c6 496 absl::lts_20211102::WriteStackTrace() + @ 0x14df1d4d12d1 80 absl::lts_20211102::AbslFailureSignalHandler() + @ 0x14df25374b20 (unknown) (unknown) +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: *** SIGTERM received at time=1669838722 on cpu 6 *** +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: PC: @ 0x14df1d5017b0 (unknown) absl::lts_20211102::debugging_internal::ParseMangledName() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df25374b20 3424 (unknown) +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4fbf78 32 absl::lts_20211102::debugging_internal::DemangleInplace() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4fd4b2 1296 absl::lts_20211102::debugging_internal::(anonymous namespace)::Symbolizer::GetSymbol() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4fd7fe 80 absl::lts_20211102::Symbolize() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4d162d 2144 absl::lts_20211102::debugging_internal::DumpPCAndFrameSizeAndSymbol() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4d1751 192 absl::lts_20211102::debugging_internal::DumpPCAndFrameSizesAndStackTrace() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4d10c6 496 absl::lts_20211102::WriteStackTrace() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4d12d1 80 absl::lts_20211102::AbslFailureSignalHandler() +[2022-11-30 15:05:22,385 E 3592250 3592250] logging.cc:361: @ 0x14df25374b20 (unknown) (unknown) +PC: @ 0x14df2537064a (unknown) pthread_cond_timedwait@@GLIBC_2.3.2 + @ 0x14df25374b20 (unknown) (unknown) +[2022-11-30 15:05:22,385 E 3592250 3592250] logging.cc:361: *** SIGTERM received at time=1669838722 on cpu 6 *** +[2022-11-30 15:05:22,385 E 3592250 3592250] logging.cc:361: PC: @ 0x14df2537064a (unknown) pthread_cond_timedwait@@GLIBC_2.3.2 +[2022-11-30 15:05:22,385 E 3592250 3592250] logging.cc:361: @ 0x14df25374b20 (unknown) (unknown) +submitit WARNING (2022-11-30 15:05:22,799) - Bypassing signal SIGTERM +submitit WARNING (2022-11-30 15:05:22,819) - Bypassing 
signal SIGCONT diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_0/27482696_0_submitted.pkl b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_0/27482696_0_submitted.pkl new file mode 100644 index 0000000..b5d7e93 Binary files /dev/null and b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_0/27482696_0_submitted.pkl differ diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_0_log.err b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_0_log.err new file mode 100644 index 0000000..704e5b9 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_0_log.err @@ -0,0 +1,20 @@ +/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16) + "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). " +2022-11-29 15:05:25,086 WARNING env.py:236 -- Your MultiAgentEnv >>>>>>>> does not have some or all of the needed base-class attributes! Make sure you call `super().__init__` from within your MutiAgentEnv's constructor. This will raise an error in the future. +2022-11-29 15:05:27,858 INFO worker.py:1518 -- Started a local Ray instance. +(PPOTrainer pid=532048) 2022-11-29 15:05:35,162 INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you. +(PPOTrainer pid=532048) 2022-11-29 15:05:35,163 INFO algorithm.py:358 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags. +(PPOTrainer pid=532048) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16) +(PPOTrainer pid=532048) "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). " +(PPOTrainer pid=532048) 2022-11-29 15:05:41,998 WARNING deprecation.py:48 -- DeprecationWarning: `simple_optimizer` has been deprecated. This will raise an error in the future! +(PPOTrainer pid=532048) 2022-11-29 15:05:42,202 WARNING util.py:66 -- Install gputil for GPU system monitoring. +(PPOTrainer pid=532048) 2022-11-29 15:05:42,269 WARNING deprecation.py:48 -- DeprecationWarning: `policy_mapping_fn(agent_id)` has been deprecated. Use `policy_mapping_fn(agent_id, episode, worker, **kwargs)` instead. This will raise an error in the future! +(PPOTrainer pid=532048) 2022-11-29 15:05:44,548 WARNING deprecation.py:48 -- DeprecationWarning: `concat_samples` has been deprecated. Use `concat_samples() from rllib.policy.sample_batch` instead. This will raise an error in the future! 
+(PPOTrainer pid=532048) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/ray/rllib/utils/metrics/learner_info.py:110: RuntimeWarning: Mean of empty slice +(PPOTrainer pid=532048) return np.nanmean(tower_data) +2022-11-30 01:48:00,310 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 9.325 s, which may be a performance bottleneck. +2022-11-30 01:48:00,312 WARNING util.py:244 -- The `process_trial_result` operation took 9.327 s, which may be a performance bottleneck. +2022-11-30 01:48:00,312 WARNING util.py:244 -- Processing trial results took 9.328 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:48:00,312 WARNING util.py:244 -- The `process_trial_result` operation took 9.329 s, which may be a performance bottleneck. +submitit WARNING (2022-11-30 15:02:50,633) - Caught signal SIGUSR2 on gv005.hpc.nyu.edu: this job is timed-out. +submitit WARNING (2022-11-30 15:02:50,633) - Bypassing signal SIGCONT diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_0_result.pkl b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_0_result.pkl new file mode 100644 index 0000000..c263664 Binary files /dev/null and b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_0_result.pkl differ diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_submitted.pkl b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_submitted.pkl new file mode 100644 index 0000000..5b10016 Binary files /dev/null and b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_submitted.pkl differ diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_2/27482696_2_0_log.err b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_2/27482696_2_0_log.err new file mode 100644 index 0000000..45400d9 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_2/27482696_2_0_log.err @@ -0,0 +1,56 @@ +/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16) + "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). " +2022-11-29 15:05:25,081 WARNING env.py:236 -- Your MultiAgentEnv >>>>>>>> does not have some or all of the needed base-class attributes! Make sure you call `super().__init__` from within your MutiAgentEnv's constructor. This will raise an error in the future. +2022-11-29 15:05:27,854 INFO worker.py:1518 -- Started a local Ray instance. +(PPOTrainer pid=1434787) 2022-11-29 15:05:35,204 INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you. +(PPOTrainer pid=1434787) 2022-11-29 15:05:35,205 INFO algorithm.py:358 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags. 
+(PPOTrainer pid=1434787) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16) +(PPOTrainer pid=1434787) "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). " +(PPOTrainer pid=1434787) 2022-11-29 15:05:41,952 WARNING deprecation.py:48 -- DeprecationWarning: `simple_optimizer` has been deprecated. This will raise an error in the future! +(PPOTrainer pid=1434787) 2022-11-29 15:05:42,156 WARNING util.py:66 -- Install gputil for GPU system monitoring. +(PPOTrainer pid=1434787) 2022-11-29 15:05:42,261 WARNING deprecation.py:48 -- DeprecationWarning: `policy_mapping_fn(agent_id)` has been deprecated. Use `policy_mapping_fn(agent_id, episode, worker, **kwargs)` instead. This will raise an error in the future! +(PPOTrainer pid=1434787) 2022-11-29 15:05:44,313 WARNING deprecation.py:48 -- DeprecationWarning: `concat_samples` has been deprecated. Use `concat_samples() from rllib.policy.sample_batch` instead. This will raise an error in the future! +(PPOTrainer pid=1434787) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/ray/rllib/utils/metrics/learner_info.py:110: RuntimeWarning: Mean of empty slice +(PPOTrainer pid=1434787) return np.nanmean(tower_data) +2022-11-30 01:30:35,219 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 8.386 s, which may be a performance bottleneck. +2022-11-30 01:30:35,220 WARNING util.py:244 -- The `process_trial_result` operation took 8.388 s, which may be a performance bottleneck. +2022-11-30 01:30:35,220 WARNING util.py:244 -- Processing trial results took 8.388 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:30:35,220 WARNING util.py:244 -- The `process_trial_result` operation took 8.389 s, which may be a performance bottleneck. +2022-11-30 01:31:45,292 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 10.041 s, which may be a performance bottleneck. +2022-11-30 01:31:45,293 WARNING util.py:244 -- The `process_trial_result` operation took 10.042 s, which may be a performance bottleneck. +2022-11-30 01:31:45,296 WARNING util.py:244 -- Processing trial results took 10.046 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:31:45,296 WARNING util.py:244 -- The `process_trial_result` operation took 10.047 s, which may be a performance bottleneck. +submitit WARNING (2022-11-30 15:02:50,940) - Caught signal SIGUSR2 on gv006.hpc.nyu.edu: this job is timed-out. +submitit WARNING (2022-11-30 15:03:01,174) - Bypassing signal SIGCONT +Traceback (most recent call last): + File "/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/submitit/core/job_environment.py", line 226, in checkpoint_and_try_requeue + raise utils.UncompletedJobError(message) +submitit.core.utils.UncompletedJobError: Job not requeued because: timed-out too many times. 
+Exception ignored in: 'ray._raylet.check_signals' +Traceback (most recent call last): + File "/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/submitit/core/job_environment.py", line 226, in checkpoint_and_try_requeue + raise utils.UncompletedJobError(message) +submitit.core.utils.UncompletedJobError: Job not requeued because: timed-out too many times. +slurmstepd: error: *** JOB 27482704 ON gv006 CANCELLED AT 2022-11-30T15:05:22 DUE TO TIME LIMIT *** +slurmstepd: error: *** STEP 27482704.0 ON gv006 CANCELLED AT 2022-11-30T15:05:22 DUE TO TIME LIMIT *** +*** SIGTERM received at time=1669838722 on cpu 10 *** +PC: @ 0x14b4c1a2a64a (unknown) pthread_cond_timedwait@@GLIBC_2.3.2 +[failure_signal_handler.cc : 331] RAW: Signal 15 raised at PC=0x14b4c17b4627 while already in AbslFailureSignalHandler() +*** SIGTERM received at time=1669838722 on cpu 10 *** +PC: @ 0x14b4c17b4627 (unknown) __strlen_avx2 + @ 0x14b4c1a2eb20 5712 (unknown) + @ 0x14b4c16a74f6 1456 _IO_vfprintf + @ 0x14b4c16d0784 1664 __vsnprintf + @ 0x3462343178302020 (unknown) (unknown) +[2022-11-30 15:05:22,211 E 1432424 1432424] logging.cc:361: *** SIGTERM received at time=1669838722 on cpu 10 *** +[2022-11-30 15:05:22,211 E 1432424 1432424] logging.cc:361: PC: @ 0x14b4c17b4627 (unknown) __strlen_avx2 +[2022-11-30 15:05:22,213 E 1432424 1432424] logging.cc:361: @ 0x14b4c1a2eb20 5712 (unknown) +[2022-11-30 15:05:22,213 E 1432424 1432424] logging.cc:361: @ 0x14b4c16a74f6 1456 _IO_vfprintf +[2022-11-30 15:05:22,213 E 1432424 1432424] logging.cc:361: @ 0x14b4c16d0784 1664 __vsnprintf +[2022-11-30 15:05:22,214 E 1432424 1432424] logging.cc:361: @ 0x3462343178302020 (unknown) (unknown) + @ 0x14b4c1a2eb20 (unknown) (unknown) +[2022-11-30 15:05:22,214 E 1432424 1432424] logging.cc:361: *** SIGTERM received at time=1669838722 on cpu 10 *** +[2022-11-30 15:05:22,214 E 1432424 1432424] logging.cc:361: PC: @ 0x14b4c1a2a64a (unknown) pthread_cond_timedwait@@GLIBC_2.3.2 +[2022-11-30 15:05:22,216 E 1432424 1432424] logging.cc:361: @ 0x14b4c1a2eb20 (unknown) (unknown) +submitit WARNING (2022-11-30 15:05:22,705) - Bypassing signal SIGTERM +submitit WARNING (2022-11-30 15:05:22,705) - Bypassing signal SIGCONT diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_2/27482696_2_submitted.pkl b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_2/27482696_2_submitted.pkl new file mode 100644 index 0000000..30a8f15 Binary files /dev/null and b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_2/27482696_2_submitted.pkl differ diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_3/27482696_3_0_log.err b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_3/27482696_3_0_log.err new file mode 100644 index 0000000..bb9f91b --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_3/27482696_3_0_log.err @@ -0,0 +1,70 @@ +/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16) + "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). 
" +2022-11-29 15:05:25,249 WARNING env.py:236 -- Your MultiAgentEnv >>>>>>>> does not have some or all of the needed base-class attributes! Make sure you call `super().__init__` from within your MutiAgentEnv's constructor. This will raise an error in the future. +2022-11-29 15:05:27,900 INFO worker.py:1518 -- Started a local Ray instance. +2022-11-29 15:05:42,607 WARNING trial_runner.py:331 -- The maximum number of pending trials has been automatically set to the number of available cluster CPUs, which is high (176 CPUs/pending trials). If you're running an experiment with a large number of trials, this could lead to scheduling overhead. In this case, consider setting the `TUNE_MAX_PENDING_TRIALS_PG` environment variable to the desired maximum number of concurrent trials. +(PPOTrainer pid=1389415) 2022-11-29 15:05:48,172 INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you. +(PPOTrainer pid=1389415) 2022-11-29 15:05:48,174 INFO algorithm.py:358 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags. +(PPOTrainer pid=1389415) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16) +(PPOTrainer pid=1389415) "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). " +(PPOTrainer pid=1389415) 2022-11-29 15:05:55,949 WARNING deprecation.py:48 -- DeprecationWarning: `simple_optimizer` has been deprecated. This will raise an error in the future! +(PPOTrainer pid=1389415) 2022-11-29 15:05:56,175 WARNING util.py:66 -- Install gputil for GPU system monitoring. +(PPOTrainer pid=1389415) 2022-11-29 15:05:56,299 WARNING deprecation.py:48 -- DeprecationWarning: `policy_mapping_fn(agent_id)` has been deprecated. Use `policy_mapping_fn(agent_id, episode, worker, **kwargs)` instead. This will raise an error in the future! +(PPOTrainer pid=1389415) 2022-11-29 15:05:58,764 WARNING deprecation.py:48 -- DeprecationWarning: `concat_samples` has been deprecated. Use `concat_samples() from rllib.policy.sample_batch` instead. This will raise an error in the future! +(PPOTrainer pid=1389415) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/ray/rllib/utils/metrics/learner_info.py:110: RuntimeWarning: Mean of empty slice +(PPOTrainer pid=1389415) return np.nanmean(tower_data) +2022-11-30 01:30:35,354 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 7.989 s, which may be a performance bottleneck. +2022-11-30 01:30:35,358 WARNING util.py:244 -- The `process_trial_result` operation took 7.994 s, which may be a performance bottleneck. +2022-11-30 01:30:35,359 WARNING util.py:244 -- Processing trial results took 7.996 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:30:35,362 WARNING util.py:244 -- The `process_trial_result` operation took 8.001 s, which may be a performance bottleneck. +2022-11-30 01:32:50,896 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 5.364 s, which may be a performance bottleneck. 
+2022-11-30 01:32:50,901 WARNING util.py:244 -- The `process_trial_result` operation took 5.372 s, which may be a performance bottleneck. +2022-11-30 01:32:50,902 WARNING util.py:244 -- Processing trial results took 5.372 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:32:50,902 WARNING util.py:244 -- The `process_trial_result` operation took 5.375 s, which may be a performance bottleneck. +2022-11-30 01:46:43,788 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 8.858 s, which may be a performance bottleneck. +2022-11-30 01:46:43,810 WARNING util.py:244 -- The `process_trial_result` operation took 8.881 s, which may be a performance bottleneck. +2022-11-30 01:46:43,810 WARNING util.py:244 -- Processing trial results took 8.882 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:46:43,810 WARNING util.py:244 -- The `process_trial_result` operation took 8.885 s, which may be a performance bottleneck. +2022-11-30 01:48:00,389 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 8.216 s, which may be a performance bottleneck. +2022-11-30 01:48:00,394 WARNING util.py:244 -- The `process_trial_result` operation took 8.223 s, which may be a performance bottleneck. +2022-11-30 01:48:00,394 WARNING util.py:244 -- Processing trial results took 8.223 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:48:00,394 WARNING util.py:244 -- The `process_trial_result` operation took 8.226 s, which may be a performance bottleneck. +2022-11-30 01:50:26,268 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 10.092 s, which may be a performance bottleneck. +2022-11-30 01:50:26,287 WARNING util.py:244 -- The `process_trial_result` operation took 10.112 s, which may be a performance bottleneck. +2022-11-30 01:50:26,287 WARNING util.py:244 -- Processing trial results took 10.112 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:50:26,287 WARNING util.py:244 -- The `process_trial_result` operation took 10.114 s, which may be a performance bottleneck. +2022-11-30 01:51:35,706 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 8.865 s, which may be a performance bottleneck. +2022-11-30 01:51:35,710 WARNING util.py:244 -- The `process_trial_result` operation took 8.872 s, which may be a performance bottleneck. +2022-11-30 01:51:35,712 WARNING util.py:244 -- Processing trial results took 8.873 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:51:35,712 WARNING util.py:244 -- The `process_trial_result` operation took 8.877 s, which may be a performance bottleneck. +2022-11-30 04:05:42,396 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 0.530 s, which may be a performance bottleneck. +2022-11-30 04:05:42,416 WARNING util.py:244 -- The `process_trial_result` operation took 0.552 s, which may be a performance bottleneck. +2022-11-30 04:05:42,416 WARNING util.py:244 -- Processing trial results took 0.553 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 04:05:42,416 WARNING util.py:244 -- The `process_trial_result` operation took 0.555 s, which may be a performance bottleneck. 
+2022-11-30 07:25:45,666 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 0.690 s, which may be a performance bottleneck. +2022-11-30 07:25:45,684 WARNING util.py:244 -- The `process_trial_result` operation took 0.711 s, which may be a performance bottleneck. +2022-11-30 07:25:45,685 WARNING util.py:244 -- Processing trial results took 0.712 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 07:25:45,685 WARNING util.py:244 -- The `process_trial_result` operation took 0.714 s, which may be a performance bottleneck. +2022-11-30 13:45:51,631 WARNING util.py:244 -- The `on_step_begin` operation took 0.592 s, which may be a performance bottleneck. +submitit WARNING (2022-11-30 15:02:51,594) - Caught signal SIGUSR2 on ga019.hpc.nyu.edu: this job is timed-out. +submitit WARNING (2022-11-30 15:02:51,898) - Bypassing signal SIGCONT +Traceback (most recent call last): + File "/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/submitit/core/job_environment.py", line 226, in checkpoint_and_try_requeue + raise utils.UncompletedJobError(message) +submitit.core.utils.UncompletedJobError: Job not requeued because: timed-out too many times. +Exception ignored in: 'ray._raylet.check_signals' +Traceback (most recent call last): + File "/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/submitit/core/job_environment.py", line 226, in checkpoint_and_try_requeue + raise utils.UncompletedJobError(message) +submitit.core.utils.UncompletedJobError: Job not requeued because: timed-out too many times. +slurmstepd: error: *** STEP 27482696.0 ON ga019 CANCELLED AT 2022-11-30T15:05:22 DUE TO TIME LIMIT *** +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. +slurmstepd: error: *** JOB 27482696 ON ga019 CANCELLED AT 2022-11-30T15:05:22 DUE TO TIME LIMIT *** +*** SIGTERM received at time=1669838722 on cpu 85 *** +PC: @ 0x145586a8864a (unknown) pthread_cond_timedwait@@GLIBC_2.3.2 + @ 0x145586a8cb20 (unknown) (unknown) +[2022-11-30 15:05:22,219 E 1384324 1384324] logging.cc:361: *** SIGTERM received at time=1669838722 on cpu 85 *** +[2022-11-30 15:05:22,219 E 1384324 1384324] logging.cc:361: PC: @ 0x145586a8864a (unknown) pthread_cond_timedwait@@GLIBC_2.3.2 +[2022-11-30 15:05:22,219 E 1384324 1384324] logging.cc:361: @ 0x145586a8cb20 (unknown) (unknown) +submitit WARNING (2022-11-30 15:05:22,623) - Bypassing signal SIGTERM +submitit WARNING (2022-11-30 15:05:22,624) - Bypassing signal SIGCONT diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_3/27482696_3_submitted.pkl b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_3/27482696_3_submitted.pkl new file mode 100644 index 0000000..d14c79f Binary files /dev/null and b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_3/27482696_3_submitted.pkl differ diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/config.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/config.yaml new file mode 100644 index 0000000..9f91ffc --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 1 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 1 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/hydra.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/hydra.yaml new file mode 100644 index 0000000..bfefa97 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/hydra.yaml @@ -0,0 +1,186 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher + partition: null + qos: null + comment: null + constraint: null + exclude: null + gres: null + cpus_per_gpu: null + gpus_per_task: null + mem_per_gpu: null + mem_per_cpu: null + account: null + signal_delay_s: 120 + max_num_timeout: 0 + additional_parameters: {} + array_parallelism: 256 + setup: null + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? 
+ hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: MULTIRUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=MULTIRUN + task: + - multiagent.n_agents=1 + - representation=turtle + - load=False + job: + name: train_ctrl + chdir: null + override_dirname: load=False,multiagent.n_agents=1,representation=turtle + id: '27482696_0' + num: 0 + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_slurm + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/overrides.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/overrides.yaml new file mode 100644 index 0000000..886263e --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/overrides.yaml @@ -0,0 +1,3 @@ +- multiagent.n_agents=1 +- representation=turtle +- load=False diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/config.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/config.yaml new file mode 100644 index 0000000..0bc6d71 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 1 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 2 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/hydra.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/hydra.yaml new file mode 100644 index 0000000..9783c95 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/hydra.yaml @@ -0,0 +1,186 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher + partition: null + qos: null + comment: null + constraint: null + exclude: null + gres: null + cpus_per_gpu: null + gpus_per_task: null + mem_per_gpu: null + mem_per_cpu: null + account: null + signal_delay_s: 120 + max_num_timeout: 0 + additional_parameters: {} + array_parallelism: 256 + setup: null + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? 
+ hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: MULTIRUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=MULTIRUN + task: + - multiagent.n_agents=2 + - representation=turtle + - load=False + job: + name: train_ctrl + chdir: null + override_dirname: load=False,multiagent.n_agents=2,representation=turtle + id: '27482696_1' + num: 1 + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_slurm + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/overrides.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/overrides.yaml new file mode 100644 index 0000000..f7ae698 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/overrides.yaml @@ -0,0 +1,3 @@ +- multiagent.n_agents=2 +- representation=turtle +- load=False diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/config.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/config.yaml new file mode 100644 index 0000000..5712828 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 1 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 3 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/hydra.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/hydra.yaml new file mode 100644 index 0000000..c4e3218 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/hydra.yaml @@ -0,0 +1,186 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher + partition: null + qos: null + comment: null + constraint: null + exclude: null + gres: null + cpus_per_gpu: null + gpus_per_task: null + mem_per_gpu: null + mem_per_cpu: null + account: null + signal_delay_s: 120 + max_num_timeout: 0 + additional_parameters: {} + array_parallelism: 256 + setup: null + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? 
+ hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: MULTIRUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=MULTIRUN + task: + - multiagent.n_agents=3 + - representation=turtle + - load=False + job: + name: train_ctrl + chdir: null + override_dirname: load=False,multiagent.n_agents=3,representation=turtle + id: '27482696_2' + num: 2 + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_slurm + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/overrides.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/overrides.yaml new file mode 100644 index 0000000..a342a9a --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/overrides.yaml @@ -0,0 +1,3 @@ +- multiagent.n_agents=3 +- representation=turtle +- load=False diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/config.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/config.yaml new file mode 100644 index 0000000..fb00417 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 1 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 4 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/hydra.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/hydra.yaml new file mode 100644 index 0000000..d6651f9 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/hydra.yaml @@ -0,0 +1,186 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher + partition: null + qos: null + comment: null + constraint: null + exclude: null + gres: null + cpus_per_gpu: null + gpus_per_task: null + mem_per_gpu: null + mem_per_cpu: null + account: null + signal_delay_s: 120 + max_num_timeout: 0 + additional_parameters: {} + array_parallelism: 256 + setup: null + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? 
+ hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: MULTIRUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=MULTIRUN + task: + - multiagent.n_agents=4 + - representation=turtle + - load=False + job: + name: train_ctrl + chdir: null + override_dirname: load=False,multiagent.n_agents=4,representation=turtle + id: '27482696_3' + num: 3 + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_slurm + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/overrides.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/overrides.yaml new file mode 100644 index 0000000..74c02b5 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/overrides.yaml @@ -0,0 +1,3 @@ +- multiagent.n_agents=4 +- representation=turtle +- load=False diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/multirun.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/multirun.yaml new file mode 100644 index 0000000..8ed5e80 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/multirun.yaml @@ -0,0 +1,229 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher + partition: null + qos: null + comment: null + constraint: null + exclude: null + gres: null + cpus_per_gpu: null + gpus_per_task: null + mem_per_gpu: null + mem_per_cpu: null + account: null + signal_delay_s: 120 + max_num_timeout: 0 + additional_parameters: {} + array_parallelism: 256 + setup: null + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: 
${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: MULTIRUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=MULTIRUN + task: + - multiagent.n_agents=1,2,3,4 + - representation=turtle + - load=False + job: + name: train_ctrl + chdir: null + override_dirname: load=False,multiagent.n_agents=1,2,3,4,representation=turtle + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: ??? + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_slurm + hydra/output: default + verbose: false +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 1 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 0 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/config.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/config.yaml new file mode 100644 index 0000000..ceca151 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? +hardware: + n_cpu: 1 + n_gpu: 1 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 0 +debug: false +render: false +infer: false +evaluate: false +load: true +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/hydra.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/hydra.yaml new file mode 100644 index 0000000..507fb81 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/hydra.yaml @@ -0,0 +1,168 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.LocalLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? 
+ hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - hardware.n_gpu=1 + job: + name: train_ctrl + chdir: null + override_dirname: hardware.n_gpu=1 + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/outputs/2022-11-29/14-39-14 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_local + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/overrides.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/overrides.yaml new file mode 100644 index 0000000..23d31ba --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/overrides.yaml @@ -0,0 +1 @@ +- hardware.n_gpu=1 diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/config.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/config.yaml new file mode 100644 index 0000000..9c0420a --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 0 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 0 +debug: false +render: false +infer: false +evaluate: false +load: true +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/hydra.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/hydra.yaml new file mode 100644 index 0000000..9d5a305 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/hydra.yaml @@ -0,0 +1,168 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.LocalLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - hardware.n_gpu=0 + job: + name: train_ctrl + chdir: null + override_dirname: hardware.n_gpu=0 + id: ??? + num: ??? 
+ config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/outputs/2022-11-29/14-39-51 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_local + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/overrides.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/overrides.yaml new file mode 100644 index 0000000..0c836a4 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/overrides.yaml @@ -0,0 +1 @@ +- hardware.n_gpu=0 diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/config.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/config.yaml new file mode 100644 index 0000000..aced9bb --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? +hardware: + n_cpu: 1 + n_gpu: 0 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 0 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/hydra.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/hydra.yaml new file mode 100644 index 0000000..15e7825 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/hydra.yaml @@ -0,0 +1,169 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.LocalLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. 
+ + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - hardware.n_gpu=0 + - load=False + job: + name: train_ctrl + chdir: null + override_dirname: hardware.n_gpu=0,load=False + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/outputs/2022-11-29/14-40-16 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_local + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/overrides.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/overrides.yaml new file mode 100644 index 0000000..4df4332 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/overrides.yaml @@ -0,0 +1,2 @@ +- hardware.n_gpu=0 +- load=False diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/config.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/config.yaml new file mode 100644 index 0000000..05a13f1 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 0 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 2 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/hydra.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/hydra.yaml new file mode 100644 index 0000000..8b1a090 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/hydra.yaml @@ -0,0 +1,170 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.LocalLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - hardware.n_gpu=0 + - load=False + - multiagent.n_agents=2 + job: + name: train_ctrl + chdir: null + override_dirname: hardware.n_gpu=0,load=False,multiagent.n_agents=2 + id: ??? + num: ??? 
+ config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/outputs/2022-11-29/14-44-50 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_local + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/overrides.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/overrides.yaml new file mode 100644 index 0000000..7b8f603 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/overrides.yaml @@ -0,0 +1,3 @@ +- hardware.n_gpu=0 +- load=False +- multiagent.n_agents=2 diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/config.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/config.yaml new file mode 100644 index 0000000..05a13f1 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? +hardware: + n_cpu: 1 + n_gpu: 0 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 2 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/hydra.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/hydra.yaml new file mode 100644 index 0000000..561ae9a --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/hydra.yaml @@ -0,0 +1,170 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.LocalLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. 
+ + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - hardware.n_gpu=0 + - load=False + - multiagent.n_agents=2 + job: + name: train_ctrl + chdir: null + override_dirname: hardware.n_gpu=0,load=False,multiagent.n_agents=2 + id: ??? + num: ??? 
+ config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/outputs/2022-11-29/14-46-43 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_local + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/overrides.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/overrides.yaml new file mode 100644 index 0000000..7b8f603 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/overrides.yaml @@ -0,0 +1,3 @@ +- hardware.n_gpu=0 +- load=False +- multiagent.n_agents=2 diff --git a/control_pcgrl/control_wrappers.py b/control_pcgrl/control_wrappers.py index 5ca4a50..ea17680 100644 --- a/control_pcgrl/control_wrappers.py +++ b/control_pcgrl/control_wrappers.py @@ -3,6 +3,7 @@ ################################################################################ import collections import copy +import json from pdb import set_trace as TT from timeit import default_timer as timer from typing import Dict, OrderedDict @@ -46,7 +47,12 @@ def __init__(self, env, ctrl_metrics=None, rand_params=False, **kwargs): metric_weights = copy.copy(self.unwrapped._reward_weights) self.metric_weights = {k: 0 for k in metric_weights} - self.metric_weights.update(kwargs['problem']['weights']) + try: + config_weights = kwargs['problem']['weights'] + except TypeError: + config_weights = json.loads(kwargs['problem'].replace('\'', '\"'))['weights'] + + self.metric_weights.update(config_weights) # cond_trgs = self.unwrapped.cond_trgs @@ -350,11 +356,11 @@ def render(self, mode='human'): else: ### PROFILING - N = 100 - start_time = timer() - for _ in range(N): - super().render(mode=mode) - print(f'mean pyglet image render time over {N} trials:', (timer() - start_time) * 1000 / N, 'ms') + #N = 100 + #start_time = timer() + #for _ in range(N): + # super().render(mode=mode) + #print(f'mean pyglet image render time over {N} trials:', (timer() - start_time) * 1000 / N, 'ms') ### return super().render(mode=mode) diff --git a/control_pcgrl/envs/helper.py b/control_pcgrl/envs/helper.py index b31cbf3..93fe9df 100644 --- a/control_pcgrl/envs/helper.py +++ b/control_pcgrl/envs/helper.py @@ -2,6 +2,7 @@ A helper module that can be used by all problems """ import numpy as np +from gym.utils import seeding from pdb import set_trace as TT """ @@ -486,7 +487,7 @@ def calc_num_reachable_tile(map, map_locations, start_value, passable_values, re Returns: int[][]: the random generated map """ -def gen_random_map(random, dims, prob): +def gen_random_map(random, dims, prob, seed=None): # def gen_random_map(random, width, height, prob): map = random.choice(list(prob.keys()),size=dims[::-1],p=list(prob.values())).astype(np.uint8) return map diff --git a/control_pcgrl/envs/pcgrl_env.py b/control_pcgrl/envs/pcgrl_env.py index 91898c7..275b1d3 100644 --- a/control_pcgrl/envs/pcgrl_env.py +++ 
b/control_pcgrl/envs/pcgrl_env.py @@ -1,4 +1,5 @@ import collections +import json from pdb import set_trace as TT import PIL @@ -10,7 +11,7 @@ from ray.rllib.env.env_context import EnvContext from ray.rllib.utils.annotations import override -from control_pcgrl.envs.reps.wrappers import wrap_rep +from control_pcgrl.envs.reps.wrappers import wrap_rep, MultiAgentWrapper from control_pcgrl.envs.probs import PROBLEMS from control_pcgrl.envs.probs.problem import Problem, Problem3D from control_pcgrl.envs.reps import REPRESENTATIONS @@ -45,6 +46,7 @@ def __init__(self, prob="binary", rep="narrow", **kwargs): # Attach this function to the env, since it will be different for, e.g., 3D environments. self.get_string_map = get_string_map + self._prob: Problem = PROBLEMS[prob](**kwargs) self._prob.init_tile_int_dict() self._rep_cls = REPRESENTATIONS[rep] @@ -117,6 +119,12 @@ def set_task(self, map_idx): self.cur_map_idx = map_idx self.switch_env = True + def get_rep(self): + return self._rep + + def get_map(self): + return self._rep._map + def get_map_dims(self): return (self._prob._width, self._prob._height, self.get_num_tiles()) @@ -125,6 +133,9 @@ def get_observable_map_dims(self): def configure(self, map_shape, **kwargs): # , max_step=300): # What is this garbage?? + if isinstance(map_shape, str): + map_shape = json.loads(map_shape) + self._prob._width = map_shape[0] self._prob._height = map_shape[1] self.width = map_shape[0] #UGH @@ -174,6 +185,10 @@ def get_spaces(self): def reset(self): self._changes = 0 self._iteration = 0 + # avoid default probabilities with normal distribution if we seed manually + if hasattr(self._prob, '_random'): + probs = self._prob._random.random(size=len(self._prob.get_tile_types())) + self._prob._prob = {tile: prob for tile, prob in zip(self._prob.get_tile_types(), probs)} if self.switch_env: self._rep.reset(self.get_map_dims()[:-1], get_int_prob(self._prob._prob, self._prob.get_tile_types()), next_map=self._prob.eval_maps[self.cur_map_idx]) @@ -185,9 +200,14 @@ def reset(self): self._rep_stats = self._prob.get_stats(self.get_string_map(self._get_rep_map(), self._prob.get_tile_types())) #, continuous=continuous)) self.metrics = self._rep_stats self._prob.reset(self._rep_stats) + self._prob._prob = probs self._heatmap = np.zeros(self.get_map_dims()[:-1]) - observation = self._rep.get_observation() + if issubclass(type(self._rep), MultiAgentWrapper): + observation = self._rep.get_observation(all_agents=True) + else: + observation = self._rep.get_observation() # all_agents parameter does not exist for representations without MultiAgentWrapper + # observation["heatmap"] = self._heatmap.copy() return observation @@ -249,6 +269,8 @@ def adjust_param(self, **kwargs): else: max_board_scans = kwargs.get('max_board_scans', 1) self._max_iterations = np.prod(self.get_map_dims()[:-1]) * max_board_scans + 1 + if isinstance(kwargs['map_shape'], str): + kwargs['map_shape'] = json.loads(kwargs['map_shape']) self._prob.adjust_param(**kwargs) self._rep.adjust_param(**kwargs) self.action_space = self._rep.get_action_space(self.get_map_dims()[:-1], self.get_num_tiles()) @@ -257,6 +279,8 @@ def adjust_param(self, **kwargs): # self.observation_space.spaces['heatmap'] = spaces.Box( # low=0, high=self._max_changes, dtype=np.uint8, shape=self.get_map_dims()[:-1]) + def get_agent_position(self): + return self._rep._positions """ Advance the environment using a specific action diff --git a/control_pcgrl/envs/probs/__init__.py b/control_pcgrl/envs/probs/__init__.py index 93f52c5..5ffc96f 100644 
--- a/control_pcgrl/envs/probs/__init__.py +++ b/control_pcgrl/envs/probs/__init__.py @@ -33,6 +33,7 @@ "ddave": DDaveProblem, "mdungeon": MDungeonProblem, "sokoban": SokobanCtrlProblem, + #"sokoban": SokobanProblem, # "sokoban_ctrl": SokobanCtrlProblem, # "zelda": ZeldaProblem, "smb": SMBCtrlProblem, diff --git a/control_pcgrl/envs/probs/problem.py b/control_pcgrl/envs/probs/problem.py index 60a3423..075b6d9 100644 --- a/control_pcgrl/envs/probs/problem.py +++ b/control_pcgrl/envs/probs/problem.py @@ -55,6 +55,7 @@ def init_tile_int_dict(self): def get_tile_int(self, tile): return self._tile_int_dict[tile] + def is_continuous(self): return False diff --git a/control_pcgrl/envs/probs/sokoban/sokoban_ctrl_prob.py b/control_pcgrl/envs/probs/sokoban/sokoban_ctrl_prob.py index 7681205..719fd92 100644 --- a/control_pcgrl/envs/probs/sokoban/sokoban_ctrl_prob.py +++ b/control_pcgrl/envs/probs/sokoban/sokoban_ctrl_prob.py @@ -9,6 +9,7 @@ class SokobanCtrlProblem(SokobanProblem): def __init__(self): super(SokobanCtrlProblem, self).__init__() + #import pdb; pdb.set_trace() self._max_path_length = np.ceil(self._width / 2 + 1) * (self._height) # like _reward_weights but for use with ParamRew # self._reward_weights = self._reward_weights @@ -56,7 +57,7 @@ def get_reward(self, new_stats, old_stats): def get_stats(self, map): stats = super().get_stats(map) - stats["sol-length"] = len(stats["solution"]) + stats["sol-length"] = len(stats.get('solution', [])) stats["ratio"] = abs(stats["crate"] - stats["target"]) # if stats['dist-win'] == self._width * self._height * (self._width + self._height): # stats['dist-win'] = 0 diff --git a/control_pcgrl/envs/probs/sokoban/sokoban_prob.py b/control_pcgrl/envs/probs/sokoban/sokoban_prob.py index c74b5dd..6b09675 100644 --- a/control_pcgrl/envs/probs/sokoban/sokoban_prob.py +++ b/control_pcgrl/envs/probs/sokoban/sokoban_prob.py @@ -174,7 +174,6 @@ def get_stats(self, map): map, map_locations, ["empty", "player", "crate", "target"] ), "dist-win": self._width * self._height * (self._width + self._height), - "solution": [], } if ( @@ -270,7 +269,7 @@ def get_debug_info(self, new_stats, old_stats): "target": new_stats["target"], "regions": new_stats["regions"], "dist-win": new_stats["dist-win"], - "sol-length": len(new_stats["solution"]), + "sol-length": len(new_stats.get('solution', [])) } """ diff --git a/control_pcgrl/envs/reps/narrow_rep.py b/control_pcgrl/envs/reps/narrow_rep.py index 3640914..47fdcb1 100644 --- a/control_pcgrl/envs/reps/narrow_rep.py +++ b/control_pcgrl/envs/reps/narrow_rep.py @@ -97,6 +97,9 @@ def update(self, action): self.n_step += 1 super().update(action) return change, self._pos + + def update_state(self, action): + return self.update(action) # """ # Modify the level image with a red rectangle around the tile that is diff --git a/control_pcgrl/envs/reps/representation.py b/control_pcgrl/envs/reps/representation.py index 4c3d9d3..111eb9d 100644 --- a/control_pcgrl/envs/reps/representation.py +++ b/control_pcgrl/envs/reps/representation.py @@ -27,6 +27,7 @@ def __init__(self, border_tile_index=1, empty_tile_index=0): self._border_tile_index = border_tile_index self._empty_tile = empty_tile_index self._random_start: bool = True + self.seed_val: int = None self.seed() @@ -44,6 +45,7 @@ def get_pos(self): int: the used seed (same as input if not None) """ def seed(self, seed=None): + self.seed_val = seed self._random, seed = seeding.np_random(seed) return seed @@ -64,7 +66,7 @@ def reset(self, dims: tuple, prob: Problem, next_map: np.ndarray = 
None): self._map = next_map self._old_map = self._map.copy() elif self._random_start or self._old_map is None: - self._map = type(self).gen_random_map(self._random, dims, prob) + self._map = type(self).gen_random_map(self._random, dims, prob, self.seed_val) self._old_map = self._map.copy() else: self._map = self._old_map.copy() diff --git a/control_pcgrl/envs/reps/turtle_rep.py b/control_pcgrl/envs/reps/turtle_rep.py index 08f2cd3..7b7ac4b 100644 --- a/control_pcgrl/envs/reps/turtle_rep.py +++ b/control_pcgrl/envs/reps/turtle_rep.py @@ -30,7 +30,9 @@ def __init__(self, **kwargs): """ def reset(self, dims, prob): self._pos = self.get_pos_at_step(dims, -1) - return super().reset(dims, prob) + ret = super().reset(dims, prob) + return ret + # self._x = self._random.randint(width) # self._y = self._random.randint(height) # self._x = 0 @@ -68,7 +70,7 @@ def adjust_param(self, **kwargs): def get_action_space(self, dims, num_tiles): return spaces.Discrete(len(self._dirs) + num_tiles) - def update(self, action): + def update(self, action, pos=None): action, self._pos = self.update_pos(action, self._pos) return action, self._pos diff --git a/control_pcgrl/envs/reps/wrappers.py b/control_pcgrl/envs/reps/wrappers.py index 5f7aabd..8bd0c3a 100644 --- a/control_pcgrl/envs/reps/wrappers.py +++ b/control_pcgrl/envs/reps/wrappers.py @@ -3,6 +3,7 @@ from inspect import isclass import logging import math +import json from pdb import set_trace as TT from gym import spaces @@ -20,6 +21,7 @@ from control_pcgrl.envs.reps.narrow_rep import NarrowRepresentation from control_pcgrl.envs.reps.representation import EgocentricRepresentation, Representation from control_pcgrl.envs.reps.turtle_rep import TurtleRepresentation +from control_pcgrl.envs.reps.wide_rep import WideRepresentation # class RepresentationWrapper(Representation): @@ -418,7 +420,7 @@ def render(self, lvl_image, tile_size=16, border_size=None): return super().render(lvl_image, tile_size, border_size) -class MultiAgentRepresentation(RepresentationWrapper): +class MultiAgentWrapper(RepresentationWrapper): agent_colors = [ (255, 255, 255, 255), (0, 255, 0, 255), @@ -429,30 +431,20 @@ class MultiAgentRepresentation(RepresentationWrapper): (0, 255, 255, 255), ] def __init__(self, rep, **kwargs): - self.n_agents = kwargs['multiagent']['n_agents'] + try: + n_agents = kwargs.get('multiagent')['n_agents'] + except TypeError: + n_agents = json.loads(kwargs.get('multiagent').replace('\'', '\"'))['n_agents'] + #self.n_agents = kwargs['multiagent']['n_agents'] + self.n_agents = n_agents self._active_agent = None super().__init__(rep, **kwargs) - + def reset(self, dims, prob, **kwargs): - self._active_agent = None - ret = super().reset(dims, prob, **kwargs) - - # FIXME: specific to turtle - self._positions = np.floor(np.random.random((self.n_agents, len(dims))) * (np.array(dims))).astype(int) - - def update(self, action): - change = False - - # FIXME: mostly specific to turtle - # for i, pos_0 in enumerate(self._positions): - for k, v in action.items(): - i = int(k.split('_')[-1]) - pos_0 = self._positions[i] - change_i, pos = self.update_pos(action[f'agent_{i}'], pos_0) - change = change or change_i - self._positions[i] = pos - - return change, self._positions + super().reset(dims, prob, **kwargs) + + def update(self): + raise NotImplementedError("This must be overriden by a child class") def render(self, lvl_image, tile_size=16, border_size=None): @@ -465,13 +457,13 @@ def render(self, lvl_image, tile_size=16, border_size=None): 
(x+border_size[0]+1)*tile_size,(y+border_size[1]+1)*tile_size), x_graphics) return lvl_image - def get_observation(self, *args, **kwargs): + def get_observation(self, *args, all_agents=False, **kwargs): # Note that this returns a dummy/meaningless position that never changes... base_obs = super().get_observation(*args, **kwargs) agent_name = self._active_agent multiagent_obs = {} - if agent_name is None: + if agent_name is None or all_agents: for i in range(self.n_agents): obs_i = base_obs.copy() obs_i['pos'] = self._positions[i] @@ -483,10 +475,93 @@ def get_observation(self, *args, **kwargs): return multiagent_obs # base_obs['pos'] = self._positions[int(agent_name.split('_')[-1])] # return base_obs - + def set_active_agent(self, agent_name): self._active_agent = agent_name + +class MultiAgentTurtleRepresentation(MultiAgentWrapper): + def __init__(self, rep, **kwargs): + super().__init__(rep, **kwargs) + + def reset(self, dims, prob, **kwargs): + super().reset(dims, prob, **kwargs) + self._positions = np.floor(np.random.random((self.n_agents, len(dims))) * (np.array(dims))).astype(int) + self.heatmaps = np.zeros((self.n_agents, 16, 16)) + + def update(self, action): + change = False + + # FIXME: mostly specific to turtle + # for i, pos_0 in enumerate(self._positions): + for k, v in action.items(): + i = int(k.split('_')[-1]) + pos_0 = self._positions[i] + change_i, pos = self.update_pos(action[f'agent_{i}'], pos_0) + if change_i: + y, x = pos_0[1], pos_0[0] + self.heatmaps[i][y][x] += 1 + change = change or change_i + self._positions[i] = pos + + return change, self._positions + + def get_positions(self): + return self._positions + +class MultiAgentNarrowRepresentation(MultiAgentWrapper): + + def __init__(self, rep, **kwargs): + super().__init__(rep, **kwargs) + self.heatmaps = np.zeros((self.n_agents, 16, 16)) + + def reset(self, dims, prob, **kwargs): + self.rep.reset(dims, prob, **kwargs) + self.coords = self.get_act_coords() + self._n_steps = {i: i for i in range(self.n_agents)} + self._positions = np.array([self.coords[i] for i in range(self.n_agents)]) + #self._positions = {i: self.coords[i] for i in range(self.n_agents)} + + def update(self, action): + change = False + for agent, act in action.items(): + i = int(agent.split('_')[-1]) + self.rep.n_step = self._n_steps[i] + self.rep._pos = tuple(self.coords[self.n_step]) + change_i, _ = self.rep.update(act) + self._n_steps[i] += 1 + if self._n_steps[i] == len(self.coords): + self._n_steps[i] = 0 + if change_i: + y, x = self.rep._pos[1], self.rep._pos[0] + self.heatmaps[i][y][x] += 1 + self._positions[i] = self.coords[self._n_steps[i]] + change = change or change_i + return change, self.get_positions() + + def get_positions(self): + return self._positions + +class MultiAgentWideRepresentation(MultiAgentWrapper): + + def __init__(self, rep, **kwargs): + super().__init__(rep, **kwargs) + + def reset(self, dims, prob, **kwargs): + self.rep.reset(dims, prob, **kwargs) + # store the last known positions of the agents + self._positions = {i: i for i in range(self.n_agents)} + + def update(self, actions): + change = False + positions = [] + for agent, act in actions.items(): + change_i, pos = self.rep.update(act) + positions.append(pos) + change = change or change_i + self._positions = positions + return change, self._positions + def wrap_rep(rep: Representation, prob_cls: Problem, map_dims: tuple, static_build = False, multi = False, **kwargs): """Should only happen once!""" if multi: @@ -519,11 +594,28 @@ def wrap_rep(rep: Representation, 
prob_cls: Problem, map_dims: tuple, static_bui else: rep = HoleyRepresentation(rep, **kwargs) - - if kwargs.get("multiagent")['n_agents'] != 0: - if not issubclass(type(rep), TurtleRepresentation): + + try: + n_agents = kwargs.get('multiagent')['n_agents'] + except TypeError: + n_agents = json.loads(kwargs.get('multiagent').replace('\'', '\"'))['n_agents'] + #if isinstance(kwargs.get('multiagent'), str): + # kwargs['multiagent'] = json.loads(kwargs.get('multiagent').replace('\'', '\"')) + # #import pdb; pdb.set_trace() + #if not isinstance(multiagent_config, int): + # import pdb; pdb.set_trace() + if n_agents != 0: + #if kwargs.get("multiagent")['n_agents'] != 0: + if issubclass(type(rep), TurtleRepresentation): + rep = MultiAgentTurtleRepresentation(rep, **kwargs) + elif issubclass(type(rep), NarrowRepresentation): + rep = MultiAgentNarrowRepresentation(rep, **kwargs) + pass + elif issubclass(type(rep), WideRepresentation): + rep = MultiAgentWideRepresentation(rep, **kwargs) + else: raise NotImplementedError("Multiagent only works with TurtleRepresentation currently") - rep = MultiAgentRepresentation(rep, **kwargs) + return rep @@ -553,4 +645,4 @@ def wrap_rep(rep: Representation, prob_cls: Problem, map_dims: tuple, static_bui # np.random.shuffle(self._act_coords) # self._x, self._y = self._act_coords[self.n_step % len(self._act_coords)] # self.n_step += 1 -# return change, [self._x, self._y] \ No newline at end of file +# return change, [self._x, self._y] diff --git a/control_pcgrl/envs/reps/wrappers_copy.py b/control_pcgrl/envs/reps/wrappers_copy.py new file mode 100644 index 0000000..2e41232 --- /dev/null +++ b/control_pcgrl/envs/reps/wrappers_copy.py @@ -0,0 +1,597 @@ +from abc import ABC +from copy import deepcopy +from collections import OrderedDict +from inspect import isclass +import logging +import math +from pdb import set_trace as TT + +from gym import spaces +import gym +from control_pcgrl.envs import helper_3D +from control_pcgrl.envs.probs.holey_prob import HoleyProblem +from control_pcgrl.envs.probs.minecraft.mc_render import spawn_3D_maze +from control_pcgrl.envs.probs.minecraft.minecraft_3D_rain import Minecraft3Drain +from control_pcgrl.envs.probs.problem import Problem, Problem3D +import numpy as np +from PIL import Image + +from control_pcgrl.envs.helper_3D import gen_random_map as gen_random_map_3D +from control_pcgrl.envs.reps.ca_rep import CARepresentation +from control_pcgrl.envs.reps.narrow_rep import NarrowRepresentation +from control_pcgrl.envs.reps.representation import EgocentricRepresentation, Representation +from control_pcgrl.envs.reps.turtle_rep import TurtleRepresentation + + +# class RepresentationWrapper(Representation): +class RepresentationWrapper(): + def __init__(self, rep: Representation, **kwargs): + self.rep = rep + # TODO: implement below so that they all point to the same object + # self._map = self.rep._map + # self._bordered_map = self.rep._bordered_map # Doing this results in self._borderd_map != self.rep._bordered_map + # self._random_start = self.rep._random_start + + def _set_pos(self, pos): + self.rep._pos = pos + + def adjust_param(self, **kwargs): + return self.rep.adjust_param(**kwargs) + + def update(self, *args, **kwargs): + return self.rep.update(*args, **kwargs) + + def get_observation(self, *args, **kwargs): + return self.rep.get_observation(*args, **kwargs) + + def get_observation_space(self, *args, **kwargs): + return self.rep.get_observation_space(*args, **kwargs) + + def get_action_space(self, *args, **kwargs): + return 
self.rep.get_action_space(*args, **kwargs) + + def reset(self, *args, **kwargs): + ret = self.rep.reset(*args, **kwargs) + return ret + + def render(self, *args, **kwargs): + return self.rep.render(*args, **kwargs) + + def _update_bordered_map(self): + return self.rep._update_bordered_map() + + def __repr__(self): + return str(self) + + def __getattr__(self, name): + # Removing this check causes errors when serializing this object with pickle. E.g. when using ray for parallel + # environments. Variables that start with underscore will need to be unwrapped manually. + if name.startswith("_"): + raise AttributeError( + "attempted to get missing private attribute '{}'".format(name) + ) + return getattr(self.rep, name) + + # @property + # def spec(self): + # return self.rep.spec + + @classmethod + def class_name(cls): + return cls.__name__ + + # def close(self): + # return self.rep.close() + + def seed(self, seed=None): + return self.rep.seed(seed) + + def __str__(self): + return "<{}{}>".format(type(self).__name__, self.rep) + + @property + def unwrapped(self): + return self.rep.unwrapped + + +# class Representation3DABC(Representation): + + +# class Representation3D(rep_cls, Representation3DABC): +class Representation3D(RepresentationWrapper): + """ + The base class of all the 3D representations + + map in repr are np.array of numbers + """ + _dirs = [(-1,0,0), (1,0,0), (0,-1,0), (0,1,0),(0,0,-1),(0,0,1)] + _gen_random_map = helper_3D.gen_random_map + + # def _update_bordered_map(self): + # self._bordered_map[1:-1, 1:-1, 1:-1] = self._map + + def render(self, map, mode='human', **kwargs): + # TODO: Check if we are Egocentric. If so, render the agent edit. Otherwise, render the whole map (assume cellular) + spawn_3D_maze(map) + # return self.rep.render(mode, **kwargs) + # pass + + + +class HoleyRepresentation(RepresentationWrapper): + def set_holes(self, entrance_coords, exit_coords): + self.entrance_coords, self.exit_coords = entrance_coords, exit_coords + + def dig_holes(self, entrance_coords, exit_coords): + # TODO: Represent start/end differently to accommodate one-way paths. + self.unwrapped._bordered_map[entrance_coords[0], entrance_coords[1]] = self.unwrapped._empty_tile + self.unwrapped._bordered_map[exit_coords[0], exit_coords[1]] = self.unwrapped._empty_tile + + + def update(self, action): + ret = super().update(action) + return ret + + def reset(self, *args, **kwargs): + ret = super().reset(*args, **kwargs) + self.dig_holes(self.entrance_coords, self.exit_coords) + return ret + + def get_observation(self): + obs: dict = super().get_observation() + obs.update( + {'map': self.unwrapped._bordered_map.copy(),} + ) + if 'pos' in obs: + obs['pos'] += 1 # support variable border sizes? + return obs + + def get_observation_space(self, dims, num_tiles): + obs_space = super().get_observation_space(dims, num_tiles) + map_shape = tuple([i + 2 for i in obs_space['map'].shape]) + obs_space.spaces.update({ + "map": spaces.Box(low=0, high=num_tiles-1, dtype=np.uint8, shape=map_shape) + }) + if "pos" in obs_space.spaces: + old_pos_space = obs_space.spaces["pos"] + obs_space.spaces.update({ + "pos": spaces.Box(low=old_pos_space.low + 1, high=old_pos_space.high + 1, \ + dtype=old_pos_space.dtype, shape=old_pos_space.shape) + }) + return obs_space + + +class HoleyRepresentation3D(HoleyRepresentation): + """A 3D variant of the holey representation. 
Holes on the border of the map are 2 tiles high, to support the + size of the player in our Minecraft-inspired 3D problems.""" + + def dig_holes(self, s, e): + # TODO: Represent start/end differently to accommodate one-way paths. + self.unwrapped._bordered_map[s[0][0]][s[0][1]][s[0][2]] = self.unwrapped._bordered_map[s[1][0]][s[1][1]][s[1][2]] = self.unwrapped._empty_tile + self.unwrapped._bordered_map[e[0][0]][e[0][1]][e[0][2]] = self.unwrapped._bordered_map[e[1][0]][e[1][1]][e[1][2]] = self.unwrapped._empty_tile + + +class StaticBuildRepresentation(RepresentationWrapper): + def __init__(self, rep, **kwargs): + super().__init__(rep, **kwargs) + self.prob_static = 0.0 + self.window = None + + def adjust_param(self, **kwargs): + self.prob_static = kwargs.get('static_prob') + self.n_aux_tiles = kwargs.get('n_aux_tiles') + return super().adjust_param(**kwargs) + + def reset(self, *args, **kwargs): + ret = super().reset(*args, **kwargs) + # Uniformly sample a probability of static builds from within the range [0, self.prob_static] + prob_static = self.unwrapped._random.random() * self.prob_static + # TODO: take into account validity constraints on number of certain tiles + self.static_builds = (self.unwrapped._random.random(self.unwrapped._bordered_map.shape) < prob_static).astype(np.uint8) + # Borders are always static + self.static_builds[(0, -1), :] = 1 + self.static_builds[:, (0, -1)] = 1 + + # Remove any action coordinates that correspond to static tiles (unless we have aux chans, in which case + # we'll let the agent leave messages for itself on those channels, even on static tiles.) + # NOTE: We only have `_act_coords` for narrow representation. Can we make this cleaner? + if hasattr(self, '_act_coords') and self.n_aux_tiles == 0: + self._act_coords = self._act_coords[np.where( + self.static_builds[self._act_coords[:, 0], self._act_coords[:, 1]] == 0)] + return ret + + def get_observation_space(self, dims, num_tiles): + obs_space = super().get_observation_space(dims, num_tiles) + obs_space.spaces.update({ + 'static_builds': spaces.Box(low=0, high=1, dtype=np.uint8, shape=dims) + }) + return obs_space + + def get_observation(self): + obs = super().get_observation() + obs.update({ + 'static_builds': self.static_builds, + }) + return obs + + def render(self, lvl_image, tile_size, border_size=None): + lvl_image = super().render(lvl_image, tile_size, border_size) + im_arr = np.zeros((tile_size, tile_size, 4), dtype=np.uint8) + clr = (255, 0, 0, 255) + im_arr[(0, 1, -1, -2), :, :] = im_arr[:, (0, 1, -1, -2), :] = clr + x_graphics = Image.fromarray(im_arr) + + for (y, x) in np.argwhere(self.static_builds[1:-1, 1:-1] == 1): + y, x = y + 1, x + 1 # ignoring the border + lvl_image.paste(x_graphics, ((x+border_size[0]-1)*tile_size, (y+border_size[1]-1)*tile_size, + (x+border_size[0])*tile_size,(y+border_size[1])*tile_size), x_graphics) + + # if not hasattr(self, 'window'): + # self.window = cv2.namedWindow('static builds', cv2.WINDOW_NORMAL) + # cv2.resize('static builds', 100, 800) + # cv2.waitKey(1) + # im = self.static_builds.copy() + # cv2.imshow('static builds', im * 255) + # cv2.waitKey(1) + + return lvl_image + + # update = { + # CARepresentationHoley: update_ca_holey, + # }[rep_cls] + + def update(self, action, **kwargs): + old_state = self.unwrapped._bordered_map.copy() + change, pos = super().update(action, **kwargs) + new_state = self.unwrapped._bordered_map + # assert not(np.all(old_state == new_state)) + self.unwrapped._bordered_map = np.where(self.static_builds < 1, new_state, 
old_state) + # print(self._bordered_map) + self.unwrapped._map = self.unwrapped._bordered_map[ + tuple([slice(1, -1) for _ in range(len(self.unwrapped._map.shape))])] + change = np.any(old_state != new_state) + return change, pos + + +class RainRepresentation(RepresentationWrapper): + def get_action_space(self, dims, num_tiles): + # Need no-op because raining sand/acid will always change map (if column is not empty). + return spaces.Discrete(num_tiles + 1) + + # TODO: + def update(self, action, **kwargs): + # FIXME: Assuming a narrow representation! + change, pos = super().update(action, **kwargs) + if change: + self.unwrapped._map[pos[0], pos[1]] = self.unwrapped._empty_tile + return change, pos + + def render(self, map, mode='human', **kwargs): + # TODO: just place a sand block at the top + spawn_3D_maze(map) + + +class MultiActionRepresentation(RepresentationWrapper): + ''' + A wrapper that makes the action space change multiple tiles at each time step. Maybe useful for all representations + (for 2D, 3D, narrow, turtle, wide, ca, ...). + NOW JUST FOR EGOCENTRIC REPRESENTATIONS. + ''' + def _set_inner_padding(self, action_size): + """These are like buffers. The agent should not be centered on these buffers because it will act on them anyway + when at either edge of the map. + For any odd action patch, these are equal (e.g., for 3, they are both 1). For, e.g. 4, they are 1 and 2. + We define a left/right (bottom/top, close/far) pair for each map dimension.""" + self.inner_l_pads = np.floor((action_size - 1) / 2).astype(int) + self.inner_r_pads = np.ceil((action_size - 1) / 2).astype(int) + + def __init__(self, rep, map_dims, **kwargs): + super().__init__(rep, **kwargs) + self.action_size = np.array(kwargs.get('action_size')) # if we arrive here, there must be an action_size in kwargs + self._set_inner_padding(self.action_size) + self.map_size = map_dims # map_dims is a tuple (height, width, n_tiles) in 2D + self.map_dim = len(map_dims[:-1]) # 2 for 2D, 3 for 3D + self.strides = np.ones(len(self.map_size[:-1]), dtype=np.int32) * 3 # strides are just 3 for each dimension now + + # We should not set this here. This is defined in the underlying representation class. In this underlying class, + # it is initialized on `reset`. + # self._act_coords = None + + # Check the action size is the same dimension as the map + assert self.map_dim == len(self.action_size), \ + f"Action size ({len(self.action_size)}) should be the same dimension as the map size ({self.map_dim})" + # Check whether we have a valid action size and stride + for i in range(self.map_dim): + logging.warning(f"Not validating your action size ({self.action_size}) and stride ({self.strides}, w.r.t." + + " the map size ({self.map_size}). If these are mismatches, the agent may not be able to edit the bottom" + + " right/far edges of the map.") + # FIXME: below assertion is thrown whenever stride = 1 and action_size > 1. But these are valid settings. + # assert self.map_size[i] - self.action_size[i] + self.strides[i] == self.map_size[i] * self.strides[i], \ + # "Please make sure that the action size and stride are valid for the map size." + + # NOTE: This function will not be called by the object we are wrapping. (Like it would be if we + # inherited from it instead.) So we'll be gross, and overwrite this function in the wrapped class manually. 
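# A minimal sketch (hypothetical classes, not from this codebase) of the one-way delegation issue
# described in the NOTE above: attribute lookup on the wrapper falls through to the wrapped object
# via __getattr__, but calls the wrapped object makes on itself never come back to the wrapper, so
# the wrapper's override of get_act_coords would be skipped unless it is patched onto the wrapped
# object explicitly.
#
#     class Inner:
#         def get_act_coords(self):
#             return "inner"
#         def reset(self):
#             # `self` is the Inner instance, so this resolves to Inner.get_act_coords.
#             return self.get_act_coords()
#
#     class Wrapper:
#         def __init__(self, inner):
#             self.inner = inner
#         def __getattr__(self, name):
#             return getattr(self.inner, name)
#         def get_act_coords(self):
#             return "wrapper"
#
#     Wrapper(Inner()).reset()  # -> "inner", hence the explicit overwrite on the next line.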
+ self.unwrapped.get_act_coords = self.get_act_coords + + def get_action_space(self, *args, **kwargs): + # the tiles inside the action are not neccearily the same + action_space = [] + for i in range(math.prod(self.action_size)): + action_space.append(self.map_size[-1]) + return spaces.MultiDiscrete(action_space) + + # This gets overwritten in the wrapped class in `__init__` above. + def get_act_coords(self): + ''' + Get the coordinates of the action space. Regards the top left corner's coordinate (the smallest coords in the + action block) as the coordinate of current action. + + The formula of calculating the size of 2d convolutional layer is: + (W-F+2P)/S + 1 + where W is the width of input (the map size here), F is the width of filter (action_size here), + P is the padding (0 here), S is the stride. To get the same size of input and output, we have: + (W-F)/S + 1 = W + => W - F + S = W * S for each dimension + ''' + coords = [] + for i in range(self.map_dim): + coords.append(np.arange(self.inner_l_pads[i], self.map_size[i] - self.inner_r_pads[i], self.strides[i])) + act_coords = np.array(np.meshgrid(*coords)).T.reshape(-1, self.map_dim) # tobe checked! copilot writes this but looks good + act_coords = np.flip(act_coords, axis=1) # E.g., in 2D, scan horizontally first. + return act_coords + + + def update(self, action, **kwargs): + ''' + Update the map according to the action, the action is a vector of size action_size + + In previous narrow_multi representation, the action is also a vector (MultiDiscrete). However the action + outside the map will be discarded (do I understand it right?). This will make the entries of the action space + sometimes crucial (inside the map) and sometimes trivial (outside the map). This is not good for RL. (copilot agree + this is not good) + ''' + # unravel the action from a vector to a matrix (of size action_size) + action = action.reshape(self.action_size) + + old_state = self.unwrapped._map.copy() + + # replace the map at self._pos with the action TODO: is there any better way to make it dimension independent? (sam: yes. Slices!) (copilot: yes, np.take_along_axis)(zehua:is this right?) (copilot:I think so) + _pos = self.unwrapped._pos # Why is _pos private again? (copilot: I don't know) Ok thanks copilot. (copilot: you're welcome) + + # Let's center the action patch around _pos. If the agent's observation is centered + # around _pos, then we want the action patch to be centered around _pos as well. + # The inner padding tells us how many tiles can be acted on to the left/right of _pos. + top_left = _pos - self.inner_l_pads + bottom_right = _pos + self.inner_r_pads + + slices = [slice(top_left[i], bottom_right[i] + 1) for i in range(self.map_dim)] + # (zehua: yes use slices!) + # not tested: map[tuple(starmap(slice, zip(top_left, bottom_right)))] = action + # Or this(more similar to sam's code but single line): map[tuple(slice(*indexes) for indexes in zip(top_left, bottom_right))] = action + + ### Some checks for safety (could comment these out later). ### + # Check that the action patch is within the map. + assert np.all(top_left >= 0), \ + f"Action patch is outside the map. Top left corner: {top_left}" + assert np.all(bottom_right < self.map_size[:-1]), \ + f"Action patch is outside the map. 
Bottom right corner: {bottom_right}" + ################################################################ + + self.unwrapped._map[tuple(slices)] = action + # if self.map_dim == 2: + # self.unwrapped._map[top_left[0]:bottom_right[0]+1, top_left[1]:bottom_right[1]+1] = action + # elif self.map_dim == 3: + # self.unwrapped._map[top_left[0]:bottom_right[0]+1, top_left[1]:bottom_right[1]+1, top_left[2]:bottom_right[2]+1] = action + + + new_state = self.unwrapped._map + if self.unwrapped._random_tile: + if self.unwrapped.n_step == len(self._act_coords): + np.random.shuffle(self._act_coords) + + # self._set_pos(self.unwrapped._act_coords[self.n_step % len(self.unwrapped._act_coords)]) + self._set_pos(self.get_pos_at_step(self.n_step)) + self.unwrapped.n_step += 1 + + self.unwrapped._bordered_map[tuple([slice(1, -1) for _ in range(len(self.unwrapped._map.shape))])] = self.unwrapped._map + + change = np.any(old_state != new_state) + + return change, self.unwrapped._pos + + def render(self, lvl_image, tile_size=16, border_size=None): + y, x = self.get_pos() + # This is a little image with our border in it + im_arr = np.zeros((tile_size * self.action_size[0], tile_size * self.action_size[1], 4), dtype=np.uint8) + # Grey color + clr = np.array([128, 128, 128, 255], dtype=np.uint8) + # Two pixels on each side for column + im_arr[(0, 1, -1, -2), :, :] = clr + # Two pixels on each side for row + im_arr[:, (0, 1, -1, -2), :] = clr + x_graphics = Image.fromarray(im_arr) + # Paste our border image into the level image at the agent's position + lvl_image.paste(x_graphics, ( + # Left corner of the image we're pasting in + (x+border_size[0]-self.inner_l_pads[0])*tile_size, (y+border_size[1]-self.inner_l_pads[1])*tile_size, + # Right corner + (x+border_size[0]+self.inner_r_pads[0]+1)*tile_size, (y+border_size[1]+self.inner_r_pads[1]+1)*tile_size), x_graphics) + return super().render(lvl_image, tile_size, border_size) + + +class MultiAgentRepresentation(RepresentationWrapper): + agent_colors = [ + (255, 255, 255, 255), + (0, 255, 0, 255), + (255, 0, 0, 255), + (0, 0, 255, 255), + (255, 255, 0, 255), + (255, 0, 255, 255), + (0, 255, 255, 255), + ] + def __init__(self, rep, **kwargs): + self.n_agents = kwargs['multiagent']['n_agents'] + # create a single representation for each agent + # all representations share maps + self._rep = rep + self.reps = {f'agent_{i}': deepcopy(rep) for i in range(self.n_agents)} + self._active_agent = None + super().__init__(rep, **kwargs) + + def get_rep_map(self): + return self.reps['agent_0']._map + + def reset(self, dims, prob, **kwargs): + self._active_agent = None + shared_map = None + for agent, r in self.reps.items(): + r.reset(dims, prob, **kwargs) + if shared_map is None: + shared_map = r._map + else: + r._map = shared_map + # default to random initialization + import pdb; pdb.set_trace() + r._pos = [int(r._random.random() * i) for i in dims] + + super().reset(dims, prob, **kwargs) + self.unwrapped._map = shared_map + + # FIXME: specific to turtle + #self._positions = np.floor(np.random.random((self.n_agents, len(dims))) * (np.array(dims))).astype(int) + + #def update(self, action): + # change = False + + # # FIXME: mostly specific to turtle + # # for i, pos_0 in enumerate(self._positions): + # for k, v in action.items(): + # i = int(k.split('_')[-1]) + # pos_0 = self._positions[i] + # change_i, pos = self.update_pos(action[f'agent_{i}'], pos_0) + # change = change or change_i + # self._positions[i] = pos + + # return change, self._positions + + def update(self, action): + 
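        # Apply each agent's action through that agent's sub-representation, then propagate the
        # resulting map to every sub-representation (and to this wrapper) so all agents keep
        # editing one shared level.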
change = False + for k, v in action.items(): + change_i, new_pos = self.reps[k].update(v) + #i = int(k.split('_')[-1]) + #self.rep._pos = self._positions[i] + #change_i, new_pos = self.rep.update(v) + change = change or change_i + self.reps[k]._pos = new_pos + for r in self.reps: + r._map = self.reps[k]._map + self._map = self.reps[k]._map + #self._positions[i] = new_pos + return change, self._positions + + def render(self, lvl_image, tile_size=16, border_size=None): + + for (y, x), clr in zip(self._positions, self.agent_colors): + im_arr = np.zeros((tile_size, tile_size, 4), dtype=np.uint8) + + im_arr[(0, 1, -1, -2), :, :] = im_arr[:, (0, 1, -1, -2), :] = clr + x_graphics = Image.fromarray(im_arr) + lvl_image.paste(x_graphics, ((x+border_size[0])*tile_size, (y+border_size[1])*tile_size, + (x+border_size[0]+1)*tile_size,(y+border_size[1]+1)*tile_size), x_graphics) + return lvl_image + + def get_positions(self): + return [r._pos for _, r in self.reps.items()] + + def get_observation(self, *args, **kwargs): + # Note that this returns a dummy/meaningless position that never changes... + base_obs = super().get_observation(*args, **kwargs) + + agent_name = self._active_agent + multiagent_obs = {} + if agent_name is None: + for agent, r in self.reps.items(): + multiagent_obs[agent] = r.get_observation(*args, **kwargs) + #for i in range(self.n_agents): + # obs_i = base_obs.copy() + # obs_i['pos'] = self._positions[i] + # multiagent_obs[f'agent_{i}'] = obs_i + return multiagent_obs + else: + multiagent_obs[agent_name] = self.reps[agent_name].get_observation(*args, **kwargs) + #multiagent_obs[agent_name] = base_obs + #multiagent_obs[agent_name]['pos'] = self._positions[int(agent_name.split('_')[-1])] + return multiagent_obs + # base_obs['pos'] = self._positions[int(agent_name.split('_')[-1])] + # return base_obs + + def set_active_agent(self, agent_name): + self._active_agent = agent_name + +def wrap_rep(rep: Representation, prob_cls: Problem, map_dims: tuple, static_build = False, multi = False, **kwargs): + """Should only happen once!""" + if multi: + rep = MultiActionRepresentation(rep, map_dims, **kwargs) + + if static_build: + # rep_cls = StaticBuildRepresentation(rep_cls) + rep = StaticBuildRepresentation(rep, **kwargs) + + + # FIXME: this is a hack to make sure that rep_cls is a class name but not an object + # rep_cls = rep_cls if isclass(rep_cls) else type(rep_cls) + # if issubclass(prob_cls, Minecraft3Drain): + # rep = RainRepresentation(rep) + if issubclass(prob_cls, Problem3D): + rep = Representation3D(rep, **kwargs) + # rep_cls = wrap_3D(rep_cls) + # if issubclass(rep_cls, EgocentricRepresentation): + # rep_cls = EgocentricRepresentation3D() + # else: + # rep_cls = Representation3D(rep_cls) + + # FIXME: this is a hack to make sure that rep_cls is a class name but not an object + # rep_cls = rep_cls if isclass(rep_cls) else type(rep_cls) + # if issubclass(prob_cls, HoleyProblem) and not issubclass(type(rep), HoleyRepresentation): + if issubclass(prob_cls, HoleyProblem): + + if issubclass(prob_cls, Problem3D): + rep = HoleyRepresentation3D(rep, **kwargs) + + else: + rep = HoleyRepresentation(rep, **kwargs) + + if kwargs.get("multiagent")['n_agents'] != 0: + #if not issubclass(type(rep), TurtleRepresentation): + # raise NotImplementedError("Multiagent only works with TurtleRepresentation currently") + rep = MultiAgentRepresentation(rep, **kwargs) + + return rep + + + + +# def update_ca_holey(self, action, **kwargs): +# old_state = self._bordered_map.copy() +# change, pos = 
CARepresentationHoley.update(self, action, **kwargs) +# new_state = self._bordered_map +# # assert not(np.all(old_state == new_state)) +# self._bordered_map = np.where(self.static_builds < 1, new_state, old_state) +# # print(self._bordered_map) +# self._map = self._bordered_map[1:-1, 1:-1] +# change = np.any(old_state != new_state) +# return change, pos + + +# def update_narrow_holey(self, action, **kwargs): +# change = 0 +# if action > 0: +# change += [0,1][self._map[self._y][self._x] != action-1] +# self._map[self._y][self._x] = action-1 +# self._bordered_map[self._y+1][self._x+1] = action-1 +# if self._random_tile: +# if self.n_step == len(self._act_coords): +# np.random.shuffle(self._act_coords) +# self._x, self._y = self._act_coords[self.n_step % len(self._act_coords)] +# self.n_step += 1 +# return change, [self._x, self._y] \ No newline at end of file diff --git a/control_pcgrl/rl/callbacks.py b/control_pcgrl/rl/callbacks.py index d9ef49f..e119209 100644 --- a/control_pcgrl/rl/callbacks.py +++ b/control_pcgrl/rl/callbacks.py @@ -40,8 +40,11 @@ def on_episode_start( f'{k}-trg': None, }) for k in env.metrics: + if k == 'solution': + continue episode.hist_data.update({f'{k}-val': None, }) + if self.holey: episode.hist_data.update({ 'holes_start': None, @@ -88,13 +91,15 @@ def on_episode_end( # 'path-length': np.mean(path_lengths), # } env = base_env.get_sub_environments()[env_index] - episode_stats = env.unwrapped._rep_stats + unwrapped = env._unwrapped if hasattr(env, '_unwrapped') else env.unwrapped + episode_stats = unwrapped._rep_stats # stats_list = ['regions', 'connectivity', 'path-length'] # write to tensorboard file (if enabled) # episode.hist_data.update({k: [v] for k, v in episode_stats.items()}) - episode.custom_metrics.update({k: [v] for k, v in episode_stats.items()}) + episode.custom_metrics.update({k: [v] for k, v in episode_stats.items() if k != 'solution'}) + # TODO: log ctrl targets and success rate as heatmap: x is timestep, y is ctrl target, heatmap is success rate @@ -104,12 +109,17 @@ def on_episode_end( f'{k}-trg': [env.metric_trgs[k]], # rllib needs these values to be lists :) }) for k in env.metrics: + # avoid adding non-numeric values + #if isinstance(env.metrics[k], int) or isinstance(env.metrics[k], float): + if k == 'solution': + continue episode.hist_data.update({f'{k}-val': [env.metrics[k]],}) + # episode.hist_data.update({k: [v] for k, v in episode_stats.items() if k in stats_list}) # episode.custom_metrics.update({k: [v] for k, v in episode_stats.items() if k in stats_list}) - if hasattr(env.unwrapped._prob, '_hole_queue'): + if hasattr(unwrapped._prob, '_hole_queue'): entrance_coords, exit_coords = env.unwrapped._prob.entrance_coords, env.unwrapped._prob.exit_coords if len(entrance_coords.shape) == 1: # Then it's 2D. 
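# A possible refactor (hypothetical helper, not part of this diff): the `_unwrapped`-fallback used
# in the callbacks above could be factored out so the hasattr check is not repeated.
def _base_env(env):
    """Return the underlying PCGRL env, whether or not it sits behind a grouped wrapper."""
    return env._unwrapped if hasattr(env, '_unwrapped') else env.unwrapped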
@@ -120,6 +130,6 @@ def on_episode_end( else: # Just record the foot-room if 3D episode.hist_data.update({ - 'holes_start': [tuple(env.unwrapped._prob.entrance_coords[0])], - 'holes_end': [tuple(env.unwrapped._prob.exit_coords[0])], + 'holes_start': [tuple(unwrapped._prob.entrance_coords[0])], + 'holes_end': [tuple(unwrapped._prob.exit_coords[0])], }) diff --git a/control_pcgrl/rl/envs.py b/control_pcgrl/rl/envs.py index 26b81de..7007ea3 100644 --- a/control_pcgrl/rl/envs.py +++ b/control_pcgrl/rl/envs.py @@ -2,6 +2,7 @@ from collections import namedtuple import os from pdb import set_trace as TT +import json from typing import Dict from control_pcgrl import wrappers @@ -84,7 +85,12 @@ def make_env(cfg): # # RenderMonitor must come last # env = RenderMonitor(env, rank, log_dir, **kwargs) - if cfg.multiagent.n_agents != 0: + try: + n_agents = cfg_dict['multiagent']['n_agents'] + except TypeError: + n_agents = json.loads(cfg_dict['multiagent'].replace('\'', '\"'))['n_agents'] + + if n_agents != 0: env = wrappers.MultiAgentWrapper(env, **cfg_dict) return env diff --git a/control_pcgrl/rl/evaluate.py b/control_pcgrl/rl/evaluate.py index fb26f2f..fdd9382 100644 --- a/control_pcgrl/rl/evaluate.py +++ b/control_pcgrl/rl/evaluate.py @@ -15,9 +15,9 @@ LOAD_STATS = True -CONTROL_DOORS = True +CONTROL_DOORS = False CONTROLS = False -GENERAL_EVAL = False +GENERAL_EVAL = True def evaluate(trainer, env, cfg): diff --git a/control_pcgrl/rl/models.py b/control_pcgrl/rl/models.py index f788875..566c8f5 100644 --- a/control_pcgrl/rl/models.py +++ b/control_pcgrl/rl/models.py @@ -1,4 +1,5 @@ from typing import Dict, List +import json from einops import rearrange import numpy as np @@ -31,6 +32,7 @@ def __init__(self, # self.obs_size = get_preprocessor(obs_space)(obs_space).size obs_shape = obs_space.shape + self.img_shape = obs_shape obs_shape = (obs_shape[2], obs_shape[0], obs_shape[1]) self.fc_size = fc_size @@ -52,6 +54,11 @@ def value_function(self): return th.reshape(self.value_branch(self._features), [-1]) def forward(self, input_dict, state, seq_lens): + #raise ValueError(input_dict['obs'].shape) + input_dict['obs'] = input_dict['obs'].reshape( + input_dict['obs'].size(0), + *self.img_shape + ) input = input_dict["obs"].permute(0, 3, 1, 2) # Because rllib order tensors the tensorflow way (channel last) x = nn.functional.relu(self.conv_1(input.float())) x = nn.functional.relu(self.conv_2(x)) @@ -135,7 +142,9 @@ def __init__(self, # self.n_aux_chan = n_aux_chan self.conv_filters = conv_filters # self.obs_size = get_preprocessor(obs_space)(obs_space).size - obs_shape = obs_space.shape + obs_shape = (32, 32, 3) + #obs_shape = obs_space.shape + self.obs_shape = obs_shape # orig_obs_space = model_config['custom_model_config']['orig_obs_space'] # obs_shape = orig_obs_space['map'].shape # metrics_size = orig_obs_space['ctrl_metrics'].shape \ @@ -170,6 +179,11 @@ def value_function(self): return th.reshape(self.value_branch(self._features), [-1]) def forward(self, input_dict, state, seq_lens): + #import pdb; pdb.set_trace() + input_dict['obs'] = input_dict['obs'].reshape( + input_dict['obs'].size(0), + *self.obs_shape + ) input = input_dict['obs'].permute(0, 3, 1, 2) # input = th.cat([input, self._last_aux_activ], dim=1) x = nn.functional.relu(self.conv_1(input.float())) @@ -258,8 +272,8 @@ def forward(self, input_dict, state, seq_lens): x = self.fc_1(x) self._features = x x = x.reshape(*pre_fc_shape) - x = nn.functional.relu(self.deconv_1(x)) - x = x + x1 + x = nn.functional.relu(self.deconv_1(x)) + x = 
x.repeat(1, 1, 2, 2) + x1 x = nn.functional.relu(self.deconv_2(x)) action_out = x.reshape(x.size(0), -1) diff --git a/control_pcgrl/rl/rllib_utils.py b/control_pcgrl/rl/rllib_utils.py index 7d417a4..be48a0b 100644 --- a/control_pcgrl/rl/rllib_utils.py +++ b/control_pcgrl/rl/rllib_utils.py @@ -1,6 +1,7 @@ from pdb import set_trace as TT import numpy as np from ray.rllib.algorithms.ppo import PPO as RlLibPPOTrainer +from ray.rllib.algorithms.qmix import QMix as RlLibQMIXTrainer import torchinfo import torch as th @@ -16,130 +17,156 @@ # done = np.any(self.num_timesteps > old_num_timesteps) # return done - -class PPOTrainer(RlLibPPOTrainer): - log_keys = ['episode_reward_max', 'episode_reward_mean', 'episode_reward_min', 'episode_len_mean'] - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # wandb.init(**self.config['wandb']) - self.checkpoint_path_file = kwargs['config']['checkpoint_path_file'] - self.ctrl_metrics = self.config['env_config']['controls'] - self.ctrl_metrics = {} if self.ctrl_metrics is None else self.ctrl_metrics - cbs = self.workers.foreach_env(lambda env: env.unwrapped.cond_bounds) - cbs = [cb for worker_cbs in cbs for cb in worker_cbs if cb is not None] - cond_bounds = cbs[0] - self.metric_ranges = {k: v[1] - v[0] for k, v in cond_bounds.items()} - # self.checkpoint_path_file = checkpoint_path_file - - def setup(self, config): - ret = super().setup(config) - n_params = 0 - param_dict = self.get_weights()['default_policy'] - - for v in param_dict.values(): - n_params += np.prod(v.shape) - model = self.get_policy('default_policy').model - print(f'default_policy has {n_params} parameters.') - print('Model overview(s):') - print(model) - print("=============") - # torchinfo summaries are very confusing at the moment - torchinfo.summary(model, input_data={ - "input_dict": {"obs": th.zeros((1, *self.config['model']['custom_model_config']['dummy_env_obs_space'].shape))}}) - return ret - - @classmethod - def get_default_config(cls): - # def_cfg = super().get_default_config() - def_cfg = RlLibPPOTrainer.get_default_config() - def_cfg.update({ - 'checkpoint_path_file': None, - 'wandb': { - 'project': 'PCGRL', - 'name': 'default_name', - 'id': 'default_id', - }, - }) - return def_cfg - - def save(self, *args, **kwargs): - ckp_path = super().save(*args, **kwargs) - with open(self.checkpoint_path_file, 'w') as f: - f.write(ckp_path) - return ckp_path - - # @wandb_mixin - def train(self, *args, **kwargs): - result = super().train(*args, **kwargs) - log_result = {k: v for k, v in result.items() if k in self.log_keys} - log_result['info: learner:'] = result['info']['learner'] - - # Either doing multi-agent... - if 'num_agent_steps_sampled_this_iter' in result: - result['fps'] = result['num_agent_steps_trained_this_iter'] / result['time_this_iter_s'] - # or single-agent. +def ControllablaTrainerFactory(trainer): + if isinstance(trainer, str): + if trainer.lower() == 'ppo': + trainer = RlLibPPOTrainer + elif trainer == 'QMIX': + trainer = RlLibQMIXTrainer else: - result['fps'] = result['num_env_steps_trained_this_iter'] / result['time_this_iter_s'] - - # TODO: Send a heatmap to tb/wandb representing success reaching various control targets? 
- if len(result['custom_metrics']) > 0: - n_bins = 20 - result['custom_plots'] = {} - for metric in self.ctrl_metrics: - - # Scatter plots via wandb - # trgs = result['hist_stats'][f'{metric}-trg'] - # vals = result['hist_stats'][f'{metric}-val'] - # data = [[x, y] for (x, y) in zip(trgs, vals)] - # table = wandb.Table(data=data, columns=['trg', 'val']) - # scatter = wandb.plot.scatter(table, "trg", "val", title=f"{metric}-trg-val") - # result['custom_plots']["scatter_{}".format(metric)] = scatter - # scatter.save(f"{metric}-trg-val.png") - # wandb.log({f'{metric}-scc': scatter}, step=self.iteration) - - # Spoofed histograms - # FIXME: weird interpolation behavior here??? - bin_size = self.metric_ranges[metric] / n_bins # 30 is the default number of tensorboard histogram bins (HACK) - trg_dict = {} - - for i, trg in enumerate(result['hist_stats'][f'{metric}-trg']): - val = result['hist_stats'][f'{metric}-val'][i] - scc = 1 - abs(val - trg) / self.metric_ranges[metric] - trg_bin = trg // bin_size - if trg not in trg_dict: - trg_dict[trg_bin] = [scc] - else: - trg_dict[trg_bin] += [scc] - # Get average success rate in meeting each target. - trg_dict = {k: np.mean(v) for k, v in trg_dict.items()} - # Repeat each target based on how successful we were in reaching it. (Appears at least once if sampled) - spoof_data = [[trg * bin_size] * (1 + int(20 * scc)) for trg, scc in trg_dict.items()] - spoof_data = [e for ee in spoof_data for e in ee] # flatten the list - result['hist_stats'][f'{metric}-scc'] = spoof_data - - # Make a heatmap. - # ax, fig = plt.subplots(figsize=(10, 10)) - # data = np.zeros(n_bins) - # for trg, scc in trg_dict.items(): - # data[trg] = scc - # wandb.log({f'{metric}-scc': wandb.Histogram(data, n_bins=n_bins)}) - - # plt.imshow(data, cmap='hot') - # plt.savefig(f'{metric}.png') - - - - # for k, v in result['hist_stats'].items(): - # if '-trg' in k or '-val' in k: - # result['custom_metrics'][k] = [v] - - # print('-----------------------------------------') - # print(pretty_print(log_result)) - return result - - def evaluate(self): - # TODO: Set the evaluation maps here! - # self.eval_workers.foreach_env_with_context(fn) - result = super().evaluate() - return result + raise ValueError( + 'Unsupported trainer type. ' + \ + 'Acceptable arguments are {PPO, QMIX}. 
'+ \ + 'For custom trainers, pass a trainer object as a parameter') + + """ + Wrap trainer object with extra logging and custom metric checkpointing + """ + class Trainer(trainer): + log_keys = ['episode_reward_max', 'episode_reward_mean', 'episode_reward_min', 'episode_len_mean'] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # wandb.init(**self.config['wandb']) + self.checkpoint_path_file = kwargs['config']['checkpoint_path_file'] + self.ctrl_metrics = self.config['env_config']['controls'] + self.ctrl_metrics = {} if self.ctrl_metrics is None else self.ctrl_metrics + cbs = self.workers.foreach_env(lambda env: env._unwrapped.cond_bounds if hasattr(env, '_unwrapped') else env.unwrapped.cond_bounds) + + cbs = [cb for worker_cbs in cbs for cb in worker_cbs if cb is not None] + cond_bounds = cbs[0] + self.metric_ranges = {k: v[1] - v[0] for k, v in cond_bounds.items()} + #self.checkpoint_path_file = checkpoint_path_file + + def setup(self, config): + #import pdb; pdb.set_trace() + #config['replay_buffer_config'] = {'type': 'ReplayBuffer'} + ret = super().setup(config) + n_params = 0 + #agent_id = config['multiagent'] + multiagent = config.get('multiagent', None) + if multiagent is None: + sample_agent_id = 'default_policy' + else: + sample_agent_id = list(multiagent['policies'].keys())[0] + param_dict = self.get_weights()[sample_agent_id] + + # DOES NOT WORK FOR QMIX MODEL + #for v in param_dict.values(): + # n_params += np.prod(v.shape) + #model = self.get_policy(sample_agent_id).model + #print(f'default_policy has {n_params} parameters.') + #print('Model overview(s):') + #print(model) + #print("=============") + # torchinfo summaries are very confusing at the moment + #torchinfo.summary(model, input_data={ + # "input_dict": {"obs": th.zeros((1, *self.config['model']['custom_model_config']['dummy_env_obs_space'].shape))}}) + return ret + + + @classmethod + def get_default_config(cls): + # def_cfg = super().get_default_config() + def_cfg = trainer.get_default_config() + def_cfg.update({ + 'checkpoint_path_file': None, + 'wandb': { + 'project': 'PCGRL', + 'name': 'default_name', + 'id': 'default_id', + }, + }) + return def_cfg + + def save(self, *args, **kwargs): + ckp_path = super().save(*args, **kwargs) + with open(self.checkpoint_path_file, 'w+') as f: + f.write(ckp_path) + return ckp_path + + # @wandb_mixin + def train(self, *args, **kwargs): + result = super().train(*args, **kwargs) + log_result = {k: v for k, v in result.items() if k in self.log_keys} + log_result['info: learner:'] = result['info']['learner'] + + # Either doing multi-agent... + if 'num_agent_steps_sampled_this_iter' in result: + result['fps'] = result['num_agent_steps_trained_this_iter'] / result['time_this_iter_s'] + # or single-agent. + else: + result['fps'] = result['num_env_steps_trained_this_iter'] / result['time_this_iter_s'] + + # TODO: Send a heatmap to tb/wandb representing success reaching various control targets? 
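            # Worked example for the spoofed-histogram logic below (illustrative numbers only): if a
            # control metric has range 100 and n_bins = 20, then bin_size = 5. A sampled target
            # trg = 42 with achieved value val = 37 gives scc = 1 - |37 - 42| / 100 = 0.95 and lands
            # in bin 42 // 5 = 8.0; that bin's mean success is then replayed as the value
            # 8 * 5 = 40 repeated 1 + int(20 * 0.95) = 20 times in the histogram data.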
+ if len(result['custom_metrics']) > 0: + n_bins = 20 + result['custom_plots'] = {} + for metric in self.ctrl_metrics: + # Scatter plots via wandb + # trgs = result['hist_stats'][f'{metric}-trg'] + # vals = result['hist_stats'][f'{metric}-val'] + # data = [[x, y] for (x, y) in zip(trgs, vals)] + # table = wandb.Table(data=data, columns=['trg', 'val']) + # scatter = wandb.plot.scatter(table, "trg", "val", title=f"{metric}-trg-val") + # result['custom_plots']["scatter_{}".format(metric)] = scatter + # scatter.save(f"{metric}-trg-val.png") + # wandb.log({f'{metric}-scc': scatter}, step=self.iteration) + + # Spoofed histograms + # FIXME: weird interpolation behavior here??? + bin_size = self.metric_ranges[metric] / n_bins # 30 is the default number of tensorboard histogram bins (HACK) + trg_dict = {} + + for i, trg in enumerate(result['hist_stats'][f'{metric}-trg']): + val = result['hist_stats'][f'{metric}-val'][i] + scc = 1 - abs(val - trg) / self.metric_ranges[metric] + trg_bin = trg // bin_size + if trg not in trg_dict: + trg_dict[trg_bin] = [scc] + else: + trg_dict[trg_bin] += [scc] + # Get average success rate in meeting each target. + trg_dict = {k: np.mean(v) for k, v in trg_dict.items()} + # Repeat each target based on how successful we were in reaching it. (Appears at least once if sampled) + spoof_data = [[trg * bin_size] * (1 + int(20 * scc)) for trg, scc in trg_dict.items()] + spoof_data = [e for ee in spoof_data for e in ee] # flatten the list + result['hist_stats'][f'{metric}-scc'] = spoof_data + + # Make a heatmap. + # ax, fig = plt.subplots(figsize=(10, 10)) + # data = np.zeros(n_bins) + # for trg, scc in trg_dict.items(): + # data[trg] = scc + # wandb.log({f'{metric}-scc': wandb.Histogram(data, n_bins=n_bins)}) + + # plt.imshow(data, cmap='hot') + # plt.savefig(f'{metric}.png') + + + + # for k, v in result['hist_stats'].items(): + # if '-trg' in k or '-val' in k: + # result['custom_metrics'][k] = [v] + + # print('-----------------------------------------') + # print(pretty_print(log_result)) + return result + + def evaluate(self): + # TODO: Set the evaluation maps here! 
+ # self.eval_workers.foreach_env_with_context(fn) + result = super().evaluate() + return result + + return Trainer diff --git a/control_pcgrl/rl/train_ctrl.py b/control_pcgrl/rl/train_ctrl.py index 01cf106..81eb7f2 100644 --- a/control_pcgrl/rl/train_ctrl.py +++ b/control_pcgrl/rl/train_ctrl.py @@ -1,4 +1,3 @@ - import copy import json import os @@ -12,6 +11,7 @@ from typing import Dict import gym +from tqdm import tqdm import hydra import matplotlib import numpy as np @@ -43,8 +43,8 @@ CustomFeedForwardModel, CustomFeedForwardModel3D, Decoder, DenseNCA, SeqNCA, SeqNCA3D, WideModel3D, WideModel3DSkip) -from control_pcgrl.rl.utils import IdxCounter, get_env_name, get_exp_name, get_map_width -from control_pcgrl.rl.rllib_utils import PPOTrainer +from control_pcgrl.rl.utils import IdxCounter, get_env_name, get_exp_name, get_map_width, TrainerConfigParsers +from control_pcgrl.rl.rllib_utils import ControllablaTrainerFactory from control_pcgrl.configs.config import ControlPCGRLConfig import control_pcgrl from control_pcgrl.envs.probs import PROBLEMS @@ -94,7 +94,12 @@ def main(cfg: ControlPCGRLConfig) -> None: print('env name: ', cfg.env_name) exp_name = get_exp_name(cfg) exp_name_id = f'{exp_name}_{cfg.exp_id}' - cfg.log_dir = log_dir = os.path.join(PROJ_DIR, f'rl_runs/{exp_name_id}_log') + default_dir = os.path.join(PROJ_DIR, 'rl_runs') + cfg.log_dir = log_dir = os.path.join( + cfg.log_dir if cfg.log_dir is not None else default_dir, + cfg.algorithm, + f'{exp_name_id}_log' + ) if not cfg.load: @@ -156,7 +161,8 @@ def main(cfg: ControlPCGRLConfig) -> None: ### DEBUG ### if cfg.debug: - for _ in range(100): + #import pdb; pdb.set_trace() + for _ in tqdm(range(100)): obs = dummy_env.reset() for i in range(500): # if i > 3: @@ -168,6 +174,7 @@ def main(cfg: ControlPCGRLConfig) -> None: # print(obs.transpose(2, 0, 1)[:, 10:-10, 10:-10]) if cfg.render: dummy_env.render() + #import pdb; pdb.set_trace() print('DEBUG: Congratulations! You can now use the environment.') sys.exit() @@ -182,92 +189,57 @@ def main(cfg: ControlPCGRLConfig) -> None: model_cfg.pop('name') if cfg.multiagent.n_agents != 0: - multiagent_config = { - "policies": { - f"default_policy": PolicySpec( + multiagent_config = {} + if cfg.multiagent.policies == "centralized": + multiagent_config['policies'] = { + 'default_policy': PolicySpec( policy_class=None, observation_space=agent_obs_space, action_space=agent_act_space, - config=None,) - }, - "policy_mapping_fn": lambda agent_id: "default_policy", - "count_steps_by": "agent_steps", - } + config=None + ) + } + multiagent_config['policy_mapping_fn'] = lambda agent_id: 'default_policy' + elif cfg.multiagent.policies == "decentralized": + multiagent_config['policies'] = { + f'agent_{i}': PolicySpec( + policy_class=None, + observation_space=agent_obs_space, + action_space=agent_act_space, + config={ + 'custom_model': 'custom_model', + 'custom_model_config': { + "dummy_env_obs_space": copy.copy(agent_obs_space), + **model_cfg, + } + } + ) for i in range(cfg.multiagent.n_agents) + } + multiagent_config['policy_mapping_fn'] = lambda agent_id: agent_id + else: + raise ValueError('Unrecognized policy type. 
Policy values can either be centralized or decentralized') + + multiagent_config['count_steps_by'] = 'agent_steps' multiagent_config = {"multiagent": multiagent_config} + else: multiagent_config = {} # The rllib trainer config (see the docs here: https://docs.ray.io/en/latest/rllib/rllib-training.html) - trainer_config = { - 'env': 'pcgrl', - **multiagent_config, - 'framework': 'torch', - 'num_workers': num_workers if not (cfg.evaluate or cfg.infer) else 0, - 'num_gpus': cfg.hardware.n_gpu, - 'env_config': { - **cfg, # Maybe env should get its own config? (A subset of the original?) - "evaluation_env": False, - }, - # 'env_config': { - # 'change_percentage': cfg.change_percentage, - # }, - 'num_envs_per_worker': num_envs_per_worker, - 'render_env': cfg.render, - 'lr': cfg.learning_rate, - 'gamma': cfg.gamma, - 'model': { - 'custom_model': 'custom_model', - 'custom_model_config': { - "dummy_env_obs_space": copy.copy(agent_obs_space), - **model_cfg, - }, - }, - "evaluation_interval" : 1 if cfg.evaluate else 1, - "evaluation_duration": max(1, num_workers), - "evaluation_duration_unit": "episodes", - "evaluation_num_workers": eval_num_workers, - "env_task_fn": set_map_fn, - "evaluation_config": { - "env_config": { - **cfg, - "evaluation_env": True, - "num_eval_envs": num_envs_per_worker * eval_num_workers, - }, - "explore": True, - }, - "logger_config": { - # "wandb": { - # "project": "PCGRL", - # "name": exp_name_id, - # "id": exp_name_id, - # "api_key_file": "~/.wandb_api_key" - # }, - **logger_type, - # Optional: Custom logdir (do not define this here - # for using ~/ray_results/...). - "logdir": log_dir, - }, -# "exploration_config": { -# "type": "Curiosity", -# } -# "log_level": "INFO", - # "train_batch_size": 50, - # "sgd_minibatch_size": 50, - 'callbacks': stats_callbacks, - - # To take random actions while changing all tiles at once seems to invite too much chaos. - 'explore': True, - - # `ray.tune` seems to need these spaces specified here. - # 'observation_space': dummy_env.observation_space, - # 'action_space': dummy_env.action_space, - - # 'create_env_on_driver': True, - 'checkpoint_path_file': checkpoint_path_file, - # 'record_env': log_dir, - # 'stfu': True, - 'disable_env_checking': True, - } + num_workers = num_workers if not (cfg.evaluate or cfg.infer) else 1 + trainer_config = TrainerConfigParsers[cfg.algorithm]( + cfg, + agent_obs_space, + log_dir, + logger_type, + stats_callbacks, + checkpoint_path_file, + model_cfg, + multiagent_config, + num_workers=num_workers, + num_envs_per_worker=num_envs_per_worker, + eval_num_workers=eval_num_workers + ) register_env('pcgrl', make_env) @@ -315,7 +287,8 @@ def main(cfg: ControlPCGRLConfig) -> None: # Quit the program before agent starts training. sys.exit() - tune.register_trainable("CustomPPO", PPOTrainer) + #tune.register_trainable("CustomPPO", PPOTrainer) + tune.register_trainable(f"CustomTrainer", ControllablaTrainerFactory(cfg.algorithm)) # Limit the number of rows. reporter = CLIReporter( @@ -347,7 +320,7 @@ def main(cfg: ControlPCGRLConfig) -> None: try: # TODO: ray overwrites the current config with the re-loaded one. How to avoid this? 
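        # Note on the checkpointing arguments passed to `tune.run` below: with
        # checkpoint_score_attr='episode_reward_mean' and mode='max', keep_checkpoints_num=2 is
        # intended to retain the two best-scoring checkpoints rather than simply the two most recent.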
analysis = tune.run( - "CustomPPO", + "CustomTrainer", resume="AUTO" if (cfg.load and not cfg.overwrite) else False, config={ **trainer_config, @@ -355,8 +328,10 @@ def main(cfg: ControlPCGRLConfig) -> None: # checkpoint_score_attr="episode_reward_mean", # TODO: makes timestep total input by user.(n_frame) stop={"timesteps_total": 1e10}, + mode='max', + checkpoint_score_attr='episode_reward_mean', checkpoint_at_end=True, - checkpoint_freq=10, + checkpoint_freq=1, keep_checkpoints_num=2, local_dir=log_dir, verbose=1, diff --git a/control_pcgrl/rl/utils.py b/control_pcgrl/rl/utils.py index 2312223..6eb0010 100644 --- a/control_pcgrl/rl/utils.py +++ b/control_pcgrl/rl/utils.py @@ -2,18 +2,24 @@ Helper functions for train, infer, and eval modules. """ from pdb import set_trace as TT +import copy import glob import os import ray import re import numpy as np +import gym +from ray.tune import register_env +from gym.spaces import Tuple # from stable_baselines import PPO2 # from stable_baselines.bench import Monitor #from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv from control_pcgrl.configs.config import ControlPCGRLConfig from control_pcgrl import wrappers +from control_pcgrl.task_assignment import set_map_fn +from control_pcgrl.rl.envs import make_env # NOTE: minecraft has to precede zelda since minecraft zelda maze has both phrases in its name. MAP_WIDTHS = [("binary", 16), ("minecraft_3D_rain", 7), ("minecraft_3D", 15), ("zelda", 16), ("sokoban", 5)] @@ -220,8 +226,8 @@ def get_exp_name(cfg: ControlPCGRLConfig): cfg.problem.name, "weights_" + "-".join(f"{k}-{v}" for k, v in cfg.problem.weights.items()), cfg.representation, + cfg.multiagent.policies, # default to single policy ) - exp_name += '/' if cfg.model.name is not None: @@ -316,3 +322,199 @@ def max_exp_idx(exp_name): n = max(log_ns) return int(n) + +def parse_ppo_config( + config, + agent_obs_space, + log_dir, + logger_type, + stats_callbacks, + checkpoint_path_file, + model_cfg, + multiagent_config={}, + **kwargs + ): + num_workers = kwargs.get('num_workers', 0) + num_envs_per_worker = kwargs.get('num_envs_per_worker', 1) + eval_num_workers = kwargs.get('num_workers', 0) + + return { + 'env': 'pcgrl', + **multiagent_config, + 'framework': 'torch', + 'num_workers': num_workers if not (config.evaluate or config.infer) else 0, + 'num_gpus': config.hardware.n_gpu, + 'env_config': { + **config, # Maybe env should get its own config? (A subset of the original?) + "evaluation_env": False, + }, + # 'env_config': { + # 'change_percentage': cfg.change_percentage, + # }, + 'num_envs_per_worker': num_envs_per_worker, + 'render_env': config.render, + 'lr': config.learning_rate, + 'gamma': config.gamma, + 'model': { + 'custom_model': 'custom_model', + 'custom_model_config': { + "dummy_env_obs_space": copy.copy(agent_obs_space), + **model_cfg, + }, + }, + "evaluation_interval" : 1 if config.evaluate else 1, + "evaluation_duration": max(1, num_workers), + "evaluation_duration_unit": "episodes", + "evaluation_num_workers": eval_num_workers, + "env_task_fn": set_map_fn, + "evaluation_config": { + "env_config": { + **config, + "evaluation_env": True, + "num_eval_envs": num_envs_per_worker * eval_num_workers, + }, + "explore": True, + }, + "logger_config": { + # "wandb": { + # "project": "PCGRL", + # "name": exp_name_id, + # "id": exp_name_id, + # "api_key_file": "~/.wandb_api_key" + # }, + **logger_type, + # Optional: Custom logdir (do not define this here + # for using ~/ray_results/...). 
+ "logdir": log_dir, + }, +# "exploration_config": { +# "type": "Curiosity", +# } +# "log_level": "INFO", + # "train_batch_size": 50, + # "sgd_minibatch_size": 50, + 'callbacks': stats_callbacks, + + # To take random actions while changing all tiles at once seems to invite too much chaos. + 'explore': True, + + # `ray.tune` seems to need these spaces specified here. + # 'observation_space': dummy_env.observation_space, + # 'action_space': dummy_env.action_space, + + # 'create_env_on_driver': True, + 'checkpoint_path_file': checkpoint_path_file, + # 'record_env': log_dir, + # 'stfu': True, + 'disable_env_checking': True, + } + + +def make_grouped_env(config): + + n_agents = config.multiagent.n_agents + dummy_env = make_env(config) + groups = {'group_1': list(dummy_env.observation_space.keys())} + obs_space = Tuple(dummy_env.observation_space.values()) + act_space = Tuple(dummy_env.action_space.values()) + #import pdb; pdb.set_trace() + register_env( + 'grouped_pcgrl', + lambda config: wrappers.GroupedEnvironmentWrapper(make_env(config).with_agent_groups( + groups, obs_space=obs_space, act_space=act_space)) + + ) + + +def parse_qmix_config( + config, + agent_obs_space, + log_dir, + logger_type, + stats_callbacks, + checkpoint_path_file, + model_cfg, + multiagent_config={}, + **kwargs + ): + # register grouped version of environment + #import pdb; pdb.set_trace() + make_grouped_env(config) + num_workers = kwargs.get('num_workers', 0) + num_envs_per_worker = kwargs.get('num_envs_per_worker', 1) + eval_num_workers = kwargs.get('num_workers', 0) + return { + 'env': 'grouped_pcgrl', # replace with grouped environment + 'rollout_fragment_length': 1, + 'train_batch_size': 32, + 'framework': 'torch', + 'num_workers': num_workers if not (config.evaluate or config.infer) else 0, + 'num_gpus': 0, # config.hardware.n_gpu GPU's don't work for QMIX + 'env_config': { + **config, # Maybe env should get its own config? (A subset of the original?) + "evaluation_env": False, + }, + #'mixer': 'qmix', + 'num_envs_per_worker': num_envs_per_worker, + 'render_env': config.render, + 'lr': config.learning_rate, + 'gamma': config.gamma, + 'model': { + 'custom_model': 'custom_model', + 'custom_model_config': { + "dummy_env_obs_space": copy.copy(agent_obs_space), + **model_cfg, + }, + }, + "evaluation_interval" : 1 if config.evaluate else 1, + "evaluation_duration": max(1, num_workers), + "evaluation_duration_unit": "episodes", + "evaluation_num_workers": eval_num_workers, + #"env_task_fn": set_map_fn, + "evaluation_config": { + "env_config": { + **config, + "evaluation_env": True, + "num_eval_envs": num_envs_per_worker * eval_num_workers, + }, + "explore": True, + }, + "logger_config": { + # "wandb": { + # "project": "PCGRL", + # "name": exp_name_id, + # "id": exp_name_id, + # "api_key_file": "~/.wandb_api_key" + # }, + **logger_type, + # Optional: Custom logdir (do not define this here + # for using ~/ray_results/...). + "logdir": log_dir, + }, +# "exploration_config": { +# "type": "Curiosity", +# } +# "log_level": "INFO", + # "train_batch_size": 50, + # "sgd_minibatch_size": 50, + 'callbacks': stats_callbacks, + + # To take random actions while changing all tiles at once seems to invite too much chaos. + 'explore': True, + + # `ray.tune` seems to need these spaces specified here. 
+ # 'observation_space': dummy_env.observation_space, + # 'action_space': dummy_env.action_space, + + # 'create_env_on_driver': True, + 'checkpoint_path_file': checkpoint_path_file, + # 'record_env': log_dir, + # 'stfu': True, + 'disable_env_checking': True, + } + + +TrainerConfigParsers = { + 'PPO': parse_ppo_config, + 'QMIX': parse_qmix_config +} diff --git a/control_pcgrl/wrappers.py b/control_pcgrl/wrappers.py index 543e712..d8b2c1f 100644 --- a/control_pcgrl/wrappers.py +++ b/control_pcgrl/wrappers.py @@ -1,4 +1,5 @@ from functools import partial +import json from pdb import set_trace as TT from typing import Iterable @@ -108,7 +109,12 @@ class TransformObs(gym.Wrapper): """Lil' hack to transform nested observation dicts when dealing with multi-agent environments.""" def __init__(self, *args, **kwargs): super().__init__(self.env) - if kwargs.get("multiagent")['n_agents'] != 0: + try: + n_agents = kwargs.get('multiagent')['n_agents'] + except TypeError: + n_agents = json.loads(kwargs.get('multiagent').replace('\'', '\"'))['n_agents'] + if n_agents != 0: + #if kwargs.get("multiagent")['n_agents'] != 0: self.transform = self._transform_multiagent else: self.transform = self._transform @@ -161,8 +167,13 @@ def __init__(self, game, names, **kwargs): max_value = self.env.observation_space[n].high.max() self.names = names + self.show_agents = kwargs.get('show_agents', False) + try: + n_agents = kwargs['multiagent']['n_agents'] + except: + n_agents = json.loads(kwargs['multiagent'].replace('\'', '\"'))['n_agents'] self.observation_space = spaces.Box( - low=0, high=max_value, shape=(*self.shape[:-1], depth) + low=0, high=max_value if self.show_agents else max(max_value, n_agents), shape=(*self.shape[:-1], depth) ) @@ -184,21 +195,12 @@ def _transform(self, obs): final = np.empty([]) for n in self.names: -# if len(self.env.observation_space.spaces[n].shape) == 3: if len(final.shape) == 0: final = obs[n].reshape(*self.shape[:-1], -1) else: final = np.append( final, obs[n].reshape(*self.shape[:-1], -1), axis=-1 ) -# else: -# if len(final.shape) == 0: -# final = obs[n].reshape(self.shape[0], self.shape[1], self.shape[2], -1) -# else: -# final = np.append( -# final, obs[n].reshape(self.shape[0], self.shape[1], self.shape[2], -1), axis=2 -# ) - return final class ToImageCA(ToImage): @@ -254,11 +256,15 @@ def __init__(self, game, name, padded: bool = False, **kwargs): + 1 ) - for v in shape: - new_shape.append(v) - new_shape.append(self.dim) + new_shape.extend(shape) + if len(new_shape) > 2: + new_shape[-1] += self.dim - 1 + else: + new_shape.append(self.dim) + #import pdb; pdb.set_trace() + self.show_agents = kwargs.get('show_agents', False) self.observation_space.spaces[self.name] = gym.spaces.Box( - low=0, high=1, shape=new_shape, dtype=np.uint8 + low=0, high=1 if not self.show_agents else max(1, kwargs['multiagent']['n_agents']), shape=new_shape, dtype=np.uint8 ) def step(self, action, **kwargs): @@ -275,16 +281,25 @@ def reset(self): return obs def _transform(self, obs): - old = obs[self.name] + named_obs = obs[self.name] + if self.show_agents: # if agent positions are a part of the observation, then the map observation will already have an extra dimension + old = named_obs[:, :, 0] + else: + old = named_obs + if self.padded: # Replace out-of-bounds values with all-zeros. 
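            # Example (illustrative values): with self.dim == 2 tile types and 0 marking
            # out-of-bounds cells in the padded map, np.eye(self.dim + 1)[old] one-hot encodes into
            # 3 channels and the [..., 1:] slice below drops the out-of-bounds channel, so OOB cells
            # become all-zero vectors while tiles 1 and 2 map to (1, 0) and (0, 1) respectively.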
new = np.eye(self.dim + 1)[old] new = new[..., 1:] - else: new = np.eye(self.dim)[old] + # add the agent positions back into the observation + if self.show_agents: + new = np.concatenate((new, named_obs[:, :, -1][:, :, None]), axis=-1) + obs[self.name] = new + #import pdb; pdb.set_trace() return obs @@ -390,8 +405,18 @@ def __init__(self, game, crop_shape: Iterable, pad_value: int, name: str, **kwar len(self.env.observation_space.spaces[name].shape) in [2, 3] ), "This wrapper only works on 2D or 3D arrays." self.name = name - self.shape = crop_shape - self.pad = crop_shape // 2 + self.show_agents = kwargs.get('show_agents', False) + try: + self.shape = np.array(list(crop_shape)) + self.pad = crop_shape // 2 + except TypeError: + #import pdb; pdb.set_trace() + self.shape = np.array(json.loads(str(crop_shape))) + self.pad = self.shape // 2 + if self.show_agents: + self.shape.append(2) # add extra two channels for the positions + self.shape = np.array(self.shape) + #self.pad = crop_shape // 2 self.pad_value = pad_value self.observation_space = gym.spaces.Dict({}) @@ -400,7 +425,7 @@ def __init__(self, game, crop_shape: Iterable, pad_value: int, name: str, **kwar self.observation_space.spaces[k] = s high_value = self.observation_space[self.name].high.max() + 1 # 0s correspond to out-of-bounds tiles self.observation_space.spaces[self.name] = gym.spaces.Box( - low=0, high=high_value, shape=tuple(crop_shape), dtype=np.uint8 + low=0, high=high_value if not self.show_agents else max(high_value, kwargs['multiagent']['n_agents']), shape=tuple(self.shape), dtype=np.uint8 ) def step(self, action, **kwargs): @@ -422,13 +447,32 @@ def _transform(self, obs): # x, y = obs["pos"] pos = obs['pos'] - # View Centering - # padded = np.pad(map, self.pad, constant_values=self.pad_value) + # View Padding padded = np.pad(map, self.pad, constant_values=0) # Denote out-of-bounds tiles as 0. - # cropped = padded[x : x + self.size, y : y + self.size] # Compensate for the bottom-left padding. 
+ # View Centering cropped = padded[tuple([slice(p, p + self.shape[i]) for i, p in enumerate(pos)])] + + # if show positions is turned on: add an extra channel that shows agent positions + # NOTE: Wide representaion cannot use this, since positions are not stored in representation + if self.show_agents: + #import pdb; pdb.set_trace() + #map_expanded = map[:, :, None] + agent_positions = self.unwrapped.get_agent_position() + agent_positions_map = np.zeros(map.shape) + for i, pos in enumerate(agent_positions): + agent_positions_map[tuple(pos)] = i + 1 + #agent_positions_map[agent_positions[:, 0], agent_positions[:, 1]] = 1 + # view padding + padded_positions = np.pad(agent_positions_map, self.pad, constant_values=0) + + # view centering + cropped_positions = padded_positions[tuple([slice(p, p + self.shape[i]) for i, p in enumerate(pos)])] + + cropped = np.concatenate((cropped[:, :, None], cropped_positions[:, :, None]), axis=-1).astype(np.uint8) + + #import pdb; pdb.set_trace() obs[self.name] = cropped return obs @@ -693,13 +737,50 @@ def reset(self): # TODO + +def disable_passive_env_checker(env): + # remove the passive environment checker wrapper from the env attribute of an env + # base case -> the environment is not a wrapper + if not hasattr(env, 'env'): + return env + + root = env + prev = env + while hasattr(prev, 'env'): + next_ = prev.env + if isinstance(next_, gym.wrappers.env_checker.PassiveEnvChecker): + prev.env = next_.env + prev = next_ + + return root + +""" +gym wrappers do not allow for consistent seeding +add a seed method to each wrapper +""" +#def seedify(env): +# def seed(self, s): +# print(self) +# return self.env.seed(s) +# +# root = env +# curr = env +# while hasattr(curr, 'env'): +# type(curr).seed = seed +# curr = curr.env +# return root + + class MultiAgentWrapper(gym.Wrapper, MultiAgentEnv): def __init__(self, game, **kwargs): multiagent_args = kwargs.get('multiagent') - self.env = game + self.env = disable_passive_env_checker(game) # DISABLE GYM PASSIVE ENVIRONMENT CHECKER gym.Wrapper.__init__(self, self.env) MultiAgentEnv.__init__(self.env) - self.n_agents = multiagent_args.get('n_agents', 2) + try: + self.n_agents = multiagent_args.get('n_agents', 2) + except AttributeError: + self.n_agents = json.loads(multiagent_args.replace('\'', '\"'))['n_agents'] self.observation_space = gym.spaces.Dict({}) self.action_space = gym.spaces.Dict({}) for i in range(self.n_agents): @@ -713,26 +794,51 @@ def reset(self): obs = super().reset() return obs + def seed(self, s): + return self.unwrapped.seed(s) + def step(self, action): # print(f"Step:") # print(f"Action: {action}") obs, rew, done, info = {}, {}, {}, {} + for k, v in action.items(): self.unwrapped._rep.set_active_agent(k) obs_k, rew[k], done[k], info[k] = super().step(action={k: v}) obs.update(obs_k) - # rew = {f'agent_{i}': rew for i in range(self.n_agents)} - # done = {f'agent_{i}': done for i in range(self.n_agents)} done['__all__'] = np.all(list(done.values())) - # info = {f'agent_{i}': info for i in range(self.n_agents)} - - # for i in range(self.n_agents): - # act_i = action[f'agent_{i}'] - # print(act_i) - # obs_i, rew_i, done_i, info_i = super().step(act_i, **kwargs) - # obs.update({f'agent_{i}': obs_i}) - # rew.update({f'agent_{i}': rew_i}) - # done.update({f'agent_{i}': done_i}) - # info.update({f'agent_{i}': info_i}) return obs, rew, done, info + + +class GroupedEnvironmentWrapper(MultiAgentEnv): + def __init__(self, env, **kwargs): + #import pdb; pdb.set_trace() + MultiAgentEnv.__init__(self) + 
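        # Re-expose the grouped env's spaces, metrics, and control targets so the QMIX trainer
        # (which consumes tupled per-group observations) can treat this like any other MultiAgentEnv.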
#gym.Wrapper.__init__(self, env.env) + self.env = env + self.groups = self.env.groups + self.agent_id_to_group = self.env.agent_id_to_group + self._unwrapped = self.env.env.unwrapped + #self.thing = 5 + #super().__init__(env) # inherit the attributes of the base environment + #self.env = env + self.observation_space = self.env.observation_space + self.action_space = self.env.action_space + self.ctrl_metrics = self.env.env.ctrl_metrics + self.metrics = self.env.env.metrics + + def reset(self): + return self.env.reset() + + def step(self, actions): + return self.env.step(actions) + + def _ungroup_items(self, items): + return self.env._ungroup_items(items) + + def _group_items(self, items): + return self.env._group_items(items) + + + diff --git a/rllib_inference.py b/rllib_inference.py new file mode 100644 index 0000000..db1ad4f --- /dev/null +++ b/rllib_inference.py @@ -0,0 +1,300 @@ +import matplotlib as mpl +mpl.use('Agg') +import matplotlib.pyplot as plt +import copy +import argparse +import numpy as np +from pathlib import Path +import uuid +from tqdm import tqdm +from pathlib import Path +import json +import imageio +import pandas as pd +import ray.rllib.agents.ppo as ppo +from gym.spaces import Tuple +import ray.rllib.algorithms.qmix as qmix +from ray.tune.registry import register_env +from ray.rllib.policy.policy import PolicySpec +from ray.rllib.models import ModelCatalog +from control_pcgrl.rl import models +from control_pcgrl.rl.envs import make_env +from control_pcgrl.rl.rllib_utils import ControllablaTrainerFactory as trainer_factory +from control_pcgrl import wrappers + +def load_config(experiment_path): + with open(Path(experiment_path, 'params.json'), 'r') as f: + config = json.load(f) + # override multiagent policy mapping function + + if 'multiagent' in config: + if 'default_policy' in config: + config['multiagent']['policy_mapping_fn'] = lambda agent_id: 'default_policy' + else: + config['multiagent']['policy_mapping_fn'] = lambda agent_id: agent_id + config['env_config']['multiagent'] = json.loads(config['env_config']['multiagent'].replace("\'", "\"")) + + config['evaluation_env'] = True + config['explore'] = False # turn off exploration for evaluation + config['env_config']['crop_shape'] = json.loads(config['env_config']['crop_shape']) + config['env_config']['problem'] = json.loads(config['env_config']['problem'].replace("\'", "\"")) + + env_name = config['env_config']['env_name'] + return config + +def setup_multiagent_config(config, model_cfg): + dummy_env = make_env(config) + obs_space = dummy_env.observation_space['agent_0'] + act_space = dummy_env.action_space['agent_0'] + multiagent_config = {} + if config['multiagent']['policies'] == "centralized": + multiagent_config['policies'] = { + 'default_policy': PolicySpec( + policy_class=None, + observation_space=obs_space, + action_space=act_space, + config={ + 'custom_model': 'custom_model', + 'custom_model_config': { + "dummy_env_obs_space": copy.copy(obs_space), + **json.loads(model_cfg.replace('\'', '\"')), + } + } + ) + } + multiagent_config['policy_mapping_fn'] = lambda agent_id: 'default_policy' + elif config['multiagent']['policies'] == "decentralized": + multiagent_config['policies'] = { + f'agent_{i}': PolicySpec( + policy_class=None, + observation_space=obs_space, + action_space=act_space, + config={ + 'custom_model': 'custom_model', + 'custom_model_config': { + "dummy_env_obs_space": copy.copy(obs_space), + **json.loads(model_cfg.replace('\'', '\"')), + } + } + ) for i in 
range(config['multiagent']['n_agents']) + } + multiagent_config['policy_mapping_fn'] = lambda agent_id: agent_id + return multiagent_config + +def checkpoints_iter(experiment_path): + experiment_path = Path(experiment_path) + return filter(lambda f: 'checkpoint' in f.name, experiment_path.iterdir()) + + +def get_best_checkpoint(experiment_path, config): + # load progress.csv + progress = pd.read_csv(Path(experiment_path, 'progress.csv')) + + max_episode_reward = float('-inf') + max_checkpoint = None + max_checkpoint_name = None + for checkpoint in checkpoints_iter(experiment_path): + # get number after underscore in checkpoint + trainer = restore_trainer(Path(checkpoint), config) + iteration = trainer._iteration + # look up iteration in progress dataframe + trainer_performance = progress.loc[progress['training_iteration'] == iteration] + trainer_reward = trainer_performance['episode_reward_mean'].values[0] + # sometimes the first checkpoint has a nan reward + if np.isnan(trainer_reward) or trainer_reward > max_episode_reward: + max_episode_reward = float('-inf') if np.isnan(trainer_reward) else trainer_reward + max_checkpoint = trainer + max_checkpoint_name = checkpoint + print(f'Loaded from checkpoint: {max_checkpoint_name}') + return max_checkpoint + +def restore_trainer(checkpoint_path, config): + if config['env_config']['algorithm'] == 'QMIX': + trainer = qmix.QMix(config=config) + else: + trainer = ppo.PPOTrainer(config=config) + print(checkpoint_path) + trainer.restore(str(checkpoint_path)) + return trainer + +def register_model(config): + MODELS = {"NCA": models.NCA, "DenseNCA": models.DenseNCA, "SeqNCA": models.SeqNCA, "SeqNCA3D": models.SeqNCA3D} + model_conf_str = config['env_config']['model'].replace('\'', '\"') + model_name_default = model_conf_str.find('None') + if model_name_default > 0: + model_conf_str = model_conf_str[:model_name_default-1] + f' \"None\"' + model_conf_str[model_name_default+4:] + model_config = json.loads(model_conf_str) + if model_config.get('name') == "None": + if config['env_config']['representation'] == 'wide': + model_cls = models.ConvDeconv2D + else: + model_cls = models.CustomFeedForwardModel + else: + model_cls = MODELS[model_config['name']] + ModelCatalog.register_custom_model('custom_model', model_cls) + +def rollout(env_config, trainer, policy_mapping_fn=None, seed=None): + env = make_env(env_config) + env.seed(seed) + env.reset() + env.seed(seed) + #env.unwrapped._max_iterations *= 2 + obs = env.reset() + done = False + acts, obss, rews, infos, frames = [], [], [], [], [] + while not done: + if policy_mapping_fn is not None: + actions = get_multi_agent_actions(trainer, obs, policy_mapping_fn) + acts.append({agent: int(act) for agent, act in actions.items()}) + elif env_config['algorithm'] == 'QMIX': + actions = get_qmix_actions(trainer, obs) + acts.append({agent: int(act) for agent, act in actions.items()}) + else: + actions = get_single_agent_actions(trainer, obs) + acts.append({'agent_0': int(actions)}) + + # build action histogram + obs, rew, done, info = env.step(actions) + #import pdb; pdb.set_trace() + frame = env.render(mode='rgb_array') + frames.append(frame) + rews.append(rew) + infos.append(int(env.unwrapped._rep_stats['path-length'])) + #infos.append(info) + if isinstance(done, dict): + done = done['__all__'] + + #import pdb; pdb.set_trace() + return { + 'actions': acts, + 'rewards': rews, + 'infos': infos, + 'frames': frames, + 'success': env.unwrapped._prob.get_episode_over(env.unwrapped._rep_stats, None), + 'heatmaps': 
env.unwrapped._rep.heatmaps + } + +def save_trial_metrics(metrics, logdir): + # save initial frame, final frame, and gif of frames + imageio.imsave(Path(logdir, 'initial_map.png'), metrics['frames'][0]) + imageio.imsave(Path(logdir, 'final_map.png'), metrics['frames'][-1]) + imageio.mimsave(Path(logdir, 'frames.gif'), metrics['frames']) + # save rewards in json file + with open(Path(logdir, 'rewards.json'), 'w+') as f: + f.write(json.dumps(metrics['rewards'])) + # graph rewards over time + # save infos in json file + with open(Path(logdir, 'infos.json'), 'w+') as f: + f.write(json.dumps(metrics['infos'])) + # plot path length over time + # save actions in json file + with open(Path(logdir, 'actions.json'), 'w+') as f: + f.write(json.dumps(list(metrics['actions']))) + + # check success + with open(Path(logdir, 'success.json'), 'w+') as f: + f.write(json.dumps({'success': bool(metrics['success'])})) + + for i, heatmap in enumerate(metrics['heatmaps']): + fig, ax = plt.subplots() + im = ax.imshow(heatmap) + cbar = ax.figure.colorbar(im, ax=ax) + cbar.ax.set_ylabel('changes', rotation=-90, va="bottom") + ax.grid(which="minor", color="w", linestyle='-', linewidth=3) + fig.savefig(Path(logdir, f'{i}_heatmap.png'), dpi=400) + plt.close(fig) # close figure to prevent memory issues + + +def get_qmix_actions(trainer, observations): + actions = trainer.compute_single_action(tuple(observations.values())) + return {agent: action for agent, action in zip(observations.keys(), actions)} + + +def get_single_agent_actions(trainer, observations): + return trainer.compute_single_action(observations) + +def get_multi_agent_actions(trainer, observations, policy_mapping_fn): + return { + agent_id: trainer.compute_single_action(agent_obs, policy_id=policy_mapping_fn(agent_id)) + for agent_id, agent_obs in observations.items() + } + +def make_grouped_env(config): + + try: + n_agents = config['multiagent']['n_agents'] + except: + n_agents = json.loads(config['multiagent'].replace('\'', '\"'))['n_agents'] + dummy_env = make_env(config) + groups = {'group_1': list(dummy_env.observation_space.keys())} + obs_space = Tuple(dummy_env.observation_space.values()) + act_space = Tuple(dummy_env.action_space.values()) + #import pdb; pdb.set_trace() + register_env( + 'grouped_pcgrl', + lambda config: wrappers.GroupedEnvironmentWrapper(make_env(config).with_agent_groups( + groups, obs_space=obs_space, act_space=act_space)) + + ) + +# run evals with the checkpoint +def evaluate(experiment_path): + # load and setup config + config = load_config(experiment_path) + if 'multiagent' in config: + config['multiagent'] = setup_multiagent_config(config['env_config'], config['env_config']['model']) + # delete keys not recognized by rllib + del config['checkpoint_path_file'] + del config['evaluation_env'] + del config['callbacks'] + del config['num_workers'] + del config['num_envs_per_worker'] + #del config['multiagent'] + #import pdb; pdb.set_trace() + if config['env_config']['algorithm'] == 'PPO': + register_env('pcgrl', make_env) + else: + make_grouped_env(config['env_config']) + #register_env('grouped_pcgrl', make_grouped_env) + config['num_gpus'] = 0 + register_model(config) + # load trainer from checkpoint + trainer = get_best_checkpoint(experiment_path, config) + # rollout the model for n trials + logdir = Path(experiment_path, f'eval_best_{uuid.uuid4()}') + logdir.mkdir() + + try: + policy_mapping_fn = config['multiagent']['policy_mapping_fn'] + except KeyError: + policy_mapping_fn = None + + paths = [] + max_changes = 0 + for 
+    for trial in tqdm(range(40)):
+        results = rollout(config['env_config'], trainer, policy_mapping_fn, seed=trial * 100)
+        trial_log_dir = Path(logdir, f'{trial}')
+        trial_log_dir.mkdir()
+        paths.append(results['infos'][-1])
+        #changes.append(results['infos'][-1]['changes'] / results['infos'][-1]['iterations'])
+        save_trial_metrics(results, trial_log_dir)
+
+    print(f'Wrote logs to: {logdir}')
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--experiment_path',
+        '-e',
+        dest='experiment_path',
+        type=str,
+        required=True
+    )
+
+    #parser.add_argument('checkpoint_loader')  # just load the best checkpoint for now
+    args = parser.parse_args()
+    evaluate(Path(args.experiment_path))
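The evaluation entry point above writes one sub-directory per trial under eval_best_<uuid>, each containing success.json and infos.json (the per-step path lengths recorded by rollout). A minimal sketch of how those artifacts could be aggregated after a run, assuming only that directory layout; summarize_eval and the example path are illustrative and not part of this patch:

import json
from pathlib import Path

def summarize_eval(eval_dir):
    # walk the per-trial directories written by save_trial_metrics
    eval_dir = Path(eval_dir)
    successes, final_path_lengths = [], []
    for trial_dir in sorted(p for p in eval_dir.iterdir() if p.is_dir()):
        with open(trial_dir / 'success.json') as f:
            successes.append(json.load(f)['success'])
        with open(trial_dir / 'infos.json') as f:
            path_lengths = json.load(f)  # one path-length value per env step
        final_path_lengths.append(path_lengths[-1])
    n_trials = len(successes)
    return {
        'n_trials': n_trials,
        'success_rate': sum(successes) / n_trials if n_trials else float('nan'),
        'mean_final_path_length': sum(final_path_lengths) / n_trials if n_trials else float('nan'),
    }

# e.g. summarize_eval('path/to/experiment/eval_best_<uuid>')  # hypothetical path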
diff --git a/tests/test_multiagent_representations.py b/tests/test_multiagent_representations.py
new file mode 100644
index 0000000..43f50ba
--- /dev/null
+++ b/tests/test_multiagent_representations.py
@@ -0,0 +1,205 @@
+import pytest
+from random import randint
+from copy import deepcopy
+from pathlib import Path
+from itertools import permutations, product
+import numpy as np
+
+from control_pcgrl import wrappers
+from control_pcgrl.rl.envs import make_env
+
+
+@pytest.fixture
+def basic_env_config():
+    return {
+        'problem': {
+            'name': 'binary',
+            'weights': {'path-length': 100},
+            'controls': '???',
+            'alp_gmm': '???',
+        },
+        'hardware': {'n_cpu': 1, 'n_gpu': 1, 'num_envs_per_worker': 10},
+        'multiagent': {'n_agents': 2},
+        'representation': 'turtle',
+        'map_shape': [16, 16],
+        'crop_shape': [32, 32],
+        'max_board_scans': 3,
+        'n_aux_tiles': 0,
+        'evaluation_env': False,
+        'observation_size': None,
+        'controls': None,
+        'change_percentage': None,
+        'static_prob': None,
+        'action_size': None,
+        'log_dir': Path('./'),
+    }
+
+
+def validate_turtle_actions(actions, old_positions, new_positions, old_map, new_map):
+    map_shape = old_map.shape
+
+    def validate_move(action, old_position, new_position):
+        # actions 0-3 move the agent up/down/left/right, clamped at the map edges
+        if action == 0:
+            if old_position[0] == 0:
+                assert new_position[0] == old_position[0]
+            else:
+                assert old_position[0] - new_position[0] == 1
+            assert old_position[1] - new_position[1] == 0
+        elif action == 1:
+            if old_position[0] == map_shape[0] - 1:
+                assert new_position[0] == old_position[0]
+            else:
+                assert old_position[0] - new_position[0] == -1
+            assert old_position[1] - new_position[1] == 0
+        elif action == 2:
+            if old_position[1] == 0:
+                assert new_position[1] == old_position[1]
+            else:
+                assert old_position[1] - new_position[1] == 1
+            assert old_position[0] - new_position[0] == 0
+        elif action == 3:
+            if old_position[1] == map_shape[1] - 1:
+                assert new_position[1] == old_position[1]
+            else:
+                assert old_position[1] - new_position[1] == -1
+            assert old_position[0] - new_position[0] == 0
+
+    for agent, old_pos, new_pos in zip(actions, old_positions, new_positions):
+        action = actions[agent]
+        if action < 4:
+            validate_move(action, old_pos, new_pos)
+        else:
+            # position shouldn't change when we place a tile
+            assert tuple(old_pos) == tuple(new_pos)
+            assert new_map[tuple(new_pos)] == action - 4
+
+
+@pytest.mark.parametrize(
+    'action_0,action_1',
+    permutations(list(range(6)), 2)
+)
+def test_multiagent_turtle(basic_env_config, action_0, action_1):
+    # GIVEN
+    env_config = basic_env_config
+    env_name = 'binary-turtle-v0'
+    env = wrappers.CroppedImagePCGRLWrapper(env_name, **env_config)
+    env = wrappers.MultiAgentWrapper(env, **env_config)
+    actions = {'agent_0': action_0, 'agent_1': action_1}
+    env.reset()
+    rep = env.unwrapped._rep
+    init_positions = deepcopy(rep.get_positions())
+    init_map = deepcopy(rep.rep._map)
+
+    # WHEN
+    rep.update(actions)
+
+    # THEN
+    validate_turtle_actions(
+        actions,
+        init_positions,
+        rep._positions,
+        init_map,
+        rep.rep._map
+    )
+
+
+@pytest.mark.parametrize(
+    'action_0,action_1',
+    permutations(list(range(2)), 2)
+)
+def test_multiagent_narrow(basic_env_config, action_0, action_1):
+    # GIVEN
+    env_config = basic_env_config
+    env_config['representation'] = 'narrow'
+    env_name = 'binary-narrow-v0'
+    env = wrappers.CroppedImagePCGRLWrapper(env_name, **env_config)
+    env = wrappers.MultiAgentWrapper(env, **env_config)
+    env.reset()
+    rep = env.unwrapped._rep
+    init_map = deepcopy(rep.rep._map)
+    init_positions = deepcopy(rep._positions)
+    actions = {'agent_0': action_0, 'agent_1': action_1}
+
+    # WHEN
+    rep.update(actions)
+    new_map = rep.rep._map
+
+    # THEN
+    new_positions = rep.get_positions()
+    # check that each agent's position advanced by one column
+    # Note: the test does not account for changes in vertical position
+    assert new_positions[0][1] - 1 == init_positions[0][1]
+    assert new_positions[1][1] - 1 == init_positions[1][1]
+    # check that the map is updated at each agent's previous position
+    assert new_map[tuple(init_positions[0])] == actions['agent_0']
+    assert new_map[tuple(init_positions[1])] == actions['agent_1']
+
+
+# INCOMPLETE TEST
+@pytest.mark.parametrize(
+    'position_x_0,position_y_0,action_0,position_x_1,position_y_1,action_1',
+    [[randint(0, 15), randint(0, 15), randint(0, 1), randint(0, 15), randint(0, 15), randint(0, 1)]]
+)
+def test_multiagent_wide(
+        basic_env_config,
+        position_x_0,
+        position_y_0,
+        action_0,
+        position_x_1,
+        position_y_1,
+        action_1
+):
+    # GIVEN
+    env_config = basic_env_config
+    env_config['representation'] = 'wide'
+    env_name = 'binary-wide-v0'
+    env = wrappers.ActionMapImagePCGRLWrapper(env_name, **env_config)
+    env = wrappers.MultiAgentWrapper(env, **env_config)
+    env.reset()
+    rep = env.unwrapped._rep
+    init_map = deepcopy(rep.rep._map)
+    init_positions = deepcopy(rep._positions)
+    actions = {
+        'agent_0': [position_y_0, position_x_0, action_0],
+        'agent_1': [position_y_1, position_x_1, action_1]
+    }
+
+    # WHEN
+    rep.update(actions)
+
+    # THEN check that the map is changed correctly
+    new_map = rep.rep._map
+    assert new_map[position_y_0][position_x_0] == action_0
+    assert new_map[position_y_1][position_x_1] == action_1
+    # make sure that the map being modified is the same one the pcgrl env uses
+    np.testing.assert_array_equal(new_map, rep.unwrapped._map)
+
+
+def print_agent_positions(agent):
+    # placeholder helper; not yet implemented
+    pass
+
+
+# INCOMPLETE TEST: prints observations for manual inspection; assertions still to be written
+def test_multiagent_position_sharing(basic_env_config):
+    # GIVEN
+    env_config = basic_env_config
+    env_config['env_name'] = 'binary-narrow-v0'
+    env_config['show_agents'] = True
+    env_config['evaluate'] = False
+
+    env = make_env(env_config)
+
+    # WHEN
+    obs = env.reset()
+    print(obs['agent_0'].shape)
+    actions = {'agent_0': 0, 'agent_1': 0}
+
+    newobs, _, _, _ = env.step(actions)
+    agent_positions = np.where(newobs['agent_0'][:, :, -1] > 0)
+    print(agent_positions)
+    agent_positions = np.where(newobs['agent_1'][:, :, -1] > 0)
+    print(agent_positions)
+    newobs, _, _, _ = env.step(actions)
+    agent_positions = np.where(newobs['agent_0'][:, :, -1] > 0)
+    print(agent_positions)
+    agent_positions = np.where(newobs['agent_1'][:, :, -1] > 0)
+    print(agent_positions)
+    print(env.unwrapped._rep._positions)
+
+    # THEN
+    # TODO: assert that each agent's observation encodes the other agent's position
+    pass
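For reference, the movement checks in validate_turtle_actions imply a fixed turtle action convention: 0 = up, 1 = down, 2 = left, 3 = right, with any action >= 4 placing tile `action - 4` and leaving the agent in place, and moves clamped at the map edges. A compact restatement of that convention as a lookup table; this sketch (including the name expected_position) is illustrative only and not part of the patch:

# Direction deltas implied by the assertions in validate_turtle_actions.
TURTLE_MOVES = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

def expected_position(action, pos, map_shape):
    if action >= 4:
        # tile-placement actions leave the agent where it is
        return tuple(pos)
    dr, dc = TURTLE_MOVES[action]
    row = min(max(pos[0] + dr, 0), map_shape[0] - 1)  # clamp at the top/bottom edge
    col = min(max(pos[1] + dc, 0), map_shape[1] - 1)  # clamp at the left/right edge
    return (row, col)

# e.g. expected_position(0, (0, 5), (16, 16)) == (0, 5)  # blocked at the top edge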