diff --git a/.cache/v/cache/lastfailed b/.cache/v/cache/lastfailed
new file mode 100644
index 0000000..6effa1d
--- /dev/null
+++ b/.cache/v/cache/lastfailed
@@ -0,0 +1,3 @@
+{
+  "tests/test_multiagent_representations.py": true
+}
\ No newline at end of file
diff --git a/control_pcgrl/configs/config.py b/control_pcgrl/configs/config.py
index cb1883e..6ecd1ea 100644
--- a/control_pcgrl/configs/config.py
+++ b/control_pcgrl/configs/config.py
@@ -32,6 +32,15 @@ class BinaryPathConfig(ProblemConfig):
     name: str = 'binary'
     # Regions weight will be 0 by default.
     weights: Dict[str, int] = field(default_factory = lambda: ({
+        # 'player': 1,
+        # 'create': 1,
+        # 'target': 1,
+        # 'regions': 1,
+        # 'ratio': 1,
+        # 'dist-win': 1,
+        # 'sol-length': 2
+
+        'path-length': 100,
     }))
 
 
@@ -51,6 +60,8 @@ class BinaryControlConfig(ProblemConfig):
 @dataclass
 class MultiagentConfig:
     n_agents: int = MISSING
+    # valid values: (shared, independent, JSON string)
+    policies: str = "centralized"  # use shared weights by default
 
 
 @dataclass
@@ -107,6 +118,7 @@ class ControlPCGRLConfig:
     multiagent: MultiagentConfig = MISSING
     problem: ProblemConfig = MISSING
 
+    algorithm: str = 'PPO'
     debug: bool = False
     render: bool = False
     infer: bool = False
@@ -117,6 +129,7 @@ class ControlPCGRLConfig:
     exp_id: str = '0'
     representation: str = 'turtle'
+    show_agents: bool = False
     learning_rate: float = 5e-6
     gamma: float = 0.99
     map_shape: List[Any] = field(default_factory=lambda:
@@ -158,4 +171,4 @@ class ControlPCGRLConfig:
 cs.store(name="binary_path", group="problem", node=BinaryPathConfig)
 
 cs.store(name="default_model", group="model", node=ModelConfig)
-cs.store(name="seqnca", group="model", node=SeqNCAConfig)
\ No newline at end of file
+cs.store(name="seqnca", group="model", node=SeqNCAConfig)
diff --git a/control_pcgrl/configs/config.yaml b/control_pcgrl/configs/config.yaml
index 8d412bd..189ae22 100644
--- a/control_pcgrl/configs/config.yaml
+++ b/control_pcgrl/configs/config.yaml
@@ -3,22 +3,22 @@ defaults:
   - _self_
 
   # Why can't we override this on the command line?
-  # - override hydra/launcher: submitit_local
+  #- override hydra/launcher: submitit_local
   - override hydra/launcher: submitit_slurm
 
 hydra:
   sweeper:
     params:
-      exp_id: 0, 1, 2
-      learning_rate: 5e-4, 1e-4, 5e-5, 1e-5, 5e-6, 1e-6
+      #exp_id: 0, 1, 2
+      learning_rate: 5e-5
   launcher:
     tasks_per_node: 1
     #FIXME: Can't set this to 1 or 2 even when only asking from 1 ("0") worker from ray...
-    cpus_per_task: 10
+    cpus_per_task: 22
     gpus_per_node: 1
-    timeout_min: 1440
+    timeout_min: 1440  # 1 day of training
     mem_gb: 30
     # Emails maybe?
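A note on the new multiagent.policies option added to MultiagentConfig above: with the default value "centralized", all agents presumably share one set of policy weights, while "independent" (or a JSON string) would assign separate policies per agent. Below is a minimal, hypothetical sketch of how such a setting could be turned into an RLlib policy-mapping callback, using the newer policy_mapping_fn(agent_id, episode, worker, **kwargs) signature that the training logs further down flag as replacing the deprecated single-argument form. The helper name make_policy_mapping_fn and the policy IDs are illustrative assumptions, not identifiers from this repository.

# Hypothetical sketch only -- not the repository's actual implementation.
def make_policy_mapping_fn(policies_mode: str):
    """Build an RLlib-style policy_mapping_fn from the `multiagent.policies` setting."""
    if policies_mode == "centralized":
        # Shared weights: every agent is routed to the same policy ID.
        def policy_mapping_fn(agent_id, episode, worker, **kwargs):
            return "default_policy"
    else:
        # Independent policies: e.g. agent "agent_2" -> "policy_2".
        def policy_mapping_fn(agent_id, episode, worker, **kwargs):
            return f"policy_{str(agent_id).split('_')[-1]}"
    return policy_mapping_fn

With multiagent.n_agents=2 and the shared default, both agents would then train against a single policy, which is what the "use shared weights by default" comment above suggests.
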
diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696/27482696_submission.sh b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696/27482696_submission.sh
new file mode 100644
index 0000000..1ae510b
--- /dev/null
+++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696/27482696_submission.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Parameters
+#SBATCH --array=0-3%4
+#SBATCH --cpus-per-task=10
+#SBATCH --error=/scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/%A_%a/%A_%a_0_log.err
+#SBATCH --gpus-per-node=1
+#SBATCH --job-name=train_ctrl
+#SBATCH --mem=30GB
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --open-mode=append
+#SBATCH --output=/scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/%A_%a/%A_%a_0_log.out
+#SBATCH --signal=USR2@120
+#SBATCH --time=1440
+#SBATCH --wckey=submitit
+
+# command
+export SUBMITIT_EXECUTOR=slurm
+srun --unbuffered --output /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/%A_%a/%A_%a_%t_log.out --error /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/%A_%a/%A_%a_%t_log.err /scratch/rd2893/miniconda3/envs/pcgrl/bin/python -u -m submitit.core._submit /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/%j
diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_0/27482696_0_0_log.err b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_0/27482696_0_0_log.err
new file mode 100644
index 0000000..8e48be2
--- /dev/null
+++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_0/27482696_0_0_log.err
@@ -0,0 +1,70 @@
+/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16)
+  "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). "
+2022-11-29 15:05:25,106 WARNING env.py:236 -- Your MultiAgentEnv >>>>>>>> does not have some or all of the needed base-class attributes! Make sure you call `super().__init__` from within your MutiAgentEnv's constructor. This will raise an error in the future.
+2022-11-29 15:05:27,892 INFO worker.py:1518 -- Started a local Ray instance.
+(PPOTrainer pid=3593784) 2022-11-29 15:05:35,626 INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
+(PPOTrainer pid=3593784) 2022-11-29 15:05:35,628 INFO algorithm.py:358 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
+(PPOTrainer pid=3593784) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16)
+(PPOTrainer pid=3593784) "A Box observation space has an unconventional shape (neither an image, nor a 1D vector).
" +(PPOTrainer pid=3593784) 2022-11-29 15:05:42,133 WARNING deprecation.py:48 -- DeprecationWarning: `simple_optimizer` has been deprecated. This will raise an error in the future! +(PPOTrainer pid=3593784) 2022-11-29 15:05:42,337 WARNING util.py:66 -- Install gputil for GPU system monitoring. +(PPOTrainer pid=3593784) 2022-11-29 15:05:42,421 WARNING deprecation.py:48 -- DeprecationWarning: `policy_mapping_fn(agent_id)` has been deprecated. Use `policy_mapping_fn(agent_id, episode, worker, **kwargs)` instead. This will raise an error in the future! +(PPOTrainer pid=3593784) 2022-11-29 15:05:44,433 WARNING deprecation.py:48 -- DeprecationWarning: `concat_samples` has been deprecated. Use `concat_samples() from rllib.policy.sample_batch` instead. This will raise an error in the future! +(PPOTrainer pid=3593784) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/ray/rllib/utils/metrics/learner_info.py:110: RuntimeWarning: Mean of empty slice +(PPOTrainer pid=3593784) return np.nanmean(tower_data) +2022-11-30 01:46:43,745 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 7.095 s, which may be a performance bottleneck. +2022-11-30 01:46:43,747 WARNING util.py:244 -- The `process_trial_result` operation took 7.099 s, which may be a performance bottleneck. +2022-11-30 01:46:43,747 WARNING util.py:244 -- Processing trial results took 7.099 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:46:43,747 WARNING util.py:244 -- The `process_trial_result` operation took 7.100 s, which may be a performance bottleneck. +2022-11-30 01:48:00,324 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 9.173 s, which may be a performance bottleneck. +2022-11-30 01:48:00,327 WARNING util.py:244 -- The `process_trial_result` operation took 9.178 s, which may be a performance bottleneck. +2022-11-30 01:48:00,331 WARNING util.py:244 -- Processing trial results took 9.182 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:48:00,332 WARNING util.py:244 -- The `process_trial_result` operation took 9.184 s, which may be a performance bottleneck. +2022-11-30 01:50:26,182 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 6.208 s, which may be a performance bottleneck. +2022-11-30 01:50:26,184 WARNING util.py:244 -- The `process_trial_result` operation took 6.212 s, which may be a performance bottleneck. +2022-11-30 01:50:26,184 WARNING util.py:244 -- Processing trial results took 6.212 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:50:26,184 WARNING util.py:244 -- The `process_trial_result` operation took 6.214 s, which may be a performance bottleneck. +submitit WARNING (2022-11-30 15:02:50,873) - Caught signal SIGUSR2 on gv002.hpc.nyu.edu: this job is timed-out. +submitit WARNING (2022-11-30 15:02:51,153) - Bypassing signal SIGCONT +Traceback (most recent call last): + File "/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/submitit/core/job_environment.py", line 226, in checkpoint_and_try_requeue + raise utils.UncompletedJobError(message) +submitit.core.utils.UncompletedJobError: Job not requeued because: timed-out too many times. 
+Exception ignored in: 'ray._raylet.check_signals' +Traceback (most recent call last): + File "/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/submitit/core/job_environment.py", line 226, in checkpoint_and_try_requeue + raise utils.UncompletedJobError(message) +submitit.core.utils.UncompletedJobError: Job not requeued because: timed-out too many times. +slurmstepd: error: *** JOB 27482702 ON gv002 CANCELLED AT 2022-11-30T15:05:22 DUE TO TIME LIMIT *** +slurmstepd: error: *** STEP 27482702.0 ON gv002 CANCELLED AT 2022-11-30T15:05:22 DUE TO TIME LIMIT *** +*** SIGTERM received at time=1669838722 on cpu 6 *** +[failure_signal_handler.cc : 331] RAW: Signal 15 raised at PC=0x14df1d5017b0 while already in AbslFailureSignalHandler() +*** SIGTERM received at time=1669838722 on cpu 6 *** +PC: @ 0x14df1d5017b0 (unknown) absl::lts_20211102::debugging_internal::ParseMangledName() + @ 0x14df25374b20 3424 (unknown) + @ 0x14df1d4fbf78 32 absl::lts_20211102::debugging_internal::DemangleInplace() + @ 0x14df1d4fd4b2 1296 absl::lts_20211102::debugging_internal::(anonymous namespace)::Symbolizer::GetSymbol() + @ 0x14df1d4fd7fe 80 absl::lts_20211102::Symbolize() + @ 0x14df1d4d162d 2144 absl::lts_20211102::debugging_internal::DumpPCAndFrameSizeAndSymbol() + @ 0x14df1d4d1751 192 absl::lts_20211102::debugging_internal::DumpPCAndFrameSizesAndStackTrace() + @ 0x14df1d4d10c6 496 absl::lts_20211102::WriteStackTrace() + @ 0x14df1d4d12d1 80 absl::lts_20211102::AbslFailureSignalHandler() + @ 0x14df25374b20 (unknown) (unknown) +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: *** SIGTERM received at time=1669838722 on cpu 6 *** +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: PC: @ 0x14df1d5017b0 (unknown) absl::lts_20211102::debugging_internal::ParseMangledName() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df25374b20 3424 (unknown) +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4fbf78 32 absl::lts_20211102::debugging_internal::DemangleInplace() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4fd4b2 1296 absl::lts_20211102::debugging_internal::(anonymous namespace)::Symbolizer::GetSymbol() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4fd7fe 80 absl::lts_20211102::Symbolize() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4d162d 2144 absl::lts_20211102::debugging_internal::DumpPCAndFrameSizeAndSymbol() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4d1751 192 absl::lts_20211102::debugging_internal::DumpPCAndFrameSizesAndStackTrace() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4d10c6 496 absl::lts_20211102::WriteStackTrace() +[2022-11-30 15:05:22,384 E 3592250 3592250] logging.cc:361: @ 0x14df1d4d12d1 80 absl::lts_20211102::AbslFailureSignalHandler() +[2022-11-30 15:05:22,385 E 3592250 3592250] logging.cc:361: @ 0x14df25374b20 (unknown) (unknown) +PC: @ 0x14df2537064a (unknown) pthread_cond_timedwait@@GLIBC_2.3.2 + @ 0x14df25374b20 (unknown) (unknown) +[2022-11-30 15:05:22,385 E 3592250 3592250] logging.cc:361: *** SIGTERM received at time=1669838722 on cpu 6 *** +[2022-11-30 15:05:22,385 E 3592250 3592250] logging.cc:361: PC: @ 0x14df2537064a (unknown) pthread_cond_timedwait@@GLIBC_2.3.2 +[2022-11-30 15:05:22,385 E 3592250 3592250] logging.cc:361: @ 0x14df25374b20 (unknown) (unknown) +submitit WARNING (2022-11-30 15:05:22,799) - Bypassing signal SIGTERM +submitit WARNING (2022-11-30 15:05:22,819) - Bypassing 
signal SIGCONT diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_0/27482696_0_submitted.pkl b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_0/27482696_0_submitted.pkl new file mode 100644 index 0000000..b5d7e93 Binary files /dev/null and b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_0/27482696_0_submitted.pkl differ diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_0_log.err b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_0_log.err new file mode 100644 index 0000000..704e5b9 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_0_log.err @@ -0,0 +1,20 @@ +/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16) + "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). " +2022-11-29 15:05:25,086 WARNING env.py:236 -- Your MultiAgentEnv >>>>>>>> does not have some or all of the needed base-class attributes! Make sure you call `super().__init__` from within your MutiAgentEnv's constructor. This will raise an error in the future. +2022-11-29 15:05:27,858 INFO worker.py:1518 -- Started a local Ray instance. +(PPOTrainer pid=532048) 2022-11-29 15:05:35,162 INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you. +(PPOTrainer pid=532048) 2022-11-29 15:05:35,163 INFO algorithm.py:358 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags. +(PPOTrainer pid=532048) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16) +(PPOTrainer pid=532048) "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). " +(PPOTrainer pid=532048) 2022-11-29 15:05:41,998 WARNING deprecation.py:48 -- DeprecationWarning: `simple_optimizer` has been deprecated. This will raise an error in the future! +(PPOTrainer pid=532048) 2022-11-29 15:05:42,202 WARNING util.py:66 -- Install gputil for GPU system monitoring. +(PPOTrainer pid=532048) 2022-11-29 15:05:42,269 WARNING deprecation.py:48 -- DeprecationWarning: `policy_mapping_fn(agent_id)` has been deprecated. Use `policy_mapping_fn(agent_id, episode, worker, **kwargs)` instead. This will raise an error in the future! +(PPOTrainer pid=532048) 2022-11-29 15:05:44,548 WARNING deprecation.py:48 -- DeprecationWarning: `concat_samples` has been deprecated. Use `concat_samples() from rllib.policy.sample_batch` instead. This will raise an error in the future! 
+(PPOTrainer pid=532048) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/ray/rllib/utils/metrics/learner_info.py:110: RuntimeWarning: Mean of empty slice +(PPOTrainer pid=532048) return np.nanmean(tower_data) +2022-11-30 01:48:00,310 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 9.325 s, which may be a performance bottleneck. +2022-11-30 01:48:00,312 WARNING util.py:244 -- The `process_trial_result` operation took 9.327 s, which may be a performance bottleneck. +2022-11-30 01:48:00,312 WARNING util.py:244 -- Processing trial results took 9.328 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:48:00,312 WARNING util.py:244 -- The `process_trial_result` operation took 9.329 s, which may be a performance bottleneck. +submitit WARNING (2022-11-30 15:02:50,633) - Caught signal SIGUSR2 on gv005.hpc.nyu.edu: this job is timed-out. +submitit WARNING (2022-11-30 15:02:50,633) - Bypassing signal SIGCONT diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_0_result.pkl b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_0_result.pkl new file mode 100644 index 0000000..c263664 Binary files /dev/null and b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_0_result.pkl differ diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_submitted.pkl b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_submitted.pkl new file mode 100644 index 0000000..5b10016 Binary files /dev/null and b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_1/27482696_1_submitted.pkl differ diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_2/27482696_2_0_log.err b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_2/27482696_2_0_log.err new file mode 100644 index 0000000..45400d9 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_2/27482696_2_0_log.err @@ -0,0 +1,56 @@ +/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16) + "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). " +2022-11-29 15:05:25,081 WARNING env.py:236 -- Your MultiAgentEnv >>>>>>>> does not have some or all of the needed base-class attributes! Make sure you call `super().__init__` from within your MutiAgentEnv's constructor. This will raise an error in the future. +2022-11-29 15:05:27,854 INFO worker.py:1518 -- Started a local Ray instance. +(PPOTrainer pid=1434787) 2022-11-29 15:05:35,204 INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you. +(PPOTrainer pid=1434787) 2022-11-29 15:05:35,205 INFO algorithm.py:358 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags. 
+(PPOTrainer pid=1434787) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16) +(PPOTrainer pid=1434787) "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). " +(PPOTrainer pid=1434787) 2022-11-29 15:05:41,952 WARNING deprecation.py:48 -- DeprecationWarning: `simple_optimizer` has been deprecated. This will raise an error in the future! +(PPOTrainer pid=1434787) 2022-11-29 15:05:42,156 WARNING util.py:66 -- Install gputil for GPU system monitoring. +(PPOTrainer pid=1434787) 2022-11-29 15:05:42,261 WARNING deprecation.py:48 -- DeprecationWarning: `policy_mapping_fn(agent_id)` has been deprecated. Use `policy_mapping_fn(agent_id, episode, worker, **kwargs)` instead. This will raise an error in the future! +(PPOTrainer pid=1434787) 2022-11-29 15:05:44,313 WARNING deprecation.py:48 -- DeprecationWarning: `concat_samples` has been deprecated. Use `concat_samples() from rllib.policy.sample_batch` instead. This will raise an error in the future! +(PPOTrainer pid=1434787) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/ray/rllib/utils/metrics/learner_info.py:110: RuntimeWarning: Mean of empty slice +(PPOTrainer pid=1434787) return np.nanmean(tower_data) +2022-11-30 01:30:35,219 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 8.386 s, which may be a performance bottleneck. +2022-11-30 01:30:35,220 WARNING util.py:244 -- The `process_trial_result` operation took 8.388 s, which may be a performance bottleneck. +2022-11-30 01:30:35,220 WARNING util.py:244 -- Processing trial results took 8.388 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:30:35,220 WARNING util.py:244 -- The `process_trial_result` operation took 8.389 s, which may be a performance bottleneck. +2022-11-30 01:31:45,292 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 10.041 s, which may be a performance bottleneck. +2022-11-30 01:31:45,293 WARNING util.py:244 -- The `process_trial_result` operation took 10.042 s, which may be a performance bottleneck. +2022-11-30 01:31:45,296 WARNING util.py:244 -- Processing trial results took 10.046 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:31:45,296 WARNING util.py:244 -- The `process_trial_result` operation took 10.047 s, which may be a performance bottleneck. +submitit WARNING (2022-11-30 15:02:50,940) - Caught signal SIGUSR2 on gv006.hpc.nyu.edu: this job is timed-out. +submitit WARNING (2022-11-30 15:03:01,174) - Bypassing signal SIGCONT +Traceback (most recent call last): + File "/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/submitit/core/job_environment.py", line 226, in checkpoint_and_try_requeue + raise utils.UncompletedJobError(message) +submitit.core.utils.UncompletedJobError: Job not requeued because: timed-out too many times. 
+Exception ignored in: 'ray._raylet.check_signals' +Traceback (most recent call last): + File "/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/submitit/core/job_environment.py", line 226, in checkpoint_and_try_requeue + raise utils.UncompletedJobError(message) +submitit.core.utils.UncompletedJobError: Job not requeued because: timed-out too many times. +slurmstepd: error: *** JOB 27482704 ON gv006 CANCELLED AT 2022-11-30T15:05:22 DUE TO TIME LIMIT *** +slurmstepd: error: *** STEP 27482704.0 ON gv006 CANCELLED AT 2022-11-30T15:05:22 DUE TO TIME LIMIT *** +*** SIGTERM received at time=1669838722 on cpu 10 *** +PC: @ 0x14b4c1a2a64a (unknown) pthread_cond_timedwait@@GLIBC_2.3.2 +[failure_signal_handler.cc : 331] RAW: Signal 15 raised at PC=0x14b4c17b4627 while already in AbslFailureSignalHandler() +*** SIGTERM received at time=1669838722 on cpu 10 *** +PC: @ 0x14b4c17b4627 (unknown) __strlen_avx2 + @ 0x14b4c1a2eb20 5712 (unknown) + @ 0x14b4c16a74f6 1456 _IO_vfprintf + @ 0x14b4c16d0784 1664 __vsnprintf + @ 0x3462343178302020 (unknown) (unknown) +[2022-11-30 15:05:22,211 E 1432424 1432424] logging.cc:361: *** SIGTERM received at time=1669838722 on cpu 10 *** +[2022-11-30 15:05:22,211 E 1432424 1432424] logging.cc:361: PC: @ 0x14b4c17b4627 (unknown) __strlen_avx2 +[2022-11-30 15:05:22,213 E 1432424 1432424] logging.cc:361: @ 0x14b4c1a2eb20 5712 (unknown) +[2022-11-30 15:05:22,213 E 1432424 1432424] logging.cc:361: @ 0x14b4c16a74f6 1456 _IO_vfprintf +[2022-11-30 15:05:22,213 E 1432424 1432424] logging.cc:361: @ 0x14b4c16d0784 1664 __vsnprintf +[2022-11-30 15:05:22,214 E 1432424 1432424] logging.cc:361: @ 0x3462343178302020 (unknown) (unknown) + @ 0x14b4c1a2eb20 (unknown) (unknown) +[2022-11-30 15:05:22,214 E 1432424 1432424] logging.cc:361: *** SIGTERM received at time=1669838722 on cpu 10 *** +[2022-11-30 15:05:22,214 E 1432424 1432424] logging.cc:361: PC: @ 0x14b4c1a2a64a (unknown) pthread_cond_timedwait@@GLIBC_2.3.2 +[2022-11-30 15:05:22,216 E 1432424 1432424] logging.cc:361: @ 0x14b4c1a2eb20 (unknown) (unknown) +submitit WARNING (2022-11-30 15:05:22,705) - Bypassing signal SIGTERM +submitit WARNING (2022-11-30 15:05:22,705) - Bypassing signal SIGCONT diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_2/27482696_2_submitted.pkl b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_2/27482696_2_submitted.pkl new file mode 100644 index 0000000..30a8f15 Binary files /dev/null and b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_2/27482696_2_submitted.pkl differ diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_3/27482696_3_0_log.err b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_3/27482696_3_0_log.err new file mode 100644 index 0000000..bb9f91b --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_3/27482696_3_0_log.err @@ -0,0 +1,70 @@ +/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16) + "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). 
" +2022-11-29 15:05:25,249 WARNING env.py:236 -- Your MultiAgentEnv >>>>>>>> does not have some or all of the needed base-class attributes! Make sure you call `super().__init__` from within your MutiAgentEnv's constructor. This will raise an error in the future. +2022-11-29 15:05:27,900 INFO worker.py:1518 -- Started a local Ray instance. +2022-11-29 15:05:42,607 WARNING trial_runner.py:331 -- The maximum number of pending trials has been automatically set to the number of available cluster CPUs, which is high (176 CPUs/pending trials). If you're running an experiment with a large number of trials, this could lead to scheduling overhead. In this case, consider setting the `TUNE_MAX_PENDING_TRIALS_PG` environment variable to the desired maximum number of concurrent trials. +(PPOTrainer pid=1389415) 2022-11-29 15:05:48,172 INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you. +(PPOTrainer pid=1389415) 2022-11-29 15:05:48,174 INFO algorithm.py:358 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags. +(PPOTrainer pid=1389415) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/gym/utils/passive_env_checker.py:32: UserWarning: WARN: A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (16, 16) +(PPOTrainer pid=1389415) "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). " +(PPOTrainer pid=1389415) 2022-11-29 15:05:55,949 WARNING deprecation.py:48 -- DeprecationWarning: `simple_optimizer` has been deprecated. This will raise an error in the future! +(PPOTrainer pid=1389415) 2022-11-29 15:05:56,175 WARNING util.py:66 -- Install gputil for GPU system monitoring. +(PPOTrainer pid=1389415) 2022-11-29 15:05:56,299 WARNING deprecation.py:48 -- DeprecationWarning: `policy_mapping_fn(agent_id)` has been deprecated. Use `policy_mapping_fn(agent_id, episode, worker, **kwargs)` instead. This will raise an error in the future! +(PPOTrainer pid=1389415) 2022-11-29 15:05:58,764 WARNING deprecation.py:48 -- DeprecationWarning: `concat_samples` has been deprecated. Use `concat_samples() from rllib.policy.sample_batch` instead. This will raise an error in the future! +(PPOTrainer pid=1389415) /scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/ray/rllib/utils/metrics/learner_info.py:110: RuntimeWarning: Mean of empty slice +(PPOTrainer pid=1389415) return np.nanmean(tower_data) +2022-11-30 01:30:35,354 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 7.989 s, which may be a performance bottleneck. +2022-11-30 01:30:35,358 WARNING util.py:244 -- The `process_trial_result` operation took 7.994 s, which may be a performance bottleneck. +2022-11-30 01:30:35,359 WARNING util.py:244 -- Processing trial results took 7.996 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:30:35,362 WARNING util.py:244 -- The `process_trial_result` operation took 8.001 s, which may be a performance bottleneck. +2022-11-30 01:32:50,896 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 5.364 s, which may be a performance bottleneck. 
+2022-11-30 01:32:50,901 WARNING util.py:244 -- The `process_trial_result` operation took 5.372 s, which may be a performance bottleneck. +2022-11-30 01:32:50,902 WARNING util.py:244 -- Processing trial results took 5.372 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:32:50,902 WARNING util.py:244 -- The `process_trial_result` operation took 5.375 s, which may be a performance bottleneck. +2022-11-30 01:46:43,788 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 8.858 s, which may be a performance bottleneck. +2022-11-30 01:46:43,810 WARNING util.py:244 -- The `process_trial_result` operation took 8.881 s, which may be a performance bottleneck. +2022-11-30 01:46:43,810 WARNING util.py:244 -- Processing trial results took 8.882 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:46:43,810 WARNING util.py:244 -- The `process_trial_result` operation took 8.885 s, which may be a performance bottleneck. +2022-11-30 01:48:00,389 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 8.216 s, which may be a performance bottleneck. +2022-11-30 01:48:00,394 WARNING util.py:244 -- The `process_trial_result` operation took 8.223 s, which may be a performance bottleneck. +2022-11-30 01:48:00,394 WARNING util.py:244 -- Processing trial results took 8.223 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:48:00,394 WARNING util.py:244 -- The `process_trial_result` operation took 8.226 s, which may be a performance bottleneck. +2022-11-30 01:50:26,268 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 10.092 s, which may be a performance bottleneck. +2022-11-30 01:50:26,287 WARNING util.py:244 -- The `process_trial_result` operation took 10.112 s, which may be a performance bottleneck. +2022-11-30 01:50:26,287 WARNING util.py:244 -- Processing trial results took 10.112 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:50:26,287 WARNING util.py:244 -- The `process_trial_result` operation took 10.114 s, which may be a performance bottleneck. +2022-11-30 01:51:35,706 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 8.865 s, which may be a performance bottleneck. +2022-11-30 01:51:35,710 WARNING util.py:244 -- The `process_trial_result` operation took 8.872 s, which may be a performance bottleneck. +2022-11-30 01:51:35,712 WARNING util.py:244 -- Processing trial results took 8.873 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 01:51:35,712 WARNING util.py:244 -- The `process_trial_result` operation took 8.877 s, which may be a performance bottleneck. +2022-11-30 04:05:42,396 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 0.530 s, which may be a performance bottleneck. +2022-11-30 04:05:42,416 WARNING util.py:244 -- The `process_trial_result` operation took 0.552 s, which may be a performance bottleneck. +2022-11-30 04:05:42,416 WARNING util.py:244 -- Processing trial results took 0.553 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 04:05:42,416 WARNING util.py:244 -- The `process_trial_result` operation took 0.555 s, which may be a performance bottleneck. 
+2022-11-30 07:25:45,666 WARNING util.py:244 -- The `callbacks.on_trial_result` operation took 0.690 s, which may be a performance bottleneck. +2022-11-30 07:25:45,684 WARNING util.py:244 -- The `process_trial_result` operation took 0.711 s, which may be a performance bottleneck. +2022-11-30 07:25:45,685 WARNING util.py:244 -- Processing trial results took 0.712 s, which may be a performance bottleneck. Please consider reporting results less frequently to Ray Tune. +2022-11-30 07:25:45,685 WARNING util.py:244 -- The `process_trial_result` operation took 0.714 s, which may be a performance bottleneck. +2022-11-30 13:45:51,631 WARNING util.py:244 -- The `on_step_begin` operation took 0.592 s, which may be a performance bottleneck. +submitit WARNING (2022-11-30 15:02:51,594) - Caught signal SIGUSR2 on ga019.hpc.nyu.edu: this job is timed-out. +submitit WARNING (2022-11-30 15:02:51,898) - Bypassing signal SIGCONT +Traceback (most recent call last): + File "/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/submitit/core/job_environment.py", line 226, in checkpoint_and_try_requeue + raise utils.UncompletedJobError(message) +submitit.core.utils.UncompletedJobError: Job not requeued because: timed-out too many times. +Exception ignored in: 'ray._raylet.check_signals' +Traceback (most recent call last): + File "/scratch/rd2893/miniconda3/envs/pcgrl/lib/python3.7/site-packages/submitit/core/job_environment.py", line 226, in checkpoint_and_try_requeue + raise utils.UncompletedJobError(message) +submitit.core.utils.UncompletedJobError: Job not requeued because: timed-out too many times. +slurmstepd: error: *** STEP 27482696.0 ON ga019 CANCELLED AT 2022-11-30T15:05:22 DUE TO TIME LIMIT *** +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. +slurmstepd: error: *** JOB 27482696 ON ga019 CANCELLED AT 2022-11-30T15:05:22 DUE TO TIME LIMIT *** +*** SIGTERM received at time=1669838722 on cpu 85 *** +PC: @ 0x145586a8864a (unknown) pthread_cond_timedwait@@GLIBC_2.3.2 + @ 0x145586a8cb20 (unknown) (unknown) +[2022-11-30 15:05:22,219 E 1384324 1384324] logging.cc:361: *** SIGTERM received at time=1669838722 on cpu 85 *** +[2022-11-30 15:05:22,219 E 1384324 1384324] logging.cc:361: PC: @ 0x145586a8864a (unknown) pthread_cond_timedwait@@GLIBC_2.3.2 +[2022-11-30 15:05:22,219 E 1384324 1384324] logging.cc:361: @ 0x145586a8cb20 (unknown) (unknown) +submitit WARNING (2022-11-30 15:05:22,623) - Bypassing signal SIGTERM +submitit WARNING (2022-11-30 15:05:22,624) - Bypassing signal SIGCONT diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_3/27482696_3_submitted.pkl b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_3/27482696_3_submitted.pkl new file mode 100644 index 0000000..d14c79f Binary files /dev/null and b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/.submitit/27482696_3/27482696_3_submitted.pkl differ diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/config.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/config.yaml new file mode 100644 index 0000000..9f91ffc --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 1 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 1 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/hydra.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/hydra.yaml new file mode 100644 index 0000000..bfefa97 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/hydra.yaml @@ -0,0 +1,186 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher + partition: null + qos: null + comment: null + constraint: null + exclude: null + gres: null + cpus_per_gpu: null + gpus_per_task: null + mem_per_gpu: null + mem_per_cpu: null + account: null + signal_delay_s: 120 + max_num_timeout: 0 + additional_parameters: {} + array_parallelism: 256 + setup: null + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? 
+ hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: MULTIRUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=MULTIRUN + task: + - multiagent.n_agents=1 + - representation=turtle + - load=False + job: + name: train_ctrl + chdir: null + override_dirname: load=False,multiagent.n_agents=1,representation=turtle + id: '27482696_0' + num: 0 + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_slurm + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/overrides.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/overrides.yaml new file mode 100644 index 0000000..886263e --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/0/.hydra/overrides.yaml @@ -0,0 +1,3 @@ +- multiagent.n_agents=1 +- representation=turtle +- load=False diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/config.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/config.yaml new file mode 100644 index 0000000..0bc6d71 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 1 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 2 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/hydra.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/hydra.yaml new file mode 100644 index 0000000..9783c95 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/hydra.yaml @@ -0,0 +1,186 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher + partition: null + qos: null + comment: null + constraint: null + exclude: null + gres: null + cpus_per_gpu: null + gpus_per_task: null + mem_per_gpu: null + mem_per_cpu: null + account: null + signal_delay_s: 120 + max_num_timeout: 0 + additional_parameters: {} + array_parallelism: 256 + setup: null + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? 
+ hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: MULTIRUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=MULTIRUN + task: + - multiagent.n_agents=2 + - representation=turtle + - load=False + job: + name: train_ctrl + chdir: null + override_dirname: load=False,multiagent.n_agents=2,representation=turtle + id: '27482696_1' + num: 1 + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_slurm + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/overrides.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/overrides.yaml new file mode 100644 index 0000000..f7ae698 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/1/.hydra/overrides.yaml @@ -0,0 +1,3 @@ +- multiagent.n_agents=2 +- representation=turtle +- load=False diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/config.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/config.yaml new file mode 100644 index 0000000..5712828 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 1 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 3 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/hydra.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/hydra.yaml new file mode 100644 index 0000000..c4e3218 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/hydra.yaml @@ -0,0 +1,186 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher + partition: null + qos: null + comment: null + constraint: null + exclude: null + gres: null + cpus_per_gpu: null + gpus_per_task: null + mem_per_gpu: null + mem_per_cpu: null + account: null + signal_delay_s: 120 + max_num_timeout: 0 + additional_parameters: {} + array_parallelism: 256 + setup: null + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? 
+ hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: MULTIRUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=MULTIRUN + task: + - multiagent.n_agents=3 + - representation=turtle + - load=False + job: + name: train_ctrl + chdir: null + override_dirname: load=False,multiagent.n_agents=3,representation=turtle + id: '27482696_2' + num: 2 + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_slurm + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/overrides.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/overrides.yaml new file mode 100644 index 0000000..a342a9a --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/2/.hydra/overrides.yaml @@ -0,0 +1,3 @@ +- multiagent.n_agents=3 +- representation=turtle +- load=False diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/config.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/config.yaml new file mode 100644 index 0000000..fb00417 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 1 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 4 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/hydra.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/hydra.yaml new file mode 100644 index 0000000..d6651f9 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/hydra.yaml @@ -0,0 +1,186 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher + partition: null + qos: null + comment: null + constraint: null + exclude: null + gres: null + cpus_per_gpu: null + gpus_per_task: null + mem_per_gpu: null + mem_per_cpu: null + account: null + signal_delay_s: 120 + max_num_timeout: 0 + additional_parameters: {} + array_parallelism: 256 + setup: null + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? 
+ hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: MULTIRUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=MULTIRUN + task: + - multiagent.n_agents=4 + - representation=turtle + - load=False + job: + name: train_ctrl + chdir: null + override_dirname: load=False,multiagent.n_agents=4,representation=turtle + id: '27482696_3' + num: 3 + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_slurm + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/overrides.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/overrides.yaml new file mode 100644 index 0000000..74c02b5 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/3/.hydra/overrides.yaml @@ -0,0 +1,3 @@ +- multiagent.n_agents=4 +- representation=turtle +- load=False diff --git a/control_pcgrl/configs/multirun/2022-11-29/15-04-08/multirun.yaml b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/multirun.yaml new file mode 100644 index 0000000..8ed5e80 --- /dev/null +++ b/control_pcgrl/configs/multirun/2022-11-29/15-04-08/multirun.yaml @@ -0,0 +1,229 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher + partition: null + qos: null + comment: null + constraint: null + exclude: null + gres: null + cpus_per_gpu: null + gpus_per_task: null + mem_per_gpu: null + mem_per_cpu: null + account: null + signal_delay_s: 120 + max_num_timeout: 0 + additional_parameters: {} + array_parallelism: 256 + setup: null + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: 
${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: MULTIRUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=MULTIRUN + task: + - multiagent.n_agents=1,2,3,4 + - representation=turtle + - load=False + job: + name: train_ctrl + chdir: null + override_dirname: load=False,multiagent.n_agents=1,2,3,4,representation=turtle + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: ??? + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_slurm + hydra/output: default + verbose: false +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 1 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 0 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/config.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/config.yaml new file mode 100644 index 0000000..ceca151 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? +hardware: + n_cpu: 1 + n_gpu: 1 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 0 +debug: false +render: false +infer: false +evaluate: false +load: true +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/hydra.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/hydra.yaml new file mode 100644 index 0000000..507fb81 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/hydra.yaml @@ -0,0 +1,168 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.LocalLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? 
+ hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - hardware.n_gpu=1 + job: + name: train_ctrl + chdir: null + override_dirname: hardware.n_gpu=1 + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/outputs/2022-11-29/14-39-14 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_local + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/overrides.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/overrides.yaml new file mode 100644 index 0000000..23d31ba --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-39-14/.hydra/overrides.yaml @@ -0,0 +1 @@ +- hardware.n_gpu=1 diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/config.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/config.yaml new file mode 100644 index 0000000..9c0420a --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 0 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 0 +debug: false +render: false +infer: false +evaluate: false +load: true +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/hydra.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/hydra.yaml new file mode 100644 index 0000000..9d5a305 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/hydra.yaml @@ -0,0 +1,168 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.LocalLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - hardware.n_gpu=0 + job: + name: train_ctrl + chdir: null + override_dirname: hardware.n_gpu=0 + id: ??? + num: ??? 
+ config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/outputs/2022-11-29/14-39-51 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_local + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/overrides.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/overrides.yaml new file mode 100644 index 0000000..0c836a4 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-39-51/.hydra/overrides.yaml @@ -0,0 +1 @@ +- hardware.n_gpu=0 diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/config.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/config.yaml new file mode 100644 index 0000000..aced9bb --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? +hardware: + n_cpu: 1 + n_gpu: 0 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 0 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/hydra.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/hydra.yaml new file mode 100644 index 0000000..15e7825 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/hydra.yaml @@ -0,0 +1,169 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.LocalLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. 
+ + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - hardware.n_gpu=0 + - load=False + job: + name: train_ctrl + chdir: null + override_dirname: hardware.n_gpu=0,load=False + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/outputs/2022-11-29/14-40-16 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_local + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/overrides.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/overrides.yaml new file mode 100644 index 0000000..4df4332 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-40-16/.hydra/overrides.yaml @@ -0,0 +1,2 @@ +- hardware.n_gpu=0 +- load=False diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/config.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/config.yaml new file mode 100644 index 0000000..05a13f1 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? 
+hardware: + n_cpu: 1 + n_gpu: 0 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 2 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/hydra.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/hydra.yaml new file mode 100644 index 0000000..8b1a090 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/hydra.yaml @@ -0,0 +1,170 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.LocalLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - hardware.n_gpu=0 + - load=False + - multiagent.n_agents=2 + job: + name: train_ctrl + chdir: null + override_dirname: hardware.n_gpu=0,load=False,multiagent.n_agents=2 + id: ??? + num: ??? 
+ config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/outputs/2022-11-29/14-44-50 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_local + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/overrides.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/overrides.yaml new file mode 100644 index 0000000..7b8f603 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-44-50/.hydra/overrides.yaml @@ -0,0 +1,3 @@ +- hardware.n_gpu=0 +- load=False +- multiagent.n_agents=2 diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/config.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/config.yaml new file mode 100644 index 0000000..05a13f1 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/config.yaml @@ -0,0 +1,43 @@ +problem: + name: binary + weights: + path-length: 100 + controls: ??? + alp_gmm: ??? +hardware: + n_cpu: 1 + n_gpu: 0 + num_envs_per_worker: 10 +model: + name: null + conv_filters: 64 + fc_size: 64 +multiagent: + n_agents: 2 +debug: false +render: false +infer: false +evaluate: false +load: false +overwrite: false +wandb: false +exp_id: '0' +representation: turtle +learning_rate: 5.0e-06 +gamma: 0.99 +map_shape: +- 16 +- 16 +crop_shape: +- 32 +- 32 +max_board_scans: 3 +n_aux_tiles: 0 +observation_size: null +controls: null +change_percentage: null +static_prob: null +action_size: null +log_dir: null +env_name: null +evaluation_env: null diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/hydra.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/hydra.yaml new file mode 100644 index 0000000..561ae9a --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/hydra.yaml @@ -0,0 +1,170 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/.submitit/%j + timeout_min: 1440 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 30 + nodes: 1 + name: ${hydra.job.name} + stderr_to_stdout: false + _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.LocalLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. 
+ + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - hardware.n_gpu=0 + - load=False + - multiagent.n_agents=2 + job: + name: train_ctrl + chdir: null + override_dirname: hardware.n_gpu=0,load=False,multiagent.n_agents=2 + id: ??? + num: ??? 
+ config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /scratch/rd2893/control-pcgrl/control_pcgrl/configs + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: control_pcgrl.configs + schema: pkg + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /scratch/rd2893/control-pcgrl/control_pcgrl/configs/outputs/2022-11-29/14-46-43 + choices: + multiagent: single_agent + model: default_model + hardware: remote + problem: binary_path + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: submitit_local + hydra/output: default + verbose: false diff --git a/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/overrides.yaml b/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/overrides.yaml new file mode 100644 index 0000000..7b8f603 --- /dev/null +++ b/control_pcgrl/configs/outputs/2022-11-29/14-46-43/.hydra/overrides.yaml @@ -0,0 +1,3 @@ +- hardware.n_gpu=0 +- load=False +- multiagent.n_agents=2 diff --git a/control_pcgrl/control_wrappers.py b/control_pcgrl/control_wrappers.py index 5ca4a50..ea17680 100644 --- a/control_pcgrl/control_wrappers.py +++ b/control_pcgrl/control_wrappers.py @@ -3,6 +3,7 @@ ################################################################################ import collections import copy +import json from pdb import set_trace as TT from timeit import default_timer as timer from typing import Dict, OrderedDict @@ -46,7 +47,12 @@ def __init__(self, env, ctrl_metrics=None, rand_params=False, **kwargs): metric_weights = copy.copy(self.unwrapped._reward_weights) self.metric_weights = {k: 0 for k in metric_weights} - self.metric_weights.update(kwargs['problem']['weights']) + try: + config_weights = kwargs['problem']['weights'] + except TypeError: + config_weights = json.loads(kwargs['problem'].replace('\'', '\"'))['weights'] + + self.metric_weights.update(config_weights) # cond_trgs = self.unwrapped.cond_trgs @@ -350,11 +356,11 @@ def render(self, mode='human'): else: ### PROFILING - N = 100 - start_time = timer() - for _ in range(N): - super().render(mode=mode) - print(f'mean pyglet image render time over {N} trials:', (timer() - start_time) * 1000 / N, 'ms') + #N = 100 + #start_time = timer() + #for _ in range(N): + # super().render(mode=mode) + #print(f'mean pyglet image render time over {N} trials:', (timer() - start_time) * 1000 / N, 'ms') ### return super().render(mode=mode) diff --git a/control_pcgrl/envs/helper.py b/control_pcgrl/envs/helper.py index b31cbf3..93fe9df 100644 --- a/control_pcgrl/envs/helper.py +++ b/control_pcgrl/envs/helper.py @@ -2,6 +2,7 @@ A helper module that can be used by all problems """ import numpy as np +from gym.utils import seeding from pdb import set_trace as TT """ @@ -486,7 +487,7 @@ def calc_num_reachable_tile(map, map_locations, start_value, passable_values, re Returns: int[][]: the random generated map """ -def gen_random_map(random, dims, prob): +def gen_random_map(random, dims, prob, seed=None): # def gen_random_map(random, width, height, prob): map = random.choice(list(prob.keys()),size=dims[::-1],p=list(prob.values())).astype(np.uint8) return map diff --git a/control_pcgrl/envs/pcgrl_env.py b/control_pcgrl/envs/pcgrl_env.py index 91898c7..275b1d3 100644 --- a/control_pcgrl/envs/pcgrl_env.py +++ 
b/control_pcgrl/envs/pcgrl_env.py @@ -1,4 +1,5 @@ import collections +import json from pdb import set_trace as TT import PIL @@ -10,7 +11,7 @@ from ray.rllib.env.env_context import EnvContext from ray.rllib.utils.annotations import override -from control_pcgrl.envs.reps.wrappers import wrap_rep +from control_pcgrl.envs.reps.wrappers import wrap_rep, MultiAgentWrapper from control_pcgrl.envs.probs import PROBLEMS from control_pcgrl.envs.probs.problem import Problem, Problem3D from control_pcgrl.envs.reps import REPRESENTATIONS @@ -45,6 +46,7 @@ def __init__(self, prob="binary", rep="narrow", **kwargs): # Attach this function to the env, since it will be different for, e.g., 3D environments. self.get_string_map = get_string_map + self._prob: Problem = PROBLEMS[prob](**kwargs) self._prob.init_tile_int_dict() self._rep_cls = REPRESENTATIONS[rep] @@ -117,6 +119,12 @@ def set_task(self, map_idx): self.cur_map_idx = map_idx self.switch_env = True + def get_rep(self): + return self._rep + + def get_map(self): + return self._rep._map + def get_map_dims(self): return (self._prob._width, self._prob._height, self.get_num_tiles()) @@ -125,6 +133,9 @@ def get_observable_map_dims(self): def configure(self, map_shape, **kwargs): # , max_step=300): # What is this garbage?? + if isinstance(map_shape, str): + map_shape = json.loads(map_shape) + self._prob._width = map_shape[0] self._prob._height = map_shape[1] self.width = map_shape[0] #UGH @@ -174,6 +185,10 @@ def get_spaces(self): def reset(self): self._changes = 0 self._iteration = 0 + # avoid default probabilities with normal distribution if we seed manually + if hasattr(self._prob, '_random'): + probs = self._prob._random.random(size=len(self._prob.get_tile_types())) + self._prob._prob = {tile: prob for tile, prob in zip(self._prob.get_tile_types(), probs)} if self.switch_env: self._rep.reset(self.get_map_dims()[:-1], get_int_prob(self._prob._prob, self._prob.get_tile_types()), next_map=self._prob.eval_maps[self.cur_map_idx]) @@ -185,9 +200,14 @@ def reset(self): self._rep_stats = self._prob.get_stats(self.get_string_map(self._get_rep_map(), self._prob.get_tile_types())) #, continuous=continuous)) self.metrics = self._rep_stats self._prob.reset(self._rep_stats) + self._prob._prob = probs self._heatmap = np.zeros(self.get_map_dims()[:-1]) - observation = self._rep.get_observation() + if issubclass(type(self._rep), MultiAgentWrapper): + observation = self._rep.get_observation(all_agents=True) + else: + observation = self._rep.get_observation() # all_agents parameter does not exist for representations without MultiAgentWrapper + # observation["heatmap"] = self._heatmap.copy() return observation @@ -249,6 +269,8 @@ def adjust_param(self, **kwargs): else: max_board_scans = kwargs.get('max_board_scans', 1) self._max_iterations = np.prod(self.get_map_dims()[:-1]) * max_board_scans + 1 + if isinstance(kwargs['map_shape'], str): + kwargs['map_shape'] = json.loads(kwargs['map_shape']) self._prob.adjust_param(**kwargs) self._rep.adjust_param(**kwargs) self.action_space = self._rep.get_action_space(self.get_map_dims()[:-1], self.get_num_tiles()) @@ -257,6 +279,8 @@ def adjust_param(self, **kwargs): # self.observation_space.spaces['heatmap'] = spaces.Box( # low=0, high=self._max_changes, dtype=np.uint8, shape=self.get_map_dims()[:-1]) + def get_agent_position(self): + return self._rep._positions """ Advance the environment using a specific action diff --git a/control_pcgrl/envs/probs/__init__.py b/control_pcgrl/envs/probs/__init__.py index 93f52c5..5ffc96f 100644 
--- a/control_pcgrl/envs/probs/__init__.py +++ b/control_pcgrl/envs/probs/__init__.py @@ -33,6 +33,7 @@ "ddave": DDaveProblem, "mdungeon": MDungeonProblem, "sokoban": SokobanCtrlProblem, + #"sokoban": SokobanProblem, # "sokoban_ctrl": SokobanCtrlProblem, # "zelda": ZeldaProblem, "smb": SMBCtrlProblem, diff --git a/control_pcgrl/envs/probs/problem.py b/control_pcgrl/envs/probs/problem.py index 60a3423..075b6d9 100644 --- a/control_pcgrl/envs/probs/problem.py +++ b/control_pcgrl/envs/probs/problem.py @@ -55,6 +55,7 @@ def init_tile_int_dict(self): def get_tile_int(self, tile): return self._tile_int_dict[tile] + def is_continuous(self): return False diff --git a/control_pcgrl/envs/probs/sokoban/sokoban_ctrl_prob.py b/control_pcgrl/envs/probs/sokoban/sokoban_ctrl_prob.py index 7681205..719fd92 100644 --- a/control_pcgrl/envs/probs/sokoban/sokoban_ctrl_prob.py +++ b/control_pcgrl/envs/probs/sokoban/sokoban_ctrl_prob.py @@ -9,6 +9,7 @@ class SokobanCtrlProblem(SokobanProblem): def __init__(self): super(SokobanCtrlProblem, self).__init__() + #import pdb; pdb.set_trace() self._max_path_length = np.ceil(self._width / 2 + 1) * (self._height) # like _reward_weights but for use with ParamRew # self._reward_weights = self._reward_weights @@ -56,7 +57,7 @@ def get_reward(self, new_stats, old_stats): def get_stats(self, map): stats = super().get_stats(map) - stats["sol-length"] = len(stats["solution"]) + stats["sol-length"] = len(stats.get('solution', [])) stats["ratio"] = abs(stats["crate"] - stats["target"]) # if stats['dist-win'] == self._width * self._height * (self._width + self._height): # stats['dist-win'] = 0 diff --git a/control_pcgrl/envs/probs/sokoban/sokoban_prob.py b/control_pcgrl/envs/probs/sokoban/sokoban_prob.py index c74b5dd..6b09675 100644 --- a/control_pcgrl/envs/probs/sokoban/sokoban_prob.py +++ b/control_pcgrl/envs/probs/sokoban/sokoban_prob.py @@ -174,7 +174,6 @@ def get_stats(self, map): map, map_locations, ["empty", "player", "crate", "target"] ), "dist-win": self._width * self._height * (self._width + self._height), - "solution": [], } if ( @@ -270,7 +269,7 @@ def get_debug_info(self, new_stats, old_stats): "target": new_stats["target"], "regions": new_stats["regions"], "dist-win": new_stats["dist-win"], - "sol-length": len(new_stats["solution"]), + "sol-length": len(new_stats.get('solution', [])) } """ diff --git a/control_pcgrl/envs/reps/narrow_rep.py b/control_pcgrl/envs/reps/narrow_rep.py index 3640914..47fdcb1 100644 --- a/control_pcgrl/envs/reps/narrow_rep.py +++ b/control_pcgrl/envs/reps/narrow_rep.py @@ -97,6 +97,9 @@ def update(self, action): self.n_step += 1 super().update(action) return change, self._pos + + def update_state(self, action): + return self.update(action) # """ # Modify the level image with a red rectangle around the tile that is diff --git a/control_pcgrl/envs/reps/representation.py b/control_pcgrl/envs/reps/representation.py index 4c3d9d3..111eb9d 100644 --- a/control_pcgrl/envs/reps/representation.py +++ b/control_pcgrl/envs/reps/representation.py @@ -27,6 +27,7 @@ def __init__(self, border_tile_index=1, empty_tile_index=0): self._border_tile_index = border_tile_index self._empty_tile = empty_tile_index self._random_start: bool = True + self.seed_val: int = None self.seed() @@ -44,6 +45,7 @@ def get_pos(self): int: the used seed (same as input if not None) """ def seed(self, seed=None): + self.seed_val = seed self._random, seed = seeding.np_random(seed) return seed @@ -64,7 +66,7 @@ def reset(self, dims: tuple, prob: Problem, next_map: np.ndarray = 
None): self._map = next_map self._old_map = self._map.copy() elif self._random_start or self._old_map is None: - self._map = type(self).gen_random_map(self._random, dims, prob) + self._map = type(self).gen_random_map(self._random, dims, prob, self.seed_val) self._old_map = self._map.copy() else: self._map = self._old_map.copy() diff --git a/control_pcgrl/envs/reps/turtle_rep.py b/control_pcgrl/envs/reps/turtle_rep.py index 08f2cd3..7b7ac4b 100644 --- a/control_pcgrl/envs/reps/turtle_rep.py +++ b/control_pcgrl/envs/reps/turtle_rep.py @@ -30,7 +30,9 @@ def __init__(self, **kwargs): """ def reset(self, dims, prob): self._pos = self.get_pos_at_step(dims, -1) - return super().reset(dims, prob) + ret = super().reset(dims, prob) + return ret + # self._x = self._random.randint(width) # self._y = self._random.randint(height) # self._x = 0 @@ -68,7 +70,7 @@ def adjust_param(self, **kwargs): def get_action_space(self, dims, num_tiles): return spaces.Discrete(len(self._dirs) + num_tiles) - def update(self, action): + def update(self, action, pos=None): action, self._pos = self.update_pos(action, self._pos) return action, self._pos diff --git a/control_pcgrl/envs/reps/wrappers.py b/control_pcgrl/envs/reps/wrappers.py index 5f7aabd..8bd0c3a 100644 --- a/control_pcgrl/envs/reps/wrappers.py +++ b/control_pcgrl/envs/reps/wrappers.py @@ -3,6 +3,7 @@ from inspect import isclass import logging import math +import json from pdb import set_trace as TT from gym import spaces @@ -20,6 +21,7 @@ from control_pcgrl.envs.reps.narrow_rep import NarrowRepresentation from control_pcgrl.envs.reps.representation import EgocentricRepresentation, Representation from control_pcgrl.envs.reps.turtle_rep import TurtleRepresentation +from control_pcgrl.envs.reps.wide_rep import WideRepresentation # class RepresentationWrapper(Representation): @@ -418,7 +420,7 @@ def render(self, lvl_image, tile_size=16, border_size=None): return super().render(lvl_image, tile_size, border_size) -class MultiAgentRepresentation(RepresentationWrapper): +class MultiAgentWrapper(RepresentationWrapper): agent_colors = [ (255, 255, 255, 255), (0, 255, 0, 255), @@ -429,30 +431,20 @@ class MultiAgentRepresentation(RepresentationWrapper): (0, 255, 255, 255), ] def __init__(self, rep, **kwargs): - self.n_agents = kwargs['multiagent']['n_agents'] + try: + n_agents = kwargs.get('multiagent')['n_agents'] + except TypeError: + n_agents = json.loads(kwargs.get('multiagent').replace('\'', '\"'))['n_agents'] + #self.n_agents = kwargs['multiagent']['n_agents'] + self.n_agents = n_agents self._active_agent = None super().__init__(rep, **kwargs) - + def reset(self, dims, prob, **kwargs): - self._active_agent = None - ret = super().reset(dims, prob, **kwargs) - - # FIXME: specific to turtle - self._positions = np.floor(np.random.random((self.n_agents, len(dims))) * (np.array(dims))).astype(int) - - def update(self, action): - change = False - - # FIXME: mostly specific to turtle - # for i, pos_0 in enumerate(self._positions): - for k, v in action.items(): - i = int(k.split('_')[-1]) - pos_0 = self._positions[i] - change_i, pos = self.update_pos(action[f'agent_{i}'], pos_0) - change = change or change_i - self._positions[i] = pos - - return change, self._positions + super().reset(dims, prob, **kwargs) + + def update(self): + raise NotImplementedError("This must be overriden by a child class") def render(self, lvl_image, tile_size=16, border_size=None): @@ -465,13 +457,13 @@ def render(self, lvl_image, tile_size=16, border_size=None): 
(x+border_size[0]+1)*tile_size,(y+border_size[1]+1)*tile_size), x_graphics) return lvl_image - def get_observation(self, *args, **kwargs): + def get_observation(self, *args, all_agents=False, **kwargs): # Note that this returns a dummy/meaningless position that never changes... base_obs = super().get_observation(*args, **kwargs) agent_name = self._active_agent multiagent_obs = {} - if agent_name is None: + if agent_name is None or all_agents: for i in range(self.n_agents): obs_i = base_obs.copy() obs_i['pos'] = self._positions[i] @@ -483,10 +475,93 @@ def get_observation(self, *args, **kwargs): return multiagent_obs # base_obs['pos'] = self._positions[int(agent_name.split('_')[-1])] # return base_obs - + def set_active_agent(self, agent_name): self._active_agent = agent_name + +class MultiAgentTurtleRepresentation(MultiAgentWrapper): + def __init__(self, rep, **kwargs): + super().__init__(rep, **kwargs) + + def reset(self, dims, prob, **kwargs): + super().reset(dims, prob, **kwargs) + self._positions = np.floor(np.random.random((self.n_agents, len(dims))) * (np.array(dims))).astype(int) + self.heatmaps = np.zeros((self.n_agents, 16, 16)) + + def update(self, action): + change = False + + # FIXME: mostly specific to turtle + # for i, pos_0 in enumerate(self._positions): + for k, v in action.items(): + i = int(k.split('_')[-1]) + pos_0 = self._positions[i] + change_i, pos = self.update_pos(action[f'agent_{i}'], pos_0) + if change_i: + y, x = pos_0[1], pos_0[0] + self.heatmaps[i][y][x] += 1 + change = change or change_i + self._positions[i] = pos + + return change, self._positions + + def get_positions(self): + return self._positions + +class MultiAgentNarrowRepresentation(MultiAgentWrapper): + + def __init__(self, rep, **kwargs): + super().__init__(rep, **kwargs) + self.heatmaps = np.zeros((self.n_agents, 16, 16)) + + def reset(self, dims, prob, **kwargs): + self.rep.reset(dims, prob, **kwargs) + self.coords = self.get_act_coords() + self._n_steps = {i: i for i in range(self.n_agents)} + self._positions = np.array([self.coords[i] for i in range(self.n_agents)]) + #self._positions = {i: self.coords[i] for i in range(self.n_agents)} + + def update(self, action): + change = False + for agent, act in action.items(): + i = int(agent.split('_')[-1]) + self.rep.n_step = self._n_steps[i] + self.rep._pos = tuple(self.coords[self.n_step]) + change_i, _ = self.rep.update(act) + self._n_steps[i] += 1 + if self._n_steps[i] == len(self.coords): + self._n_steps[i] = 0 + if change_i: + y, x = self.rep._pos[1], self.rep._pos[0] + self.heatmaps[i][y][x] += 1 + self._positions[i] = self.coords[self._n_steps[i]] + change = change or change_i + return change, self.get_positions() + + def get_positions(self): + return self._positions + +class MultiAgentWideRepresentation(MultiAgentWrapper): + + def __init__(self, rep, **kwargs): + super().__init__(rep, **kwargs) + + def reset(self, dims, prob, **kwargs): + self.rep.reset(dims, prob, **kwargs) + # store the last known positions of the agents + self._positions = {i: i for i in range(self.n_agents)} + + def update(self, actions): + change = False + positions = [] + for agent, act in actions.items(): + change_i, pos = self.rep.update(act) + positions.append(pos) + change = change or change_i + self._positions = positions + return change, self._positions + def wrap_rep(rep: Representation, prob_cls: Problem, map_dims: tuple, static_build = False, multi = False, **kwargs): """Should only happen once!""" if multi: @@ -519,11 +594,28 @@ def wrap_rep(rep: Representation, 
prob_cls: Problem, map_dims: tuple, static_bui else: rep = HoleyRepresentation(rep, **kwargs) - - if kwargs.get("multiagent")['n_agents'] != 0: - if not issubclass(type(rep), TurtleRepresentation): + + try: + n_agents = kwargs.get('multiagent')['n_agents'] + except TypeError: + n_agents = json.loads(kwargs.get('multiagent').replace('\'', '\"'))['n_agents'] + #if isinstance(kwargs.get('multiagent'), str): + # kwargs['multiagent'] = json.loads(kwargs.get('multiagent').replace('\'', '\"')) + # #import pdb; pdb.set_trace() + #if not isinstance(multiagent_config, int): + # import pdb; pdb.set_trace() + if n_agents != 0: + #if kwargs.get("multiagent")['n_agents'] != 0: + if issubclass(type(rep), TurtleRepresentation): + rep = MultiAgentTurtleRepresentation(rep, **kwargs) + elif issubclass(type(rep), NarrowRepresentation): + rep = MultiAgentNarrowRepresentation(rep, **kwargs) + pass + elif issubclass(type(rep), WideRepresentation): + rep = MultiAgentWideRepresentation(rep, **kwargs) + else: raise NotImplementedError("Multiagent only works with TurtleRepresentation currently") - rep = MultiAgentRepresentation(rep, **kwargs) + return rep @@ -553,4 +645,4 @@ def wrap_rep(rep: Representation, prob_cls: Problem, map_dims: tuple, static_bui # np.random.shuffle(self._act_coords) # self._x, self._y = self._act_coords[self.n_step % len(self._act_coords)] # self.n_step += 1 -# return change, [self._x, self._y] \ No newline at end of file +# return change, [self._x, self._y] diff --git a/control_pcgrl/envs/reps/wrappers_copy.py b/control_pcgrl/envs/reps/wrappers_copy.py new file mode 100644 index 0000000..2e41232 --- /dev/null +++ b/control_pcgrl/envs/reps/wrappers_copy.py @@ -0,0 +1,597 @@ +from abc import ABC +from copy import deepcopy +from collections import OrderedDict +from inspect import isclass +import logging +import math +from pdb import set_trace as TT + +from gym import spaces +import gym +from control_pcgrl.envs import helper_3D +from control_pcgrl.envs.probs.holey_prob import HoleyProblem +from control_pcgrl.envs.probs.minecraft.mc_render import spawn_3D_maze +from control_pcgrl.envs.probs.minecraft.minecraft_3D_rain import Minecraft3Drain +from control_pcgrl.envs.probs.problem import Problem, Problem3D +import numpy as np +from PIL import Image + +from control_pcgrl.envs.helper_3D import gen_random_map as gen_random_map_3D +from control_pcgrl.envs.reps.ca_rep import CARepresentation +from control_pcgrl.envs.reps.narrow_rep import NarrowRepresentation +from control_pcgrl.envs.reps.representation import EgocentricRepresentation, Representation +from control_pcgrl.envs.reps.turtle_rep import TurtleRepresentation + + +# class RepresentationWrapper(Representation): +class RepresentationWrapper(): + def __init__(self, rep: Representation, **kwargs): + self.rep = rep + # TODO: implement below so that they all point to the same object + # self._map = self.rep._map + # self._bordered_map = self.rep._bordered_map # Doing this results in self._borderd_map != self.rep._bordered_map + # self._random_start = self.rep._random_start + + def _set_pos(self, pos): + self.rep._pos = pos + + def adjust_param(self, **kwargs): + return self.rep.adjust_param(**kwargs) + + def update(self, *args, **kwargs): + return self.rep.update(*args, **kwargs) + + def get_observation(self, *args, **kwargs): + return self.rep.get_observation(*args, **kwargs) + + def get_observation_space(self, *args, **kwargs): + return self.rep.get_observation_space(*args, **kwargs) + + def get_action_space(self, *args, **kwargs): + return 
self.rep.get_action_space(*args, **kwargs) + + def reset(self, *args, **kwargs): + ret = self.rep.reset(*args, **kwargs) + return ret + + def render(self, *args, **kwargs): + return self.rep.render(*args, **kwargs) + + def _update_bordered_map(self): + return self.rep._update_bordered_map() + + def __repr__(self): + return str(self) + + def __getattr__(self, name): + # Removing this check causes errors when serializing this object with pickle. E.g. when using ray for parallel + # environments. Variables that start with underscore will need to be unwrapped manually. + if name.startswith("_"): + raise AttributeError( + "attempted to get missing private attribute '{}'".format(name) + ) + return getattr(self.rep, name) + + # @property + # def spec(self): + # return self.rep.spec + + @classmethod + def class_name(cls): + return cls.__name__ + + # def close(self): + # return self.rep.close() + + def seed(self, seed=None): + return self.rep.seed(seed) + + def __str__(self): + return "<{}{}>".format(type(self).__name__, self.rep) + + @property + def unwrapped(self): + return self.rep.unwrapped + + +# class Representation3DABC(Representation): + + +# class Representation3D(rep_cls, Representation3DABC): +class Representation3D(RepresentationWrapper): + """ + The base class of all the 3D representations + + map in repr are np.array of numbers + """ + _dirs = [(-1,0,0), (1,0,0), (0,-1,0), (0,1,0),(0,0,-1),(0,0,1)] + _gen_random_map = helper_3D.gen_random_map + + # def _update_bordered_map(self): + # self._bordered_map[1:-1, 1:-1, 1:-1] = self._map + + def render(self, map, mode='human', **kwargs): + # TODO: Check if we are Egocentric. If so, render the agent edit. Otherwise, render the whole map (assume cellular) + spawn_3D_maze(map) + # return self.rep.render(mode, **kwargs) + # pass + + + +class HoleyRepresentation(RepresentationWrapper): + def set_holes(self, entrance_coords, exit_coords): + self.entrance_coords, self.exit_coords = entrance_coords, exit_coords + + def dig_holes(self, entrance_coords, exit_coords): + # TODO: Represent start/end differently to accommodate one-way paths. + self.unwrapped._bordered_map[entrance_coords[0], entrance_coords[1]] = self.unwrapped._empty_tile + self.unwrapped._bordered_map[exit_coords[0], exit_coords[1]] = self.unwrapped._empty_tile + + + def update(self, action): + ret = super().update(action) + return ret + + def reset(self, *args, **kwargs): + ret = super().reset(*args, **kwargs) + self.dig_holes(self.entrance_coords, self.exit_coords) + return ret + + def get_observation(self): + obs: dict = super().get_observation() + obs.update( + {'map': self.unwrapped._bordered_map.copy(),} + ) + if 'pos' in obs: + obs['pos'] += 1 # support variable border sizes? + return obs + + def get_observation_space(self, dims, num_tiles): + obs_space = super().get_observation_space(dims, num_tiles) + map_shape = tuple([i + 2 for i in obs_space['map'].shape]) + obs_space.spaces.update({ + "map": spaces.Box(low=0, high=num_tiles-1, dtype=np.uint8, shape=map_shape) + }) + if "pos" in obs_space.spaces: + old_pos_space = obs_space.spaces["pos"] + obs_space.spaces.update({ + "pos": spaces.Box(low=old_pos_space.low + 1, high=old_pos_space.high + 1, \ + dtype=old_pos_space.dtype, shape=old_pos_space.shape) + }) + return obs_space + + +class HoleyRepresentation3D(HoleyRepresentation): + """A 3D variant of the holey representation. 
Holes on the border of the map are 2 tiles high, to support the + size of the player in our Minecraft-inspired 3D problems.""" + + def dig_holes(self, s, e): + # TODO: Represent start/end differently to accommodate one-way paths. + self.unwrapped._bordered_map[s[0][0]][s[0][1]][s[0][2]] = self.unwrapped._bordered_map[s[1][0]][s[1][1]][s[1][2]] = self.unwrapped._empty_tile + self.unwrapped._bordered_map[e[0][0]][e[0][1]][e[0][2]] = self.unwrapped._bordered_map[e[1][0]][e[1][1]][e[1][2]] = self.unwrapped._empty_tile + + +class StaticBuildRepresentation(RepresentationWrapper): + def __init__(self, rep, **kwargs): + super().__init__(rep, **kwargs) + self.prob_static = 0.0 + self.window = None + + def adjust_param(self, **kwargs): + self.prob_static = kwargs.get('static_prob') + self.n_aux_tiles = kwargs.get('n_aux_tiles') + return super().adjust_param(**kwargs) + + def reset(self, *args, **kwargs): + ret = super().reset(*args, **kwargs) + # Uniformly sample a probability of static builds from within the range [0, self.prob_static] + prob_static = self.unwrapped._random.random() * self.prob_static + # TODO: take into account validity constraints on number of certain tiles + self.static_builds = (self.unwrapped._random.random(self.unwrapped._bordered_map.shape) < prob_static).astype(np.uint8) + # Borders are always static + self.static_builds[(0, -1), :] = 1 + self.static_builds[:, (0, -1)] = 1 + + # Remove any action coordinates that correspond to static tiles (unless we have aux chans, in which case + # we'll let the agent leave messages for itself on those channels, even on static tiles.) + # NOTE: We only have `_act_coords` for narrow representation. Can we make this cleaner? + if hasattr(self, '_act_coords') and self.n_aux_tiles == 0: + self._act_coords = self._act_coords[np.where( + self.static_builds[self._act_coords[:, 0], self._act_coords[:, 1]] == 0)] + return ret + + def get_observation_space(self, dims, num_tiles): + obs_space = super().get_observation_space(dims, num_tiles) + obs_space.spaces.update({ + 'static_builds': spaces.Box(low=0, high=1, dtype=np.uint8, shape=dims) + }) + return obs_space + + def get_observation(self): + obs = super().get_observation() + obs.update({ + 'static_builds': self.static_builds, + }) + return obs + + def render(self, lvl_image, tile_size, border_size=None): + lvl_image = super().render(lvl_image, tile_size, border_size) + im_arr = np.zeros((tile_size, tile_size, 4), dtype=np.uint8) + clr = (255, 0, 0, 255) + im_arr[(0, 1, -1, -2), :, :] = im_arr[:, (0, 1, -1, -2), :] = clr + x_graphics = Image.fromarray(im_arr) + + for (y, x) in np.argwhere(self.static_builds[1:-1, 1:-1] == 1): + y, x = y + 1, x + 1 # ignoring the border + lvl_image.paste(x_graphics, ((x+border_size[0]-1)*tile_size, (y+border_size[1]-1)*tile_size, + (x+border_size[0])*tile_size,(y+border_size[1])*tile_size), x_graphics) + + # if not hasattr(self, 'window'): + # self.window = cv2.namedWindow('static builds', cv2.WINDOW_NORMAL) + # cv2.resize('static builds', 100, 800) + # cv2.waitKey(1) + # im = self.static_builds.copy() + # cv2.imshow('static builds', im * 255) + # cv2.waitKey(1) + + return lvl_image + + # update = { + # CARepresentationHoley: update_ca_holey, + # }[rep_cls] + + def update(self, action, **kwargs): + old_state = self.unwrapped._bordered_map.copy() + change, pos = super().update(action, **kwargs) + new_state = self.unwrapped._bordered_map + # assert not(np.all(old_state == new_state)) + self.unwrapped._bordered_map = np.where(self.static_builds < 1, new_state, 
old_state) + # print(self._bordered_map) + self.unwrapped._map = self.unwrapped._bordered_map[ + tuple([slice(1, -1) for _ in range(len(self.unwrapped._map.shape))])] + change = np.any(old_state != new_state) + return change, pos + + +class RainRepresentation(RepresentationWrapper): + def get_action_space(self, dims, num_tiles): + # Need no-op because raining sand/acid will always change map (if column is not empty). + return spaces.Discrete(num_tiles + 1) + + # TODO: + def update(self, action, **kwargs): + # FIXME: Assuming a narrow representation! + change, pos = super().update(action, **kwargs) + if change: + self.unwrapped._map[pos[0], pos[1]] = self.unwrapped._empty_tile + return change, pos + + def render(self, map, mode='human', **kwargs): + # TODO: just place a sand block at the top + spawn_3D_maze(map) + + +class MultiActionRepresentation(RepresentationWrapper): + ''' + A wrapper that makes the action space change multiple tiles at each time step. Maybe useful for all representations + (for 2D, 3D, narrow, turtle, wide, ca, ...). + NOW JUST FOR EGOCENTRIC REPRESENTATIONS. + ''' + def _set_inner_padding(self, action_size): + """These are like buffers. The agent should not be centered on these buffers because it will act on them anyway + when at either edge of the map. + For any odd action patch, these are equal (e.g., for 3, they are both 1). For, e.g. 4, they are 1 and 2. + We define a left/right (bottom/top, close/far) pair for each map dimension.""" + self.inner_l_pads = np.floor((action_size - 1) / 2).astype(int) + self.inner_r_pads = np.ceil((action_size - 1) / 2).astype(int) + + def __init__(self, rep, map_dims, **kwargs): + super().__init__(rep, **kwargs) + self.action_size = np.array(kwargs.get('action_size')) # if we arrive here, there must be an action_size in kwargs + self._set_inner_padding(self.action_size) + self.map_size = map_dims # map_dims is a tuple (height, width, n_tiles) in 2D + self.map_dim = len(map_dims[:-1]) # 2 for 2D, 3 for 3D + self.strides = np.ones(len(self.map_size[:-1]), dtype=np.int32) * 3 # strides are just 3 for each dimension now + + # We should not set this here. This is defined in the underlying representation class. In this underlying class, + # it is initialized on `reset`. + # self._act_coords = None + + # Check the action size is the same dimension as the map + assert self.map_dim == len(self.action_size), \ + f"Action size ({len(self.action_size)}) should be the same dimension as the map size ({self.map_dim})" + # Check whether we have a valid action size and stride + for i in range(self.map_dim): + logging.warning(f"Not validating your action size ({self.action_size}) and stride ({self.strides}, w.r.t." + + " the map size ({self.map_size}). If these are mismatches, the agent may not be able to edit the bottom" + + " right/far edges of the map.") + # FIXME: below assertion is thrown whenever stride = 1 and action_size > 1. But these are valid settings. + # assert self.map_size[i] - self.action_size[i] + self.strides[i] == self.map_size[i] * self.strides[i], \ + # "Please make sure that the action size and stride are valid for the map size." + + # NOTE: This function will not be called by the object we are wrapping. (Like it would be if we + # inherited from it instead.) So we'll be gross, and overwrite this function in the wrapped class manually. 
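# A minimal sketch (hypothetical classes, not from this codebase) of the one-way delegation issue
# described in the NOTE above: attribute lookup on the wrapper falls through to the wrapped object
# via __getattr__, but calls the wrapped object makes on itself never come back to the wrapper, so
# the wrapper's override of get_act_coords would be skipped unless it is patched onto the wrapped
# object explicitly.
#
#     class Inner:
#         def get_act_coords(self):
#             return "inner"
#         def reset(self):
#             # `self` is the Inner instance, so this resolves to Inner.get_act_coords.
#             return self.get_act_coords()
#
#     class Wrapper:
#         def __init__(self, inner):
#             self.inner = inner
#         def __getattr__(self, name):
#             return getattr(self.inner, name)
#         def get_act_coords(self):
#             return "wrapper"
#
#     Wrapper(Inner()).reset()  # -> "inner", hence the explicit overwrite on the next line.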
+ self.unwrapped.get_act_coords = self.get_act_coords + + def get_action_space(self, *args, **kwargs): + # the tiles inside the action are not neccearily the same + action_space = [] + for i in range(math.prod(self.action_size)): + action_space.append(self.map_size[-1]) + return spaces.MultiDiscrete(action_space) + + # This gets overwritten in the wrapped class in `__init__` above. + def get_act_coords(self): + ''' + Get the coordinates of the action space. Regards the top left corner's coordinate (the smallest coords in the + action block) as the coordinate of current action. + + The formula of calculating the size of 2d convolutional layer is: + (W-F+2P)/S + 1 + where W is the width of input (the map size here), F is the width of filter (action_size here), + P is the padding (0 here), S is the stride. To get the same size of input and output, we have: + (W-F)/S + 1 = W + => W - F + S = W * S for each dimension + ''' + coords = [] + for i in range(self.map_dim): + coords.append(np.arange(self.inner_l_pads[i], self.map_size[i] - self.inner_r_pads[i], self.strides[i])) + act_coords = np.array(np.meshgrid(*coords)).T.reshape(-1, self.map_dim) # tobe checked! copilot writes this but looks good + act_coords = np.flip(act_coords, axis=1) # E.g., in 2D, scan horizontally first. + return act_coords + + + def update(self, action, **kwargs): + ''' + Update the map according to the action, the action is a vector of size action_size + + In previous narrow_multi representation, the action is also a vector (MultiDiscrete). However the action + outside the map will be discarded (do I understand it right?). This will make the entries of the action space + sometimes crucial (inside the map) and sometimes trivial (outside the map). This is not good for RL. (copilot agree + this is not good) + ''' + # unravel the action from a vector to a matrix (of size action_size) + action = action.reshape(self.action_size) + + old_state = self.unwrapped._map.copy() + + # replace the map at self._pos with the action TODO: is there any better way to make it dimension independent? (sam: yes. Slices!) (copilot: yes, np.take_along_axis)(zehua:is this right?) (copilot:I think so) + _pos = self.unwrapped._pos # Why is _pos private again? (copilot: I don't know) Ok thanks copilot. (copilot: you're welcome) + + # Let's center the action patch around _pos. If the agent's observation is centered + # around _pos, then we want the action patch to be centered around _pos as well. + # The inner padding tells us how many tiles can be acted on to the left/right of _pos. + top_left = _pos - self.inner_l_pads + bottom_right = _pos + self.inner_r_pads + + slices = [slice(top_left[i], bottom_right[i] + 1) for i in range(self.map_dim)] + # (zehua: yes use slices!) + # not tested: map[tuple(starmap(slice, zip(top_left, bottom_right)))] = action + # Or this(more similar to sam's code but single line): map[tuple(slice(*indexes) for indexes in zip(top_left, bottom_right))] = action + + ### Some checks for safety (could comment these out later). ### + # Check that the action patch is within the map. + assert np.all(top_left >= 0), \ + f"Action patch is outside the map. Top left corner: {top_left}" + assert np.all(bottom_right < self.map_size[:-1]), \ + f"Action patch is outside the map. 
Bottom right corner: {bottom_right}" + ################################################################ + + self.unwrapped._map[tuple(slices)] = action + # if self.map_dim == 2: + # self.unwrapped._map[top_left[0]:bottom_right[0]+1, top_left[1]:bottom_right[1]+1] = action + # elif self.map_dim == 3: + # self.unwrapped._map[top_left[0]:bottom_right[0]+1, top_left[1]:bottom_right[1]+1, top_left[2]:bottom_right[2]+1] = action + + + new_state = self.unwrapped._map + if self.unwrapped._random_tile: + if self.unwrapped.n_step == len(self._act_coords): + np.random.shuffle(self._act_coords) + + # self._set_pos(self.unwrapped._act_coords[self.n_step % len(self.unwrapped._act_coords)]) + self._set_pos(self.get_pos_at_step(self.n_step)) + self.unwrapped.n_step += 1 + + self.unwrapped._bordered_map[tuple([slice(1, -1) for _ in range(len(self.unwrapped._map.shape))])] = self.unwrapped._map + + change = np.any(old_state != new_state) + + return change, self.unwrapped._pos + + def render(self, lvl_image, tile_size=16, border_size=None): + y, x = self.get_pos() + # This is a little image with our border in it + im_arr = np.zeros((tile_size * self.action_size[0], tile_size * self.action_size[1], 4), dtype=np.uint8) + # Grey color + clr = np.array([128, 128, 128, 255], dtype=np.uint8) + # Two pixels on each side for column + im_arr[(0, 1, -1, -2), :, :] = clr + # Two pixels on each side for row + im_arr[:, (0, 1, -1, -2), :] = clr + x_graphics = Image.fromarray(im_arr) + # Paste our border image into the level image at the agent's position + lvl_image.paste(x_graphics, ( + # Left corner of the image we're pasting in + (x+border_size[0]-self.inner_l_pads[0])*tile_size, (y+border_size[1]-self.inner_l_pads[1])*tile_size, + # Right corner + (x+border_size[0]+self.inner_r_pads[0]+1)*tile_size, (y+border_size[1]+self.inner_r_pads[1]+1)*tile_size), x_graphics) + return super().render(lvl_image, tile_size, border_size) + + +class MultiAgentRepresentation(RepresentationWrapper): + agent_colors = [ + (255, 255, 255, 255), + (0, 255, 0, 255), + (255, 0, 0, 255), + (0, 0, 255, 255), + (255, 255, 0, 255), + (255, 0, 255, 255), + (0, 255, 255, 255), + ] + def __init__(self, rep, **kwargs): + self.n_agents = kwargs['multiagent']['n_agents'] + # create a single representation for each agent + # all representations share maps + self._rep = rep + self.reps = {f'agent_{i}': deepcopy(rep) for i in range(self.n_agents)} + self._active_agent = None + super().__init__(rep, **kwargs) + + def get_rep_map(self): + return self.reps['agent_0']._map + + def reset(self, dims, prob, **kwargs): + self._active_agent = None + shared_map = None + for agent, r in self.reps.items(): + r.reset(dims, prob, **kwargs) + if shared_map is None: + shared_map = r._map + else: + r._map = shared_map + # default to random initialization + import pdb; pdb.set_trace() + r._pos = [int(r._random.random() * i) for i in dims] + + super().reset(dims, prob, **kwargs) + self.unwrapped._map = shared_map + + # FIXME: specific to turtle + #self._positions = np.floor(np.random.random((self.n_agents, len(dims))) * (np.array(dims))).astype(int) + + #def update(self, action): + # change = False + + # # FIXME: mostly specific to turtle + # # for i, pos_0 in enumerate(self._positions): + # for k, v in action.items(): + # i = int(k.split('_')[-1]) + # pos_0 = self._positions[i] + # change_i, pos = self.update_pos(action[f'agent_{i}'], pos_0) + # change = change or change_i + # self._positions[i] = pos + + # return change, self._positions + + def update(self, action): + 
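        # Apply each agent's action through that agent's sub-representation, then propagate the
        # resulting map to every sub-representation (and to this wrapper) so all agents keep
        # editing one shared level.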
change = False + for k, v in action.items(): + change_i, new_pos = self.reps[k].update(v) + #i = int(k.split('_')[-1]) + #self.rep._pos = self._positions[i] + #change_i, new_pos = self.rep.update(v) + change = change or change_i + self.reps[k]._pos = new_pos + for r in self.reps: + r._map = self.reps[k]._map + self._map = self.reps[k]._map + #self._positions[i] = new_pos + return change, self._positions + + def render(self, lvl_image, tile_size=16, border_size=None): + + for (y, x), clr in zip(self._positions, self.agent_colors): + im_arr = np.zeros((tile_size, tile_size, 4), dtype=np.uint8) + + im_arr[(0, 1, -1, -2), :, :] = im_arr[:, (0, 1, -1, -2), :] = clr + x_graphics = Image.fromarray(im_arr) + lvl_image.paste(x_graphics, ((x+border_size[0])*tile_size, (y+border_size[1])*tile_size, + (x+border_size[0]+1)*tile_size,(y+border_size[1]+1)*tile_size), x_graphics) + return lvl_image + + def get_positions(self): + return [r._pos for _, r in self.reps.items()] + + def get_observation(self, *args, **kwargs): + # Note that this returns a dummy/meaningless position that never changes... + base_obs = super().get_observation(*args, **kwargs) + + agent_name = self._active_agent + multiagent_obs = {} + if agent_name is None: + for agent, r in self.reps.items(): + multiagent_obs[agent] = r.get_observation(*args, **kwargs) + #for i in range(self.n_agents): + # obs_i = base_obs.copy() + # obs_i['pos'] = self._positions[i] + # multiagent_obs[f'agent_{i}'] = obs_i + return multiagent_obs + else: + multiagent_obs[agent_name] = self.reps[agent_name].get_observation(*args, **kwargs) + #multiagent_obs[agent_name] = base_obs + #multiagent_obs[agent_name]['pos'] = self._positions[int(agent_name.split('_')[-1])] + return multiagent_obs + # base_obs['pos'] = self._positions[int(agent_name.split('_')[-1])] + # return base_obs + + def set_active_agent(self, agent_name): + self._active_agent = agent_name + +def wrap_rep(rep: Representation, prob_cls: Problem, map_dims: tuple, static_build = False, multi = False, **kwargs): + """Should only happen once!""" + if multi: + rep = MultiActionRepresentation(rep, map_dims, **kwargs) + + if static_build: + # rep_cls = StaticBuildRepresentation(rep_cls) + rep = StaticBuildRepresentation(rep, **kwargs) + + + # FIXME: this is a hack to make sure that rep_cls is a class name but not an object + # rep_cls = rep_cls if isclass(rep_cls) else type(rep_cls) + # if issubclass(prob_cls, Minecraft3Drain): + # rep = RainRepresentation(rep) + if issubclass(prob_cls, Problem3D): + rep = Representation3D(rep, **kwargs) + # rep_cls = wrap_3D(rep_cls) + # if issubclass(rep_cls, EgocentricRepresentation): + # rep_cls = EgocentricRepresentation3D() + # else: + # rep_cls = Representation3D(rep_cls) + + # FIXME: this is a hack to make sure that rep_cls is a class name but not an object + # rep_cls = rep_cls if isclass(rep_cls) else type(rep_cls) + # if issubclass(prob_cls, HoleyProblem) and not issubclass(type(rep), HoleyRepresentation): + if issubclass(prob_cls, HoleyProblem): + + if issubclass(prob_cls, Problem3D): + rep = HoleyRepresentation3D(rep, **kwargs) + + else: + rep = HoleyRepresentation(rep, **kwargs) + + if kwargs.get("multiagent")['n_agents'] != 0: + #if not issubclass(type(rep), TurtleRepresentation): + # raise NotImplementedError("Multiagent only works with TurtleRepresentation currently") + rep = MultiAgentRepresentation(rep, **kwargs) + + return rep + + + + +# def update_ca_holey(self, action, **kwargs): +# old_state = self._bordered_map.copy() +# change, pos = 
CARepresentationHoley.update(self, action, **kwargs) +# new_state = self._bordered_map +# # assert not(np.all(old_state == new_state)) +# self._bordered_map = np.where(self.static_builds < 1, new_state, old_state) +# # print(self._bordered_map) +# self._map = self._bordered_map[1:-1, 1:-1] +# change = np.any(old_state != new_state) +# return change, pos + + +# def update_narrow_holey(self, action, **kwargs): +# change = 0 +# if action > 0: +# change += [0,1][self._map[self._y][self._x] != action-1] +# self._map[self._y][self._x] = action-1 +# self._bordered_map[self._y+1][self._x+1] = action-1 +# if self._random_tile: +# if self.n_step == len(self._act_coords): +# np.random.shuffle(self._act_coords) +# self._x, self._y = self._act_coords[self.n_step % len(self._act_coords)] +# self.n_step += 1 +# return change, [self._x, self._y] \ No newline at end of file diff --git a/control_pcgrl/rl/callbacks.py b/control_pcgrl/rl/callbacks.py index d9ef49f..e119209 100644 --- a/control_pcgrl/rl/callbacks.py +++ b/control_pcgrl/rl/callbacks.py @@ -40,8 +40,11 @@ def on_episode_start( f'{k}-trg': None, }) for k in env.metrics: + if k == 'solution': + continue episode.hist_data.update({f'{k}-val': None, }) + if self.holey: episode.hist_data.update({ 'holes_start': None, @@ -88,13 +91,15 @@ def on_episode_end( # 'path-length': np.mean(path_lengths), # } env = base_env.get_sub_environments()[env_index] - episode_stats = env.unwrapped._rep_stats + unwrapped = env._unwrapped if hasattr(env, '_unwrapped') else env.unwrapped + episode_stats = unwrapped._rep_stats # stats_list = ['regions', 'connectivity', 'path-length'] # write to tensorboard file (if enabled) # episode.hist_data.update({k: [v] for k, v in episode_stats.items()}) - episode.custom_metrics.update({k: [v] for k, v in episode_stats.items()}) + episode.custom_metrics.update({k: [v] for k, v in episode_stats.items() if k != 'solution'}) + # TODO: log ctrl targets and success rate as heatmap: x is timestep, y is ctrl target, heatmap is success rate @@ -104,12 +109,17 @@ def on_episode_end( f'{k}-trg': [env.metric_trgs[k]], # rllib needs these values to be lists :) }) for k in env.metrics: + # avoid adding non-numeric values + #if isinstance(env.metrics[k], int) or isinstance(env.metrics[k], float): + if k == 'solution': + continue episode.hist_data.update({f'{k}-val': [env.metrics[k]],}) + # episode.hist_data.update({k: [v] for k, v in episode_stats.items() if k in stats_list}) # episode.custom_metrics.update({k: [v] for k, v in episode_stats.items() if k in stats_list}) - if hasattr(env.unwrapped._prob, '_hole_queue'): + if hasattr(unwrapped._prob, '_hole_queue'): entrance_coords, exit_coords = env.unwrapped._prob.entrance_coords, env.unwrapped._prob.exit_coords if len(entrance_coords.shape) == 1: # Then it's 2D. 
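# A possible refactor (hypothetical helper, not part of this diff): the `_unwrapped`-fallback used
# in the callbacks above could be factored out so the hasattr check is not repeated.
def _base_env(env):
    """Return the underlying PCGRL env, whether or not it sits behind a grouped wrapper."""
    return env._unwrapped if hasattr(env, '_unwrapped') else env.unwrapped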
@@ -120,6 +130,6 @@ def on_episode_end( else: # Just record the foot-room if 3D episode.hist_data.update({ - 'holes_start': [tuple(env.unwrapped._prob.entrance_coords[0])], - 'holes_end': [tuple(env.unwrapped._prob.exit_coords[0])], + 'holes_start': [tuple(unwrapped._prob.entrance_coords[0])], + 'holes_end': [tuple(unwrapped._prob.exit_coords[0])], }) diff --git a/control_pcgrl/rl/envs.py b/control_pcgrl/rl/envs.py index 26b81de..7007ea3 100644 --- a/control_pcgrl/rl/envs.py +++ b/control_pcgrl/rl/envs.py @@ -2,6 +2,7 @@ from collections import namedtuple import os from pdb import set_trace as TT +import json from typing import Dict from control_pcgrl import wrappers @@ -84,7 +85,12 @@ def make_env(cfg): # # RenderMonitor must come last # env = RenderMonitor(env, rank, log_dir, **kwargs) - if cfg.multiagent.n_agents != 0: + try: + n_agents = cfg_dict['multiagent']['n_agents'] + except TypeError: + n_agents = json.loads(cfg_dict['multiagent'].replace('\'', '\"'))['n_agents'] + + if n_agents != 0: env = wrappers.MultiAgentWrapper(env, **cfg_dict) return env diff --git a/control_pcgrl/rl/evaluate.py b/control_pcgrl/rl/evaluate.py index fb26f2f..fdd9382 100644 --- a/control_pcgrl/rl/evaluate.py +++ b/control_pcgrl/rl/evaluate.py @@ -15,9 +15,9 @@ LOAD_STATS = True -CONTROL_DOORS = True +CONTROL_DOORS = False CONTROLS = False -GENERAL_EVAL = False +GENERAL_EVAL = True def evaluate(trainer, env, cfg): diff --git a/control_pcgrl/rl/models.py b/control_pcgrl/rl/models.py index f788875..566c8f5 100644 --- a/control_pcgrl/rl/models.py +++ b/control_pcgrl/rl/models.py @@ -1,4 +1,5 @@ from typing import Dict, List +import json from einops import rearrange import numpy as np @@ -31,6 +32,7 @@ def __init__(self, # self.obs_size = get_preprocessor(obs_space)(obs_space).size obs_shape = obs_space.shape + self.img_shape = obs_shape obs_shape = (obs_shape[2], obs_shape[0], obs_shape[1]) self.fc_size = fc_size @@ -52,6 +54,11 @@ def value_function(self): return th.reshape(self.value_branch(self._features), [-1]) def forward(self, input_dict, state, seq_lens): + #raise ValueError(input_dict['obs'].shape) + input_dict['obs'] = input_dict['obs'].reshape( + input_dict['obs'].size(0), + *self.img_shape + ) input = input_dict["obs"].permute(0, 3, 1, 2) # Because rllib order tensors the tensorflow way (channel last) x = nn.functional.relu(self.conv_1(input.float())) x = nn.functional.relu(self.conv_2(x)) @@ -135,7 +142,9 @@ def __init__(self, # self.n_aux_chan = n_aux_chan self.conv_filters = conv_filters # self.obs_size = get_preprocessor(obs_space)(obs_space).size - obs_shape = obs_space.shape + obs_shape = (32, 32, 3) + #obs_shape = obs_space.shape + self.obs_shape = obs_shape # orig_obs_space = model_config['custom_model_config']['orig_obs_space'] # obs_shape = orig_obs_space['map'].shape # metrics_size = orig_obs_space['ctrl_metrics'].shape \ @@ -170,6 +179,11 @@ def value_function(self): return th.reshape(self.value_branch(self._features), [-1]) def forward(self, input_dict, state, seq_lens): + #import pdb; pdb.set_trace() + input_dict['obs'] = input_dict['obs'].reshape( + input_dict['obs'].size(0), + *self.obs_shape + ) input = input_dict['obs'].permute(0, 3, 1, 2) # input = th.cat([input, self._last_aux_activ], dim=1) x = nn.functional.relu(self.conv_1(input.float())) @@ -258,8 +272,8 @@ def forward(self, input_dict, state, seq_lens): x = self.fc_1(x) self._features = x x = x.reshape(*pre_fc_shape) - x = nn.functional.relu(self.deconv_1(x)) - x = x + x1 + x = nn.functional.relu(self.deconv_1(x)) + x = 
x.repeat(1, 1, 2, 2) + x1 x = nn.functional.relu(self.deconv_2(x)) action_out = x.reshape(x.size(0), -1) diff --git a/control_pcgrl/rl/rllib_utils.py b/control_pcgrl/rl/rllib_utils.py index 7d417a4..be48a0b 100644 --- a/control_pcgrl/rl/rllib_utils.py +++ b/control_pcgrl/rl/rllib_utils.py @@ -1,6 +1,7 @@ from pdb import set_trace as TT import numpy as np from ray.rllib.algorithms.ppo import PPO as RlLibPPOTrainer +from ray.rllib.algorithms.qmix import QMix as RlLibQMIXTrainer import torchinfo import torch as th @@ -16,130 +17,156 @@ # done = np.any(self.num_timesteps > old_num_timesteps) # return done - -class PPOTrainer(RlLibPPOTrainer): - log_keys = ['episode_reward_max', 'episode_reward_mean', 'episode_reward_min', 'episode_len_mean'] - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # wandb.init(**self.config['wandb']) - self.checkpoint_path_file = kwargs['config']['checkpoint_path_file'] - self.ctrl_metrics = self.config['env_config']['controls'] - self.ctrl_metrics = {} if self.ctrl_metrics is None else self.ctrl_metrics - cbs = self.workers.foreach_env(lambda env: env.unwrapped.cond_bounds) - cbs = [cb for worker_cbs in cbs for cb in worker_cbs if cb is not None] - cond_bounds = cbs[0] - self.metric_ranges = {k: v[1] - v[0] for k, v in cond_bounds.items()} - # self.checkpoint_path_file = checkpoint_path_file - - def setup(self, config): - ret = super().setup(config) - n_params = 0 - param_dict = self.get_weights()['default_policy'] - - for v in param_dict.values(): - n_params += np.prod(v.shape) - model = self.get_policy('default_policy').model - print(f'default_policy has {n_params} parameters.') - print('Model overview(s):') - print(model) - print("=============") - # torchinfo summaries are very confusing at the moment - torchinfo.summary(model, input_data={ - "input_dict": {"obs": th.zeros((1, *self.config['model']['custom_model_config']['dummy_env_obs_space'].shape))}}) - return ret - - @classmethod - def get_default_config(cls): - # def_cfg = super().get_default_config() - def_cfg = RlLibPPOTrainer.get_default_config() - def_cfg.update({ - 'checkpoint_path_file': None, - 'wandb': { - 'project': 'PCGRL', - 'name': 'default_name', - 'id': 'default_id', - }, - }) - return def_cfg - - def save(self, *args, **kwargs): - ckp_path = super().save(*args, **kwargs) - with open(self.checkpoint_path_file, 'w') as f: - f.write(ckp_path) - return ckp_path - - # @wandb_mixin - def train(self, *args, **kwargs): - result = super().train(*args, **kwargs) - log_result = {k: v for k, v in result.items() if k in self.log_keys} - log_result['info: learner:'] = result['info']['learner'] - - # Either doing multi-agent... - if 'num_agent_steps_sampled_this_iter' in result: - result['fps'] = result['num_agent_steps_trained_this_iter'] / result['time_this_iter_s'] - # or single-agent. +def ControllablaTrainerFactory(trainer): + if isinstance(trainer, str): + if trainer.lower() == 'ppo': + trainer = RlLibPPOTrainer + elif trainer == 'QMIX': + trainer = RlLibQMIXTrainer else: - result['fps'] = result['num_env_steps_trained_this_iter'] / result['time_this_iter_s'] - - # TODO: Send a heatmap to tb/wandb representing success reaching various control targets? 
- if len(result['custom_metrics']) > 0: - n_bins = 20 - result['custom_plots'] = {} - for metric in self.ctrl_metrics: - - # Scatter plots via wandb - # trgs = result['hist_stats'][f'{metric}-trg'] - # vals = result['hist_stats'][f'{metric}-val'] - # data = [[x, y] for (x, y) in zip(trgs, vals)] - # table = wandb.Table(data=data, columns=['trg', 'val']) - # scatter = wandb.plot.scatter(table, "trg", "val", title=f"{metric}-trg-val") - # result['custom_plots']["scatter_{}".format(metric)] = scatter - # scatter.save(f"{metric}-trg-val.png") - # wandb.log({f'{metric}-scc': scatter}, step=self.iteration) - - # Spoofed histograms - # FIXME: weird interpolation behavior here??? - bin_size = self.metric_ranges[metric] / n_bins # 30 is the default number of tensorboard histogram bins (HACK) - trg_dict = {} - - for i, trg in enumerate(result['hist_stats'][f'{metric}-trg']): - val = result['hist_stats'][f'{metric}-val'][i] - scc = 1 - abs(val - trg) / self.metric_ranges[metric] - trg_bin = trg // bin_size - if trg not in trg_dict: - trg_dict[trg_bin] = [scc] - else: - trg_dict[trg_bin] += [scc] - # Get average success rate in meeting each target. - trg_dict = {k: np.mean(v) for k, v in trg_dict.items()} - # Repeat each target based on how successful we were in reaching it. (Appears at least once if sampled) - spoof_data = [[trg * bin_size] * (1 + int(20 * scc)) for trg, scc in trg_dict.items()] - spoof_data = [e for ee in spoof_data for e in ee] # flatten the list - result['hist_stats'][f'{metric}-scc'] = spoof_data - - # Make a heatmap. - # ax, fig = plt.subplots(figsize=(10, 10)) - # data = np.zeros(n_bins) - # for trg, scc in trg_dict.items(): - # data[trg] = scc - # wandb.log({f'{metric}-scc': wandb.Histogram(data, n_bins=n_bins)}) - - # plt.imshow(data, cmap='hot') - # plt.savefig(f'{metric}.png') - - - - # for k, v in result['hist_stats'].items(): - # if '-trg' in k or '-val' in k: - # result['custom_metrics'][k] = [v] - - # print('-----------------------------------------') - # print(pretty_print(log_result)) - return result - - def evaluate(self): - # TODO: Set the evaluation maps here! - # self.eval_workers.foreach_env_with_context(fn) - result = super().evaluate() - return result + raise ValueError( + 'Unsupported trainer type. ' + \ + 'Acceptable arguments are {PPO, QMIX}. 
'+ \ + 'For custom trainers, pass a trainer object as a parameter') + + """ + Wrap trainer object with extra logging and custom metric checkpointing + """ + class Trainer(trainer): + log_keys = ['episode_reward_max', 'episode_reward_mean', 'episode_reward_min', 'episode_len_mean'] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # wandb.init(**self.config['wandb']) + self.checkpoint_path_file = kwargs['config']['checkpoint_path_file'] + self.ctrl_metrics = self.config['env_config']['controls'] + self.ctrl_metrics = {} if self.ctrl_metrics is None else self.ctrl_metrics + cbs = self.workers.foreach_env(lambda env: env._unwrapped.cond_bounds if hasattr(env, '_unwrapped') else env.unwrapped.cond_bounds) + + cbs = [cb for worker_cbs in cbs for cb in worker_cbs if cb is not None] + cond_bounds = cbs[0] + self.metric_ranges = {k: v[1] - v[0] for k, v in cond_bounds.items()} + #self.checkpoint_path_file = checkpoint_path_file + + def setup(self, config): + #import pdb; pdb.set_trace() + #config['replay_buffer_config'] = {'type': 'ReplayBuffer'} + ret = super().setup(config) + n_params = 0 + #agent_id = config['multiagent'] + multiagent = config.get('multiagent', None) + if multiagent is None: + sample_agent_id = 'default_policy' + else: + sample_agent_id = list(multiagent['policies'].keys())[0] + param_dict = self.get_weights()[sample_agent_id] + + # DOES NOT WORK FOR QMIX MODEL + #for v in param_dict.values(): + # n_params += np.prod(v.shape) + #model = self.get_policy(sample_agent_id).model + #print(f'default_policy has {n_params} parameters.') + #print('Model overview(s):') + #print(model) + #print("=============") + # torchinfo summaries are very confusing at the moment + #torchinfo.summary(model, input_data={ + # "input_dict": {"obs": th.zeros((1, *self.config['model']['custom_model_config']['dummy_env_obs_space'].shape))}}) + return ret + + + @classmethod + def get_default_config(cls): + # def_cfg = super().get_default_config() + def_cfg = trainer.get_default_config() + def_cfg.update({ + 'checkpoint_path_file': None, + 'wandb': { + 'project': 'PCGRL', + 'name': 'default_name', + 'id': 'default_id', + }, + }) + return def_cfg + + def save(self, *args, **kwargs): + ckp_path = super().save(*args, **kwargs) + with open(self.checkpoint_path_file, 'w+') as f: + f.write(ckp_path) + return ckp_path + + # @wandb_mixin + def train(self, *args, **kwargs): + result = super().train(*args, **kwargs) + log_result = {k: v for k, v in result.items() if k in self.log_keys} + log_result['info: learner:'] = result['info']['learner'] + + # Either doing multi-agent... + if 'num_agent_steps_sampled_this_iter' in result: + result['fps'] = result['num_agent_steps_trained_this_iter'] / result['time_this_iter_s'] + # or single-agent. + else: + result['fps'] = result['num_env_steps_trained_this_iter'] / result['time_this_iter_s'] + + # TODO: Send a heatmap to tb/wandb representing success reaching various control targets? 
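            # Worked example for the spoofed-histogram logic below (illustrative numbers only): if a
            # control metric has range 100 and n_bins = 20, then bin_size = 5. A sampled target
            # trg = 42 with achieved value val = 37 gives scc = 1 - |37 - 42| / 100 = 0.95 and lands
            # in bin 42 // 5 = 8.0; that bin's mean success is then replayed as the value
            # 8 * 5 = 40 repeated 1 + int(20 * 0.95) = 20 times in the histogram data.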
+ if len(result['custom_metrics']) > 0: + n_bins = 20 + result['custom_plots'] = {} + for metric in self.ctrl_metrics: + # Scatter plots via wandb + # trgs = result['hist_stats'][f'{metric}-trg'] + # vals = result['hist_stats'][f'{metric}-val'] + # data = [[x, y] for (x, y) in zip(trgs, vals)] + # table = wandb.Table(data=data, columns=['trg', 'val']) + # scatter = wandb.plot.scatter(table, "trg", "val", title=f"{metric}-trg-val") + # result['custom_plots']["scatter_{}".format(metric)] = scatter + # scatter.save(f"{metric}-trg-val.png") + # wandb.log({f'{metric}-scc': scatter}, step=self.iteration) + + # Spoofed histograms + # FIXME: weird interpolation behavior here??? + bin_size = self.metric_ranges[metric] / n_bins # 30 is the default number of tensorboard histogram bins (HACK) + trg_dict = {} + + for i, trg in enumerate(result['hist_stats'][f'{metric}-trg']): + val = result['hist_stats'][f'{metric}-val'][i] + scc = 1 - abs(val - trg) / self.metric_ranges[metric] + trg_bin = trg // bin_size + if trg not in trg_dict: + trg_dict[trg_bin] = [scc] + else: + trg_dict[trg_bin] += [scc] + # Get average success rate in meeting each target. + trg_dict = {k: np.mean(v) for k, v in trg_dict.items()} + # Repeat each target based on how successful we were in reaching it. (Appears at least once if sampled) + spoof_data = [[trg * bin_size] * (1 + int(20 * scc)) for trg, scc in trg_dict.items()] + spoof_data = [e for ee in spoof_data for e in ee] # flatten the list + result['hist_stats'][f'{metric}-scc'] = spoof_data + + # Make a heatmap. + # ax, fig = plt.subplots(figsize=(10, 10)) + # data = np.zeros(n_bins) + # for trg, scc in trg_dict.items(): + # data[trg] = scc + # wandb.log({f'{metric}-scc': wandb.Histogram(data, n_bins=n_bins)}) + + # plt.imshow(data, cmap='hot') + # plt.savefig(f'{metric}.png') + + + + # for k, v in result['hist_stats'].items(): + # if '-trg' in k or '-val' in k: + # result['custom_metrics'][k] = [v] + + # print('-----------------------------------------') + # print(pretty_print(log_result)) + return result + + def evaluate(self): + # TODO: Set the evaluation maps here! 
+ # self.eval_workers.foreach_env_with_context(fn) + result = super().evaluate() + return result + + return Trainer diff --git a/control_pcgrl/rl/train_ctrl.py b/control_pcgrl/rl/train_ctrl.py index 01cf106..81eb7f2 100644 --- a/control_pcgrl/rl/train_ctrl.py +++ b/control_pcgrl/rl/train_ctrl.py @@ -1,4 +1,3 @@ - import copy import json import os @@ -12,6 +11,7 @@ from typing import Dict import gym +from tqdm import tqdm import hydra import matplotlib import numpy as np @@ -43,8 +43,8 @@ CustomFeedForwardModel, CustomFeedForwardModel3D, Decoder, DenseNCA, SeqNCA, SeqNCA3D, WideModel3D, WideModel3DSkip) -from control_pcgrl.rl.utils import IdxCounter, get_env_name, get_exp_name, get_map_width -from control_pcgrl.rl.rllib_utils import PPOTrainer +from control_pcgrl.rl.utils import IdxCounter, get_env_name, get_exp_name, get_map_width, TrainerConfigParsers +from control_pcgrl.rl.rllib_utils import ControllablaTrainerFactory from control_pcgrl.configs.config import ControlPCGRLConfig import control_pcgrl from control_pcgrl.envs.probs import PROBLEMS @@ -94,7 +94,12 @@ def main(cfg: ControlPCGRLConfig) -> None: print('env name: ', cfg.env_name) exp_name = get_exp_name(cfg) exp_name_id = f'{exp_name}_{cfg.exp_id}' - cfg.log_dir = log_dir = os.path.join(PROJ_DIR, f'rl_runs/{exp_name_id}_log') + default_dir = os.path.join(PROJ_DIR, 'rl_runs') + cfg.log_dir = log_dir = os.path.join( + cfg.log_dir if cfg.log_dir is not None else default_dir, + cfg.algorithm, + f'{exp_name_id}_log' + ) if not cfg.load: @@ -156,7 +161,8 @@ def main(cfg: ControlPCGRLConfig) -> None: ### DEBUG ### if cfg.debug: - for _ in range(100): + #import pdb; pdb.set_trace() + for _ in tqdm(range(100)): obs = dummy_env.reset() for i in range(500): # if i > 3: @@ -168,6 +174,7 @@ def main(cfg: ControlPCGRLConfig) -> None: # print(obs.transpose(2, 0, 1)[:, 10:-10, 10:-10]) if cfg.render: dummy_env.render() + #import pdb; pdb.set_trace() print('DEBUG: Congratulations! You can now use the environment.') sys.exit() @@ -182,92 +189,57 @@ def main(cfg: ControlPCGRLConfig) -> None: model_cfg.pop('name') if cfg.multiagent.n_agents != 0: - multiagent_config = { - "policies": { - f"default_policy": PolicySpec( + multiagent_config = {} + if cfg.multiagent.policies == "centralized": + multiagent_config['policies'] = { + 'default_policy': PolicySpec( policy_class=None, observation_space=agent_obs_space, action_space=agent_act_space, - config=None,) - }, - "policy_mapping_fn": lambda agent_id: "default_policy", - "count_steps_by": "agent_steps", - } + config=None + ) + } + multiagent_config['policy_mapping_fn'] = lambda agent_id: 'default_policy' + elif cfg.multiagent.policies == "decentralized": + multiagent_config['policies'] = { + f'agent_{i}': PolicySpec( + policy_class=None, + observation_space=agent_obs_space, + action_space=agent_act_space, + config={ + 'custom_model': 'custom_model', + 'custom_model_config': { + "dummy_env_obs_space": copy.copy(agent_obs_space), + **model_cfg, + } + } + ) for i in range(cfg.multiagent.n_agents) + } + multiagent_config['policy_mapping_fn'] = lambda agent_id: agent_id + else: + raise ValueError('Unrecognized policy type. 
Policy values can either be centralized or decentralized') + + multiagent_config['count_steps_by'] = 'agent_steps' multiagent_config = {"multiagent": multiagent_config} + else: multiagent_config = {} # The rllib trainer config (see the docs here: https://docs.ray.io/en/latest/rllib/rllib-training.html) - trainer_config = { - 'env': 'pcgrl', - **multiagent_config, - 'framework': 'torch', - 'num_workers': num_workers if not (cfg.evaluate or cfg.infer) else 0, - 'num_gpus': cfg.hardware.n_gpu, - 'env_config': { - **cfg, # Maybe env should get its own config? (A subset of the original?) - "evaluation_env": False, - }, - # 'env_config': { - # 'change_percentage': cfg.change_percentage, - # }, - 'num_envs_per_worker': num_envs_per_worker, - 'render_env': cfg.render, - 'lr': cfg.learning_rate, - 'gamma': cfg.gamma, - 'model': { - 'custom_model': 'custom_model', - 'custom_model_config': { - "dummy_env_obs_space": copy.copy(agent_obs_space), - **model_cfg, - }, - }, - "evaluation_interval" : 1 if cfg.evaluate else 1, - "evaluation_duration": max(1, num_workers), - "evaluation_duration_unit": "episodes", - "evaluation_num_workers": eval_num_workers, - "env_task_fn": set_map_fn, - "evaluation_config": { - "env_config": { - **cfg, - "evaluation_env": True, - "num_eval_envs": num_envs_per_worker * eval_num_workers, - }, - "explore": True, - }, - "logger_config": { - # "wandb": { - # "project": "PCGRL", - # "name": exp_name_id, - # "id": exp_name_id, - # "api_key_file": "~/.wandb_api_key" - # }, - **logger_type, - # Optional: Custom logdir (do not define this here - # for using ~/ray_results/...). - "logdir": log_dir, - }, -# "exploration_config": { -# "type": "Curiosity", -# } -# "log_level": "INFO", - # "train_batch_size": 50, - # "sgd_minibatch_size": 50, - 'callbacks': stats_callbacks, - - # To take random actions while changing all tiles at once seems to invite too much chaos. - 'explore': True, - - # `ray.tune` seems to need these spaces specified here. - # 'observation_space': dummy_env.observation_space, - # 'action_space': dummy_env.action_space, - - # 'create_env_on_driver': True, - 'checkpoint_path_file': checkpoint_path_file, - # 'record_env': log_dir, - # 'stfu': True, - 'disable_env_checking': True, - } + num_workers = num_workers if not (cfg.evaluate or cfg.infer) else 1 + trainer_config = TrainerConfigParsers[cfg.algorithm]( + cfg, + agent_obs_space, + log_dir, + logger_type, + stats_callbacks, + checkpoint_path_file, + model_cfg, + multiagent_config, + num_workers=num_workers, + num_envs_per_worker=num_envs_per_worker, + eval_num_workers=eval_num_workers + ) register_env('pcgrl', make_env) @@ -315,7 +287,8 @@ def main(cfg: ControlPCGRLConfig) -> None: # Quit the program before agent starts training. sys.exit() - tune.register_trainable("CustomPPO", PPOTrainer) + #tune.register_trainable("CustomPPO", PPOTrainer) + tune.register_trainable(f"CustomTrainer", ControllablaTrainerFactory(cfg.algorithm)) # Limit the number of rows. reporter = CLIReporter( @@ -347,7 +320,7 @@ def main(cfg: ControlPCGRLConfig) -> None: try: # TODO: ray overwrites the current config with the re-loaded one. How to avoid this? 
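        # Note on the checkpointing arguments passed to `tune.run` below: with
        # checkpoint_score_attr='episode_reward_mean' and mode='max', keep_checkpoints_num=2 is
        # intended to retain the two best-scoring checkpoints rather than simply the two most recent.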
analysis = tune.run( - "CustomPPO", + "CustomTrainer", resume="AUTO" if (cfg.load and not cfg.overwrite) else False, config={ **trainer_config, @@ -355,8 +328,10 @@ def main(cfg: ControlPCGRLConfig) -> None: # checkpoint_score_attr="episode_reward_mean", # TODO: makes timestep total input by user.(n_frame) stop={"timesteps_total": 1e10}, + mode='max', + checkpoint_score_attr='episode_reward_mean', checkpoint_at_end=True, - checkpoint_freq=10, + checkpoint_freq=1, keep_checkpoints_num=2, local_dir=log_dir, verbose=1, diff --git a/control_pcgrl/rl/utils.py b/control_pcgrl/rl/utils.py index 2312223..6eb0010 100644 --- a/control_pcgrl/rl/utils.py +++ b/control_pcgrl/rl/utils.py @@ -2,18 +2,24 @@ Helper functions for train, infer, and eval modules. """ from pdb import set_trace as TT +import copy import glob import os import ray import re import numpy as np +import gym +from ray.tune import register_env +from gym.spaces import Tuple # from stable_baselines import PPO2 # from stable_baselines.bench import Monitor #from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv from control_pcgrl.configs.config import ControlPCGRLConfig from control_pcgrl import wrappers +from control_pcgrl.task_assignment import set_map_fn +from control_pcgrl.rl.envs import make_env # NOTE: minecraft has to precede zelda since minecraft zelda maze has both phrases in its name. MAP_WIDTHS = [("binary", 16), ("minecraft_3D_rain", 7), ("minecraft_3D", 15), ("zelda", 16), ("sokoban", 5)] @@ -220,8 +226,8 @@ def get_exp_name(cfg: ControlPCGRLConfig): cfg.problem.name, "weights_" + "-".join(f"{k}-{v}" for k, v in cfg.problem.weights.items()), cfg.representation, + cfg.multiagent.policies, # default to single policy ) - exp_name += '/' if cfg.model.name is not None: @@ -316,3 +322,199 @@ def max_exp_idx(exp_name): n = max(log_ns) return int(n) + +def parse_ppo_config( + config, + agent_obs_space, + log_dir, + logger_type, + stats_callbacks, + checkpoint_path_file, + model_cfg, + multiagent_config={}, + **kwargs + ): + num_workers = kwargs.get('num_workers', 0) + num_envs_per_worker = kwargs.get('num_envs_per_worker', 1) + eval_num_workers = kwargs.get('num_workers', 0) + + return { + 'env': 'pcgrl', + **multiagent_config, + 'framework': 'torch', + 'num_workers': num_workers if not (config.evaluate or config.infer) else 0, + 'num_gpus': config.hardware.n_gpu, + 'env_config': { + **config, # Maybe env should get its own config? (A subset of the original?) + "evaluation_env": False, + }, + # 'env_config': { + # 'change_percentage': cfg.change_percentage, + # }, + 'num_envs_per_worker': num_envs_per_worker, + 'render_env': config.render, + 'lr': config.learning_rate, + 'gamma': config.gamma, + 'model': { + 'custom_model': 'custom_model', + 'custom_model_config': { + "dummy_env_obs_space": copy.copy(agent_obs_space), + **model_cfg, + }, + }, + "evaluation_interval" : 1 if config.evaluate else 1, + "evaluation_duration": max(1, num_workers), + "evaluation_duration_unit": "episodes", + "evaluation_num_workers": eval_num_workers, + "env_task_fn": set_map_fn, + "evaluation_config": { + "env_config": { + **config, + "evaluation_env": True, + "num_eval_envs": num_envs_per_worker * eval_num_workers, + }, + "explore": True, + }, + "logger_config": { + # "wandb": { + # "project": "PCGRL", + # "name": exp_name_id, + # "id": exp_name_id, + # "api_key_file": "~/.wandb_api_key" + # }, + **logger_type, + # Optional: Custom logdir (do not define this here + # for using ~/ray_results/...). 
+ "logdir": log_dir, + }, +# "exploration_config": { +# "type": "Curiosity", +# } +# "log_level": "INFO", + # "train_batch_size": 50, + # "sgd_minibatch_size": 50, + 'callbacks': stats_callbacks, + + # To take random actions while changing all tiles at once seems to invite too much chaos. + 'explore': True, + + # `ray.tune` seems to need these spaces specified here. + # 'observation_space': dummy_env.observation_space, + # 'action_space': dummy_env.action_space, + + # 'create_env_on_driver': True, + 'checkpoint_path_file': checkpoint_path_file, + # 'record_env': log_dir, + # 'stfu': True, + 'disable_env_checking': True, + } + + +def make_grouped_env(config): + + n_agents = config.multiagent.n_agents + dummy_env = make_env(config) + groups = {'group_1': list(dummy_env.observation_space.keys())} + obs_space = Tuple(dummy_env.observation_space.values()) + act_space = Tuple(dummy_env.action_space.values()) + #import pdb; pdb.set_trace() + register_env( + 'grouped_pcgrl', + lambda config: wrappers.GroupedEnvironmentWrapper(make_env(config).with_agent_groups( + groups, obs_space=obs_space, act_space=act_space)) + + ) + + +def parse_qmix_config( + config, + agent_obs_space, + log_dir, + logger_type, + stats_callbacks, + checkpoint_path_file, + model_cfg, + multiagent_config={}, + **kwargs + ): + # register grouped version of environment + #import pdb; pdb.set_trace() + make_grouped_env(config) + num_workers = kwargs.get('num_workers', 0) + num_envs_per_worker = kwargs.get('num_envs_per_worker', 1) + eval_num_workers = kwargs.get('num_workers', 0) + return { + 'env': 'grouped_pcgrl', # replace with grouped environment + 'rollout_fragment_length': 1, + 'train_batch_size': 32, + 'framework': 'torch', + 'num_workers': num_workers if not (config.evaluate or config.infer) else 0, + 'num_gpus': 0, # config.hardware.n_gpu GPU's don't work for QMIX + 'env_config': { + **config, # Maybe env should get its own config? (A subset of the original?) + "evaluation_env": False, + }, + #'mixer': 'qmix', + 'num_envs_per_worker': num_envs_per_worker, + 'render_env': config.render, + 'lr': config.learning_rate, + 'gamma': config.gamma, + 'model': { + 'custom_model': 'custom_model', + 'custom_model_config': { + "dummy_env_obs_space": copy.copy(agent_obs_space), + **model_cfg, + }, + }, + "evaluation_interval" : 1 if config.evaluate else 1, + "evaluation_duration": max(1, num_workers), + "evaluation_duration_unit": "episodes", + "evaluation_num_workers": eval_num_workers, + #"env_task_fn": set_map_fn, + "evaluation_config": { + "env_config": { + **config, + "evaluation_env": True, + "num_eval_envs": num_envs_per_worker * eval_num_workers, + }, + "explore": True, + }, + "logger_config": { + # "wandb": { + # "project": "PCGRL", + # "name": exp_name_id, + # "id": exp_name_id, + # "api_key_file": "~/.wandb_api_key" + # }, + **logger_type, + # Optional: Custom logdir (do not define this here + # for using ~/ray_results/...). + "logdir": log_dir, + }, +# "exploration_config": { +# "type": "Curiosity", +# } +# "log_level": "INFO", + # "train_batch_size": 50, + # "sgd_minibatch_size": 50, + 'callbacks': stats_callbacks, + + # To take random actions while changing all tiles at once seems to invite too much chaos. + 'explore': True, + + # `ray.tune` seems to need these spaces specified here. 
+ # 'observation_space': dummy_env.observation_space, + # 'action_space': dummy_env.action_space, + + # 'create_env_on_driver': True, + 'checkpoint_path_file': checkpoint_path_file, + # 'record_env': log_dir, + # 'stfu': True, + 'disable_env_checking': True, + } + + +TrainerConfigParsers = { + 'PPO': parse_ppo_config, + 'QMIX': parse_qmix_config +} diff --git a/control_pcgrl/wrappers.py b/control_pcgrl/wrappers.py index 543e712..d8b2c1f 100644 --- a/control_pcgrl/wrappers.py +++ b/control_pcgrl/wrappers.py @@ -1,4 +1,5 @@ from functools import partial +import json from pdb import set_trace as TT from typing import Iterable @@ -108,7 +109,12 @@ class TransformObs(gym.Wrapper): """Lil' hack to transform nested observation dicts when dealing with multi-agent environments.""" def __init__(self, *args, **kwargs): super().__init__(self.env) - if kwargs.get("multiagent")['n_agents'] != 0: + try: + n_agents = kwargs.get('multiagent')['n_agents'] + except TypeError: + n_agents = json.loads(kwargs.get('multiagent').replace('\'', '\"'))['n_agents'] + if n_agents != 0: + #if kwargs.get("multiagent")['n_agents'] != 0: self.transform = self._transform_multiagent else: self.transform = self._transform @@ -161,8 +167,13 @@ def __init__(self, game, names, **kwargs): max_value = self.env.observation_space[n].high.max() self.names = names + self.show_agents = kwargs.get('show_agents', False) + try: + n_agents = kwargs['multiagent']['n_agents'] + except: + n_agents = json.loads(kwargs['multiagent'].replace('\'', '\"'))['n_agents'] self.observation_space = spaces.Box( - low=0, high=max_value, shape=(*self.shape[:-1], depth) + low=0, high=max_value if self.show_agents else max(max_value, n_agents), shape=(*self.shape[:-1], depth) ) @@ -184,21 +195,12 @@ def _transform(self, obs): final = np.empty([]) for n in self.names: -# if len(self.env.observation_space.spaces[n].shape) == 3: if len(final.shape) == 0: final = obs[n].reshape(*self.shape[:-1], -1) else: final = np.append( final, obs[n].reshape(*self.shape[:-1], -1), axis=-1 ) -# else: -# if len(final.shape) == 0: -# final = obs[n].reshape(self.shape[0], self.shape[1], self.shape[2], -1) -# else: -# final = np.append( -# final, obs[n].reshape(self.shape[0], self.shape[1], self.shape[2], -1), axis=2 -# ) - return final class ToImageCA(ToImage): @@ -254,11 +256,15 @@ def __init__(self, game, name, padded: bool = False, **kwargs): + 1 ) - for v in shape: - new_shape.append(v) - new_shape.append(self.dim) + new_shape.extend(shape) + if len(new_shape) > 2: + new_shape[-1] += self.dim - 1 + else: + new_shape.append(self.dim) + #import pdb; pdb.set_trace() + self.show_agents = kwargs.get('show_agents', False) self.observation_space.spaces[self.name] = gym.spaces.Box( - low=0, high=1, shape=new_shape, dtype=np.uint8 + low=0, high=1 if not self.show_agents else max(1, kwargs['multiagent']['n_agents']), shape=new_shape, dtype=np.uint8 ) def step(self, action, **kwargs): @@ -275,16 +281,25 @@ def reset(self): return obs def _transform(self, obs): - old = obs[self.name] + named_obs = obs[self.name] + if self.show_agents: # if agent positions are a part of the observation, then the map observation will already have an extra dimension + old = named_obs[:, :, 0] + else: + old = named_obs + if self.padded: # Replace out-of-bounds values with all-zeros. 
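            # Example (illustrative values): with self.dim == 2 tile types and 0 marking
            # out-of-bounds cells in the padded map, np.eye(self.dim + 1)[old] one-hot encodes into
            # 3 channels and the [..., 1:] slice below drops the out-of-bounds channel, so OOB cells
            # become all-zero vectors while tiles 1 and 2 map to (1, 0) and (0, 1) respectively.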
new = np.eye(self.dim + 1)[old] new = new[..., 1:] - else: new = np.eye(self.dim)[old] + # add the agent positions back into the observation + if self.show_agents: + new = np.concatenate((new, named_obs[:, :, -1][:, :, None]), axis=-1) + obs[self.name] = new + #import pdb; pdb.set_trace() return obs @@ -390,8 +405,18 @@ def __init__(self, game, crop_shape: Iterable, pad_value: int, name: str, **kwar len(self.env.observation_space.spaces[name].shape) in [2, 3] ), "This wrapper only works on 2D or 3D arrays." self.name = name - self.shape = crop_shape - self.pad = crop_shape // 2 + self.show_agents = kwargs.get('show_agents', False) + try: + self.shape = np.array(list(crop_shape)) + self.pad = crop_shape // 2 + except TypeError: + #import pdb; pdb.set_trace() + self.shape = np.array(json.loads(str(crop_shape))) + self.pad = self.shape // 2 + if self.show_agents: + self.shape.append(2) # add extra two channels for the positions + self.shape = np.array(self.shape) + #self.pad = crop_shape // 2 self.pad_value = pad_value self.observation_space = gym.spaces.Dict({}) @@ -400,7 +425,7 @@ def __init__(self, game, crop_shape: Iterable, pad_value: int, name: str, **kwar self.observation_space.spaces[k] = s high_value = self.observation_space[self.name].high.max() + 1 # 0s correspond to out-of-bounds tiles self.observation_space.spaces[self.name] = gym.spaces.Box( - low=0, high=high_value, shape=tuple(crop_shape), dtype=np.uint8 + low=0, high=high_value if not self.show_agents else max(high_value, kwargs['multiagent']['n_agents']), shape=tuple(self.shape), dtype=np.uint8 ) def step(self, action, **kwargs): @@ -422,13 +447,32 @@ def _transform(self, obs): # x, y = obs["pos"] pos = obs['pos'] - # View Centering - # padded = np.pad(map, self.pad, constant_values=self.pad_value) + # View Padding padded = np.pad(map, self.pad, constant_values=0) # Denote out-of-bounds tiles as 0. - # cropped = padded[x : x + self.size, y : y + self.size] # Compensate for the bottom-left padding. 
+ # View Centering cropped = padded[tuple([slice(p, p + self.shape[i]) for i, p in enumerate(pos)])] + + # if show positions is turned on: add an extra channel that shows agent positions + # NOTE: Wide representaion cannot use this, since positions are not stored in representation + if self.show_agents: + #import pdb; pdb.set_trace() + #map_expanded = map[:, :, None] + agent_positions = self.unwrapped.get_agent_position() + agent_positions_map = np.zeros(map.shape) + for i, pos in enumerate(agent_positions): + agent_positions_map[tuple(pos)] = i + 1 + #agent_positions_map[agent_positions[:, 0], agent_positions[:, 1]] = 1 + # view padding + padded_positions = np.pad(agent_positions_map, self.pad, constant_values=0) + + # view centering + cropped_positions = padded_positions[tuple([slice(p, p + self.shape[i]) for i, p in enumerate(pos)])] + + cropped = np.concatenate((cropped[:, :, None], cropped_positions[:, :, None]), axis=-1).astype(np.uint8) + + #import pdb; pdb.set_trace() obs[self.name] = cropped return obs @@ -693,13 +737,50 @@ def reset(self): # TODO + +def disable_passive_env_checker(env): + # remove the passive environment checker wrapper from the env attribute of an env + # base case -> the environment is not a wrapper + if not hasattr(env, 'env'): + return env + + root = env + prev = env + while hasattr(prev, 'env'): + next_ = prev.env + if isinstance(next_, gym.wrappers.env_checker.PassiveEnvChecker): + prev.env = next_.env + prev = next_ + + return root + +""" +gym wrappers do not allow for consistent seeding +add a seed method to each wrapper +""" +#def seedify(env): +# def seed(self, s): +# print(self) +# return self.env.seed(s) +# +# root = env +# curr = env +# while hasattr(curr, 'env'): +# type(curr).seed = seed +# curr = curr.env +# return root + + class MultiAgentWrapper(gym.Wrapper, MultiAgentEnv): def __init__(self, game, **kwargs): multiagent_args = kwargs.get('multiagent') - self.env = game + self.env = disable_passive_env_checker(game) # DISABLE GYM PASSIVE ENVIRONMENT CHECKER gym.Wrapper.__init__(self, self.env) MultiAgentEnv.__init__(self.env) - self.n_agents = multiagent_args.get('n_agents', 2) + try: + self.n_agents = multiagent_args.get('n_agents', 2) + except AttributeError: + self.n_agents = json.loads(multiagent_args.replace('\'', '\"'))['n_agents'] self.observation_space = gym.spaces.Dict({}) self.action_space = gym.spaces.Dict({}) for i in range(self.n_agents): @@ -713,26 +794,51 @@ def reset(self): obs = super().reset() return obs + def seed(self, s): + return self.unwrapped.seed(s) + def step(self, action): # print(f"Step:") # print(f"Action: {action}") obs, rew, done, info = {}, {}, {}, {} + for k, v in action.items(): self.unwrapped._rep.set_active_agent(k) obs_k, rew[k], done[k], info[k] = super().step(action={k: v}) obs.update(obs_k) - # rew = {f'agent_{i}': rew for i in range(self.n_agents)} - # done = {f'agent_{i}': done for i in range(self.n_agents)} done['__all__'] = np.all(list(done.values())) - # info = {f'agent_{i}': info for i in range(self.n_agents)} - - # for i in range(self.n_agents): - # act_i = action[f'agent_{i}'] - # print(act_i) - # obs_i, rew_i, done_i, info_i = super().step(act_i, **kwargs) - # obs.update({f'agent_{i}': obs_i}) - # rew.update({f'agent_{i}': rew_i}) - # done.update({f'agent_{i}': done_i}) - # info.update({f'agent_{i}': info_i}) return obs, rew, done, info + + +class GroupedEnvironmentWrapper(MultiAgentEnv): + def __init__(self, env, **kwargs): + #import pdb; pdb.set_trace() + MultiAgentEnv.__init__(self) + 
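        # Re-expose the grouped env's spaces, metrics, and control targets so the QMIX trainer
        # (which consumes tupled per-group observations) can treat this like any other MultiAgentEnv.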
#gym.Wrapper.__init__(self, env.env) + self.env = env + self.groups = self.env.groups + self.agent_id_to_group = self.env.agent_id_to_group + self._unwrapped = self.env.env.unwrapped + #self.thing = 5 + #super().__init__(env) # inherit the attributes of the base environment + #self.env = env + self.observation_space = self.env.observation_space + self.action_space = self.env.action_space + self.ctrl_metrics = self.env.env.ctrl_metrics + self.metrics = self.env.env.metrics + + def reset(self): + return self.env.reset() + + def step(self, actions): + return self.env.step(actions) + + def _ungroup_items(self, items): + return self.env._ungroup_items(items) + + def _group_items(self, items): + return self.env._group_items(items) + + + diff --git a/rllib_inference.py b/rllib_inference.py new file mode 100644 index 0000000..db1ad4f --- /dev/null +++ b/rllib_inference.py @@ -0,0 +1,300 @@ +import matplotlib as mpl +mpl.use('Agg') +import matplotlib.pyplot as plt +import copy +import argparse +import numpy as np +from pathlib import Path +import uuid +from tqdm import tqdm +from pathlib import Path +import json +import imageio +import pandas as pd +import ray.rllib.agents.ppo as ppo +from gym.spaces import Tuple +import ray.rllib.algorithms.qmix as qmix +from ray.tune.registry import register_env +from ray.rllib.policy.policy import PolicySpec +from ray.rllib.models import ModelCatalog +from control_pcgrl.rl import models +from control_pcgrl.rl.envs import make_env +from control_pcgrl.rl.rllib_utils import ControllablaTrainerFactory as trainer_factory +from control_pcgrl import wrappers + +def load_config(experiment_path): + with open(Path(experiment_path, 'params.json'), 'r') as f: + config = json.load(f) + # override multiagent policy mapping function + + if 'multiagent' in config: + if 'default_policy' in config: + config['multiagent']['policy_mapping_fn'] = lambda agent_id: 'default_policy' + else: + config['multiagent']['policy_mapping_fn'] = lambda agent_id: agent_id + config['env_config']['multiagent'] = json.loads(config['env_config']['multiagent'].replace("\'", "\"")) + + config['evaluation_env'] = True + config['explore'] = False # turn off exploration for evaluation + config['env_config']['crop_shape'] = json.loads(config['env_config']['crop_shape']) + config['env_config']['problem'] = json.loads(config['env_config']['problem'].replace("\'", "\"")) + + env_name = config['env_config']['env_name'] + return config + +def setup_multiagent_config(config, model_cfg): + dummy_env = make_env(config) + obs_space = dummy_env.observation_space['agent_0'] + act_space = dummy_env.action_space['agent_0'] + multiagent_config = {} + if config['multiagent']['policies'] == "centralized": + multiagent_config['policies'] = { + 'default_policy': PolicySpec( + policy_class=None, + observation_space=obs_space, + action_space=act_space, + config={ + 'custom_model': 'custom_model', + 'custom_model_config': { + "dummy_env_obs_space": copy.copy(obs_space), + **json.loads(model_cfg.replace('\'', '\"')), + } + } + ) + } + multiagent_config['policy_mapping_fn'] = lambda agent_id: 'default_policy' + elif config['multiagent']['policies'] == "decentralized": + multiagent_config['policies'] = { + f'agent_{i}': PolicySpec( + policy_class=None, + observation_space=obs_space, + action_space=act_space, + config={ + 'custom_model': 'custom_model', + 'custom_model_config': { + "dummy_env_obs_space": copy.copy(obs_space), + **json.loads(model_cfg.replace('\'', '\"')), + } + } + ) for i in 
range(config['multiagent']['n_agents']) + } + multiagent_config['policy_mapping_fn'] = lambda agent_id: agent_id + return multiagent_config + +def checkpoints_iter(experiment_path): + experiment_path = Path(experiment_path) + return filter(lambda f: 'checkpoint' in f.name, experiment_path.iterdir()) + + +def get_best_checkpoint(experiment_path, config): + # load progress.csv + progress = pd.read_csv(Path(experiment_path, 'progress.csv')) + + max_episode_reward = float('-inf') + max_checkpoint = None + max_checkpoint_name = None + for checkpoint in checkpoints_iter(experiment_path): + # get number after underscore in checkpoint + trainer = restore_trainer(Path(checkpoint), config) + iteration = trainer._iteration + # look up iteration in progress dataframe + trainer_performance = progress.loc[progress['training_iteration'] == iteration] + trainer_reward = trainer_performance['episode_reward_mean'].values[0] + # sometimes the first checkpoint has a nan reward + if np.isnan(trainer_reward) or trainer_reward > max_episode_reward: + max_episode_reward = float('-inf') if np.isnan(trainer_reward) else trainer_reward + max_checkpoint = trainer + max_checkpoint_name = checkpoint + print(f'Loaded from checkpoint: {max_checkpoint_name}') + return max_checkpoint + +def restore_trainer(checkpoint_path, config): + if config['env_config']['algorithm'] == 'QMIX': + trainer = qmix.QMix(config=config) + else: + trainer = ppo.PPOTrainer(config=config) + print(checkpoint_path) + trainer.restore(str(checkpoint_path)) + return trainer + +def register_model(config): + MODELS = {"NCA": models.NCA, "DenseNCA": models.DenseNCA, "SeqNCA": models.SeqNCA, "SeqNCA3D": models.SeqNCA3D} + model_conf_str = config['env_config']['model'].replace('\'', '\"') + model_name_default = model_conf_str.find('None') + if model_name_default > 0: + model_conf_str = model_conf_str[:model_name_default-1] + f' \"None\"' + model_conf_str[model_name_default+4:] + model_config = json.loads(model_conf_str) + if model_config.get('name') == "None": + if config['env_config']['representation'] == 'wide': + model_cls = models.ConvDeconv2D + else: + model_cls = models.CustomFeedForwardModel + else: + model_cls = MODELS[model_config['name']] + ModelCatalog.register_custom_model('custom_model', model_cls) + +def rollout(env_config, trainer, policy_mapping_fn=None, seed=None): + env = make_env(env_config) + env.seed(seed) + env.reset() + env.seed(seed) + #env.unwrapped._max_iterations *= 2 + obs = env.reset() + done = False + acts, obss, rews, infos, frames = [], [], [], [], [] + while not done: + if policy_mapping_fn is not None: + actions = get_multi_agent_actions(trainer, obs, policy_mapping_fn) + acts.append({agent: int(act) for agent, act in actions.items()}) + elif env_config['algorithm'] == 'QMIX': + actions = get_qmix_actions(trainer, obs) + acts.append({agent: int(act) for agent, act in actions.items()}) + else: + actions = get_single_agent_actions(trainer, obs) + acts.append({'agent_0': int(actions)}) + + # build action histogram + obs, rew, done, info = env.step(actions) + #import pdb; pdb.set_trace() + frame = env.render(mode='rgb_array') + frames.append(frame) + rews.append(rew) + infos.append(int(env.unwrapped._rep_stats['path-length'])) + #infos.append(info) + if isinstance(done, dict): + done = done['__all__'] + + #import pdb; pdb.set_trace() + return { + 'actions': acts, + 'rewards': rews, + 'infos': infos, + 'frames': frames, + 'success': env.unwrapped._prob.get_episode_over(env.unwrapped._rep_stats, None), + 'heatmaps': 
env.unwrapped._rep.heatmaps + } + +def save_trial_metrics(metrics, logdir): + # save initial frame, final frame, and gif of frames + imageio.imsave(Path(logdir, 'initial_map.png'), metrics['frames'][0]) + imageio.imsave(Path(logdir, 'final_map.png'), metrics['frames'][-1]) + imageio.mimsave(Path(logdir, 'frames.gif'), metrics['frames']) + # save rewards in json file + with open(Path(logdir, 'rewards.json'), 'w+') as f: + f.write(json.dumps(metrics['rewards'])) + # graph rewards over time + # save infos in json file + with open(Path(logdir, 'infos.json'), 'w+') as f: + f.write(json.dumps(metrics['infos'])) + # plot path length over time + # save actions in json file + with open(Path(logdir, 'actions.json'), 'w+') as f: + f.write(json.dumps(list(metrics['actions']))) + + # check success + with open(Path(logdir, 'success.json'), 'w+') as f: + f.write(json.dumps({'success': bool(metrics['success'])})) + + for i, heatmap in enumerate(metrics['heatmaps']): + fig, ax = plt.subplots() + im = ax.imshow(heatmap) + cbar = ax.figure.colorbar(im, ax=ax) + cbar.ax.set_ylabel('changes', rotation=-90, va="bottom") + ax.grid(which="minor", color="w", linestyle='-', linewidth=3) + fig.savefig(Path(logdir, f'{i}_heatmap.png'), dpi=400) + plt.close(fig) # close figure to prevent memory issues + + +def get_qmix_actions(trainer, observations): + actions = trainer.compute_single_action(tuple(observations.values())) + return {agent: action for agent, action in zip(observations.keys(), actions)} + + +def get_single_agent_actions(trainer, observations): + return trainer.compute_single_action(observations) + +def get_multi_agent_actions(trainer, observations, policy_mapping_fn): + return { + agent_id: trainer.compute_single_action(agent_obs, policy_id=policy_mapping_fn(agent_id)) + for agent_id, agent_obs in observations.items() + } + +def make_grouped_env(config): + + try: + n_agents = config['multiagent']['n_agents'] + except: + n_agents = json.loads(config['multiagent'].replace('\'', '\"'))['n_agents'] + dummy_env = make_env(config) + groups = {'group_1': list(dummy_env.observation_space.keys())} + obs_space = Tuple(dummy_env.observation_space.values()) + act_space = Tuple(dummy_env.action_space.values()) + #import pdb; pdb.set_trace() + register_env( + 'grouped_pcgrl', + lambda config: wrappers.GroupedEnvironmentWrapper(make_env(config).with_agent_groups( + groups, obs_space=obs_space, act_space=act_space)) + + ) + +# run evals with the checkpoint +def evaluate(experiment_path): + # load and setup config + config = load_config(experiment_path) + if 'multiagent' in config: + config['multiagent'] = setup_multiagent_config(config['env_config'], config['env_config']['model']) + # delete keys not recognized by rllib + del config['checkpoint_path_file'] + del config['evaluation_env'] + del config['callbacks'] + del config['num_workers'] + del config['num_envs_per_worker'] + #del config['multiagent'] + #import pdb; pdb.set_trace() + if config['env_config']['algorithm'] == 'PPO': + register_env('pcgrl', make_env) + else: + make_grouped_env(config['env_config']) + #register_env('grouped_pcgrl', make_grouped_env) + config['num_gpus'] = 0 + register_model(config) + # load trainer from checkpoint + trainer = get_best_checkpoint(experiment_path, config) + # rollout the model for n trials + logdir = Path(experiment_path, f'eval_best_{uuid.uuid4()}') + logdir.mkdir() + + try: + policy_mapping_fn = config['multiagent']['policy_mapping_fn'] + except KeyError: + policy_mapping_fn = None + + paths = [] + max_changes = 0 + for 
+    for trial in tqdm(range(40)):
+        results = rollout(config['env_config'], trainer, policy_mapping_fn, seed=trial * 100)
+        trial_log_dir = Path(logdir, f'{trial}')
+        trial_log_dir.mkdir()
+        paths.append(results['infos'][-1])
+        #changes.append(results['infos'][-1]['changes'] / results['infos'][-1]['iterations'])
+        save_trial_metrics(results, trial_log_dir)
+
+    print(f'Wrote logs to: {logdir}')
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--experiment_path',
+        '-e',
+        dest='experiment_path',
+        type=str,
+        required=True
+    )
+
+    #parser.add_argument('checkpoint_loader')  # just load the best checkpoint for now
+    args = parser.parse_args()
+    evaluate(Path(args.experiment_path))
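The evaluation entry point above writes one sub-directory per trial under eval_best_<uuid>, each containing success.json and infos.json (the per-step path lengths recorded by rollout). A minimal sketch of how those artifacts could be aggregated after a run, assuming only that directory layout; summarize_eval and the example path are illustrative and not part of this patch:

import json
from pathlib import Path

def summarize_eval(eval_dir):
    # walk the per-trial directories written by save_trial_metrics
    eval_dir = Path(eval_dir)
    successes, final_path_lengths = [], []
    for trial_dir in sorted(p for p in eval_dir.iterdir() if p.is_dir()):
        with open(trial_dir / 'success.json') as f:
            successes.append(json.load(f)['success'])
        with open(trial_dir / 'infos.json') as f:
            path_lengths = json.load(f)  # one path-length value per env step
        final_path_lengths.append(path_lengths[-1])
    n_trials = len(successes)
    return {
        'n_trials': n_trials,
        'success_rate': sum(successes) / n_trials if n_trials else float('nan'),
        'mean_final_path_length': sum(final_path_lengths) / n_trials if n_trials else float('nan'),
    }

# e.g. summarize_eval('path/to/experiment/eval_best_<uuid>')  # hypothetical path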
diff --git a/tests/test_multiagent_representations.py b/tests/test_multiagent_representations.py
new file mode 100644
index 0000000..43f50ba
--- /dev/null
+++ b/tests/test_multiagent_representations.py
@@ -0,0 +1,205 @@
+import pytest
+from random import randint
+from copy import deepcopy
+from pathlib import Path
+from itertools import permutations, product
+import numpy as np
+
+from control_pcgrl import wrappers
+from control_pcgrl.rl.envs import make_env
+
+
+@pytest.fixture
+def basic_env_config():
+    return {
+        'problem': {
+            'name': 'binary',
+            'weights': {'path-length': 100},
+            'controls': '???',
+            'alp_gmm': '???',
+        },
+        'hardware': {'n_cpu': 1, 'n_gpu': 1, 'num_envs_per_worker': 10},
+        'multiagent': {'n_agents': 2},
+        'representation': 'turtle',
+        'map_shape': [16, 16],
+        'crop_shape': [32, 32],
+        'max_board_scans': 3,
+        'n_aux_tiles': 0,
+        'evaluation_env': False,
+        'observation_size': None,
+        'controls': None,
+        'change_percentage': None,
+        'static_prob': None,
+        'action_size': None,
+        'log_dir': Path('./'),
+    }
+
+
+def validate_turtle_actions(actions, old_positions, new_positions, old_map, new_map):
+    map_shape = old_map.shape
+
+    def validate_move(action, old_position, new_position):
+        # actions 0-3 move the agent up/down/left/right, clamped at the map edges
+        if action == 0:
+            if old_position[0] == 0:
+                assert new_position[0] == old_position[0]
+            else:
+                assert old_position[0] - new_position[0] == 1
+            assert old_position[1] - new_position[1] == 0
+        elif action == 1:
+            if old_position[0] == map_shape[0] - 1:
+                assert new_position[0] == old_position[0]
+            else:
+                assert old_position[0] - new_position[0] == -1
+            assert old_position[1] - new_position[1] == 0
+        elif action == 2:
+            if old_position[1] == 0:
+                assert new_position[1] == old_position[1]
+            else:
+                assert old_position[1] - new_position[1] == 1
+            assert old_position[0] - new_position[0] == 0
+        elif action == 3:
+            if old_position[1] == map_shape[1] - 1:
+                assert new_position[1] == old_position[1]
+            else:
+                assert old_position[1] - new_position[1] == -1
+            assert old_position[0] - new_position[0] == 0
+
+    for agent, old_pos, new_pos in zip(actions, old_positions, new_positions):
+        action = actions[agent]
+        if action < 4:
+            validate_move(action, old_pos, new_pos)
+        else:
+            # position shouldn't change when we place a tile
+            assert tuple(old_pos) == tuple(new_pos)
+            assert new_map[tuple(new_pos)] == action - 4
+
+
+@pytest.mark.parametrize(
+    'action_0,action_1',
+    permutations(list(range(6)), 2)
+)
+def test_multiagent_turtle(basic_env_config, action_0, action_1):
+    # GIVEN
+    env_config = basic_env_config
+    env_name = 'binary-turtle-v0'
+    env = wrappers.CroppedImagePCGRLWrapper(env_name, **env_config)
+    env = wrappers.MultiAgentWrapper(env, **env_config)
+    actions = {'agent_0': action_0, 'agent_1': action_1}
+    env.reset()
+    rep = env.unwrapped._rep
+    init_positions = deepcopy(rep.get_positions())
+    init_map = deepcopy(rep.rep._map)
+
+    # WHEN
+    rep.update(actions)
+
+    # THEN
+    validate_turtle_actions(
+        actions,
+        init_positions,
+        rep._positions,
+        init_map,
+        rep.rep._map
+    )
+
+
+@pytest.mark.parametrize(
+    'action_0,action_1',
+    permutations(list(range(2)), 2)
+)
+def test_multiagent_narrow(basic_env_config, action_0, action_1):
+    # GIVEN
+    env_config = basic_env_config
+    env_config['representation'] = 'narrow'
+    env_name = 'binary-narrow-v0'
+    env = wrappers.CroppedImagePCGRLWrapper(env_name, **env_config)
+    env = wrappers.MultiAgentWrapper(env, **env_config)
+    env.reset()
+    rep = env.unwrapped._rep
+    init_map = deepcopy(rep.rep._map)
+    init_positions = deepcopy(rep._positions)
+    actions = {'agent_0': action_0, 'agent_1': action_1}
+
+    # WHEN
+    rep.update(actions)
+    new_map = rep.rep._map
+
+    # THEN
+    new_positions = rep.get_positions()
+    # check that each agent's position advanced by one column
+    # Note: the test does not account for changes in vertical position
+    assert new_positions[0][1] - 1 == init_positions[0][1]
+    assert new_positions[1][1] - 1 == init_positions[1][1]
+    # check that the map is updated at each agent's previous position
+    assert new_map[tuple(init_positions[0])] == actions['agent_0']
+    assert new_map[tuple(init_positions[1])] == actions['agent_1']
+
+
+# INCOMPLETE TEST
+@pytest.mark.parametrize(
+    'position_x_0,position_y_0,action_0,position_x_1,position_y_1,action_1',
+    [[randint(0, 15), randint(0, 15), randint(0, 1), randint(0, 15), randint(0, 15), randint(0, 1)]]
+)
+def test_multiagent_wide(
+        basic_env_config,
+        position_x_0,
+        position_y_0,
+        action_0,
+        position_x_1,
+        position_y_1,
+        action_1
+):
+    # GIVEN
+    env_config = basic_env_config
+    env_config['representation'] = 'wide'
+    env_name = 'binary-wide-v0'
+    env = wrappers.ActionMapImagePCGRLWrapper(env_name, **env_config)
+    env = wrappers.MultiAgentWrapper(env, **env_config)
+    env.reset()
+    rep = env.unwrapped._rep
+    init_map = deepcopy(rep.rep._map)
+    init_positions = deepcopy(rep._positions)
+    actions = {
+        'agent_0': [position_y_0, position_x_0, action_0],
+        'agent_1': [position_y_1, position_x_1, action_1]
+    }
+
+    # WHEN
+    rep.update(actions)
+
+    # THEN check that the map is changed correctly
+    new_map = rep.rep._map
+    assert new_map[position_y_0][position_x_0] == action_0
+    assert new_map[position_y_1][position_x_1] == action_1
+    # make sure that the map being modified is the same one the pcgrl env uses
+    np.testing.assert_array_equal(new_map, rep.unwrapped._map)
+
+
+def print_agent_positions(agent):
+    # placeholder helper; not yet implemented
+    pass
+
+
+# INCOMPLETE TEST: prints observations for manual inspection; assertions still to be written
+def test_multiagent_position_sharing(basic_env_config):
+    # GIVEN
+    env_config = basic_env_config
+    env_config['env_name'] = 'binary-narrow-v0'
+    env_config['show_agents'] = True
+    env_config['evaluate'] = False
+
+    env = make_env(env_config)
+
+    # WHEN
+    obs = env.reset()
+    print(obs['agent_0'].shape)
+    actions = {'agent_0': 0, 'agent_1': 0}
+
+    newobs, _, _, _ = env.step(actions)
+    agent_positions = np.where(newobs['agent_0'][:, :, -1] > 0)
+    print(agent_positions)
+    agent_positions = np.where(newobs['agent_1'][:, :, -1] > 0)
+    print(agent_positions)
+    newobs, _, _, _ = env.step(actions)
+    agent_positions = np.where(newobs['agent_0'][:, :, -1] > 0)
+    print(agent_positions)
+    agent_positions = np.where(newobs['agent_1'][:, :, -1] > 0)
+    print(agent_positions)
+    print(env.unwrapped._rep._positions)
+
+    # THEN
+    # TODO: assert that each agent's observation encodes the other agent's position
+    pass
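For reference, the movement checks in validate_turtle_actions imply a fixed turtle action convention: 0 = up, 1 = down, 2 = left, 3 = right, with any action >= 4 placing tile `action - 4` and leaving the agent in place, and moves clamped at the map edges. A compact restatement of that convention as a lookup table; this sketch (including the name expected_position) is illustrative only and not part of the patch:

# Direction deltas implied by the assertions in validate_turtle_actions.
TURTLE_MOVES = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

def expected_position(action, pos, map_shape):
    if action >= 4:
        # tile-placement actions leave the agent where it is
        return tuple(pos)
    dr, dc = TURTLE_MOVES[action]
    row = min(max(pos[0] + dr, 0), map_shape[0] - 1)  # clamp at the top/bottom edge
    col = min(max(pos[1] + dc, 0), map_shape[1] - 1)  # clamp at the left/right edge
    return (row, col)

# e.g. expected_position(0, (0, 5), (16, 16)) == (0, 5)  # blocked at the top edge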