Feature/user choice sample tensor #172

Merged (121 commits, Dec 19, 2023)

Commits
7135dcb
Add first PPO numpy buffer implementation
belerico Oct 5, 2023
055cacd
Merge branch 'main' of https://github.com/Eclectic-Sheep/sheeprl into…
belerico Oct 6, 2023
91dbdcb
Add distribution cfg to agent
belerico Oct 6, 2023
8174704
No need for tensordict
belerico Oct 6, 2023
9da5a86
Add SAC numpy
belerico Oct 6, 2023
3cc29cf
Improve sample_next_obs
belerico Oct 8, 2023
1882b6a
Add DV1 with numpy buffer
belerico Oct 8, 2023
024cc98
Too much reshapes
belerico Oct 8, 2023
4e4389e
Add Sequential and EnvIndipendent np buffers
belerico Oct 9, 2023
c5fc229
Fewer number of reshapes
belerico Oct 9, 2023
34b3261
Faster indexing + from_numpy parameter
belerico Oct 9, 2023
f549019
Dreamer-V2 numpy
belerico Oct 9, 2023
5131e60
Fix buffer add
belerico Oct 10, 2023
ded1b19
Better indexing
belerico Oct 10, 2023
f513171
Fix indexes to sample
belerico Oct 10, 2023
48d20d3
Fix metrics when they are nan
belerico Oct 10, 2023
82b576a
Fix reshape when bootstrapping + fix normalization
belerico Oct 10, 2023
f029e1e
Guard timer metrics
belerico Oct 10, 2023
6b423f5
Merge branch 'fix/algos' of github.com:Eclectic-Sheep/sheeprl into fe…
belerico Oct 10, 2023
f12ecb1
np.intp for indexing
belerico Oct 10, 2023
f8546f4
Change dtype after creating the tensor
belerico Oct 10, 2023
662e54c
Merge branch 'main' of https://github.com/Eclectic-Sheep/sheeprl into…
belerico Oct 10, 2023
fb7b7c8
Merge branch 'main' of github.com:Eclectic-Sheep/sheeprl into feature…
belerico Oct 10, 2023
b79d67d
Fix buf[key] after __getstate__ is called upon checkpoint
belerico Oct 11, 2023
1260c06
Securely close fd on __getstate__()
belerico Oct 11, 2023
2b7ef10
Add MemmapArray
belerico Oct 11, 2023
385405d
Merge branch 'feature/buffer-np' of https://github.com/Eclectic-Sheep…
belerico Oct 11, 2023
75f0664
Add __len__ function
belerico Oct 11, 2023
63ec5e6
Fix len
belerico Oct 11, 2023
a9b323c
Merge branch 'feature/buffer-np' of github.com:Eclectic-Sheep/sheeprl…
belerico Oct 11, 2023
3094295
Better array setter and __del__ now controls ownership
belerico Oct 11, 2023
c5794f7
Do not transfer ownership upon array setter
belerico Oct 12, 2023
7b81238
Add properties
belerico Oct 12, 2023
b134b4f
Feature/episode buffer np (#121)
michele-milesi Oct 12, 2023
64b60d2
Fix not use self._obs_keys
belerico Oct 12, 2023
4be0442
Sample only if n > 0
belerico Oct 12, 2023
11d2c68
Fix shapes
belerico Oct 12, 2023
d71f09d
feat: added possibility to specify sequence length in sample() + adde…
michele-milesi Oct 12, 2023
d3f477e
tests: update episode buffer numpy tests
michele-milesi Oct 12, 2023
f11e886
Merge branch 'feature/buffer-np' of github.com:Eclectic-Sheep/sheeprl…
michele-milesi Oct 12, 2023
26cc62d
tests: added replay buffer np tests
michele-milesi Oct 12, 2023
5be26de
tests: added sequential replay buffer np tests
michele-milesi Oct 12, 2023
9b60f07
fix: env independent repla buffer name
michele-milesi Oct 12, 2023
3d67ca5
fix: replay buffer + add tests
michele-milesi Oct 12, 2023
70c2b59
Safely release buffer on Windows
belerico Oct 12, 2023
cc36d97
Safely delets memmaps
belerico Oct 12, 2023
0d025d1
Del buffer
belerico Oct 12, 2023
82fd261
Safer array setter
belerico Oct 13, 2023
28008fa
Add Memmap.from_array
belerico Oct 14, 2023
63efdc0
Fix ReplayBuffer __set_item__
belerico Oct 14, 2023
f0fc7ed
fix: sac_np sample
michele-milesi Oct 16, 2023
775ec1d
Merge branch 'main' of https://github.com/Eclectic-Sheep/sheeprl into…
belerico Oct 16, 2023
b36775c
tests: update tests
michele-milesi Oct 16, 2023
0f54d7c
tests: update
michele-milesi Oct 17, 2023
bc51ad3
fix: sequential replay buffer sample clone
michele-milesi Oct 17, 2023
7c66ee4
Add tests + Fix MemmapArray on Windows
belerico Oct 17, 2023
ae36340
Merge branch 'feature/buffer-np' of https://github.com/Eclectic-Sheep…
belerico Oct 17, 2023
611d4ad
Add tests to run only on Linux
belerico Oct 17, 2023
25ae53f
Fix tests
belerico Oct 17, 2023
bea4211
Fix skip test on Windows
belerico Oct 17, 2023
944f3b3
Dreamer-V2 with EpisodeBuffer np
belerico Oct 17, 2023
9a957a1
Add user warning if file exists when creating a new MemmapArray
belerico Oct 17, 2023
6d97cda
feat: added dreamer v3 np
michele-milesi Oct 17, 2023
4b47902
Add docstrings + Fix array setter if shapes differ
belerico Oct 18, 2023
bdc87fe
Fix tests
belerico Oct 18, 2023
38eeafe
Add docstring
belerico Oct 20, 2023
207de84
Docstrings
belerico Oct 23, 2023
3d00eca
fix: sample of env independent buffer
michele-milesi Oct 23, 2023
80d968a
Fix locked tensordict
belerico Oct 24, 2023
f7974e2
Merge branch 'main' of github.com:Eclectic-Sheep/sheeprl into feature…
belerico Oct 27, 2023
28f7f58
Add configs
belerico Oct 27, 2023
49e6083
Merge branch 'main' of github.com:Eclectic-Sheep/sheeprl into feature…
belerico Oct 27, 2023
fb270a4
merge: update numpy-np branch
michele-milesi Nov 30, 2023
ada75fc
feat: update np algorithms with new specifications
michele-milesi Nov 30, 2023
a6c64f3
fix: mypy
michele-milesi Dec 1, 2023
62383d7
PokemonRed env from https://github.com/PWhiddy/PokemonRedExperiments/…
belerico Dec 4, 2023
77cd103
Merge branch 'main' of github.com:Eclectic-Sheep/sheeprl into feature…
belerico Dec 4, 2023
a3e70ba
Update dreamer_v3 with main
belerico Dec 4, 2023
ca05368
Update dreamer_v2 with main
belerico Dec 4, 2023
3a24773
Update dreamer_v1 with main
belerico Dec 4, 2023
a2792d2
Update ppo with main
belerico Dec 4, 2023
1e78ad6
Update sac with main
belerico Dec 4, 2023
8c16305
Amend numpy to torch dtype and back dicts
belerico Dec 4, 2023
fbb5743
Merge branch 'feature/pokemon' into feature/buffer-np
belerico Dec 4, 2023
99c1efa
feat: added np callback
michele-milesi Dec 12, 2023
74ac79a
fix: np callback
michele-milesi Dec 12, 2023
1b4a527
feat: add support functions in np checkpoint callback
michele-milesi Dec 12, 2023
7c02f87
feat: added droq np
michele-milesi Dec 14, 2023
90331d2
feat: added ppo recurrent np
michele-milesi Dec 14, 2023
bb43a5b
feat: added sac-ae np
michele-milesi Dec 14, 2023
c686bfa
Merge branch 'main' of https://github.com/Eclectic-Sheep/sheeprl into…
belerico Dec 14, 2023
b78a38a
Update dreamer algos with main
belerico Dec 14, 2023
0cff9c7
Merge branch 'feature/buffer-np' of github.com:Eclectic-Sheep/sheeprl…
michele-milesi Dec 14, 2023
8328c76
feat: added p2e dv1 np
michele-milesi Dec 14, 2023
4c01893
feat: added p2e dv2 np
michele-milesi Dec 15, 2023
b8b5f66
feat: add p2e dv3 np
michele-milesi Dec 15, 2023
cfbeb0a
feat: added ppo decoupled np
michele-milesi Dec 15, 2023
efc3491
Merge branch 'main' of https://github.com/Eclectic-Sheep/sheeprl into…
belerico Dec 15, 2023
6139b16
Merge branch 'feature/buffer-np' of https://github.com/Eclectic-Sheep…
belerico Dec 15, 2023
933030c
feat: add sac decoupled
michele-milesi Dec 15, 2023
b2f2859
Merge branch 'feature/buffer-np' of github.com:Eclectic-Sheep/sheeprl…
michele-milesi Dec 15, 2023
0c47f73
np.tanh instead of torch.tanh
belerico Dec 15, 2023
706d728
Merge branch 'feature/buffer-np' of https://github.com/Eclectic-Sheep…
belerico Dec 15, 2023
dc6c9ce
feat: from tensordict to buffers np
michele-milesi Dec 18, 2023
39974e0
from td to np
belerico Dec 18, 2023
495661c
Merge branch 'feature/buffer-np' of https://github.com/Eclectic-Sheep…
belerico Dec 18, 2023
2150065
exclude mlflow from tests
belerico Dec 18, 2023
21412af
No more tensordict
belerico Dec 18, 2023
818b490
Updated howto
belerico Dec 18, 2023
a82e5a2
Fix tests
belerico Dec 18, 2023
a0f108a
.cpu().numpy() just one time
belerico Dec 18, 2023
f2c5e38
Removed old cfgs
belerico Dec 18, 2023
a8766df
Convert all when hydra instantiating
belerico Dec 18, 2023
2ad65f3
convert all on instantiate
belerico Dec 18, 2023
ad10fe2
[skip-ci] Removed pokemon files
belerico Dec 18, 2023
ba04da0
fix: git merge related errors
michele-milesi Dec 18, 2023
63b1f7e
Fix get absolute path
belerico Dec 19, 2023
ab3ee64
Merge branch 'feature/buffer-np' of https://github.com/Eclectic-Sheep…
belerico Dec 19, 2023
fb0c7d0
Amend dreamer-v3 pokemon config
belerico Dec 19, 2023
4757583
feat: added user choice from as_tensor and from_numpy in sample_tenso…
michele-milesi Dec 19, 2023
b7258a4
mearge: main into feature/user-choice-sample_tensor
michele-milesi Dec 19, 2023
1 change: 1 addition & 0 deletions sheeprl/algos/dreamer_v1/dreamer_v1.py
@@ -689,6 +689,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
     n_samples=1,
     dtype=None,
     device=device,
+    from_numpy=cfg.buffer.from_numpy,
 ) # [N_samples, Seq_len, Batch_size, ...]
 batch = {k: v[0].float() for k, v in sample.items()}
 train(
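The only change in this hunk is threading the new `from_numpy` flag into the sampling call. The trade-off the flag name points at can be sketched as follows (a generic illustration of PyTorch semantics, not the sheeprl implementation): `torch.from_numpy` returns a tensor that shares memory with the source array, while `torch.tensor` always materializes an independent copy.

```python
import numpy as np
import torch

arr = np.zeros(2, dtype=np.float32)

t_shared = torch.from_numpy(arr)  # zero-copy view over the NumPy buffer
t_copied = torch.tensor(arr)      # always an independent copy

arr[0] = 5.0                      # mutate the original array in place
print(t_shared[0].item())         # 5.0: the shared view sees the update
print(t_copied[0].item())         # 0.0: the copy does not
```

Sharing avoids a copy per sample, but the tensor then aliases the (possibly memory-mapped) buffer, so later in-place writes to the buffer are visible through it.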
1 change: 1 addition & 0 deletions sheeprl/algos/dreamer_v2/dreamer_v2.py
@@ -733,6 +733,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
     n_samples=n_samples,
     dtype=None,
     device=fabric.device,
+    from_numpy=cfg.buffer.from_numpy,
 )
 with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
     for i in range(next(iter(local_data.values())).shape[0]):
1 change: 1 addition & 0 deletions sheeprl/algos/dreamer_v3/dreamer_v3.py
@@ -676,6 +676,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
     ),
     dtype=None,
     device=fabric.device,
+    from_numpy=cfg.buffer.from_numpy,
 )
 with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
     for i in range(next(iter(local_data.values())).shape[0]):
6 changes: 4 additions & 2 deletions sheeprl/algos/droq/droq.py
@@ -41,7 +41,9 @@ def train(
     # Sample a minibatch in a distributed way: Line 5 - Algorithm 2
     # We sample one time to reduce the communications between processes
     sample = rb.sample_tensors(
-        cfg.algo.per_rank_gradient_steps * cfg.algo.per_rank_batch_size, sample_next_obs=cfg.buffer.sample_next_obs
+        cfg.algo.per_rank_gradient_steps * cfg.algo.per_rank_batch_size,
+        sample_next_obs=cfg.buffer.sample_next_obs,
+        from_numpy=cfg.buffer.from_numpy,
     )
     critic_data = fabric.all_gather(sample)
     flatten_dim = 3 if fabric.world_size > 1 else 2
@@ -63,7 +65,7 @@ def train(
     critic_sampler = BatchSampler(sampler=critic_idxes, batch_size=cfg.algo.per_rank_batch_size, drop_last=False)

     # Sample a different minibatch in a distributed way to update actor and alpha parameter
-    sample = rb.sample_tensors(cfg.algo.per_rank_batch_size)
+    sample = rb.sample_tensors(cfg.algo.per_rank_batch_size, from_numpy=cfg.buffer.from_numpy)
     actor_data = fabric.all_gather(sample)
     actor_data = {k: v.view(-1, *v.shape[flatten_dim:]) for k, v in actor_data.items()}
     if fabric.world_size > 1:
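The `flatten_dim` logic that follows the gather merges the extra leading dimension that `fabric.all_gather` adds when more than one process is running. A shape-only sketch of that reshape (the sizes B=4, F=3 are made up for illustration; this is not the sheeprl code itself):

```python
import torch

world_size = 2  # hypothetical: two distributed processes

# After all_gather, each per-rank batch of shape [B, 1, F] is stacked
# into [World, B, 1, F].
v = torch.zeros(world_size, 4, 1, 3)

# Mirror of the hunk's flatten logic: with W > 1 the leading
# world/batch/unit dims are merged into one, otherwise only batch/unit.
flatten_dim = 3 if world_size > 1 else 2
flat = v.view(-1, *v.shape[flatten_dim:])
print(flat.shape)  # torch.Size([8, 3])
```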
1 change: 1 addition & 0 deletions sheeprl/algos/p2e_dv1/p2e_dv1_exploration.py
@@ -729,6 +729,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
     n_samples=1,
     dtype=None,
     device=device,
+    from_numpy=cfg.buffer.from_numpy,
 ) # [N_samples, Seq_len, Batch_size, ...]
 batch = {k: v[0].float() for k, v in sample.items()}
 train(
1 change: 1 addition & 0 deletions sheeprl/algos/p2e_dv1/p2e_dv1_finetuning.py
@@ -357,6 +357,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):
     n_samples=1,
     dtype=None,
     device=device,
+    from_numpy=cfg.buffer.from_numpy,
 ) # [N_samples, Seq_len, Batch_size, ...]
 batch = {k: v[0].float() for k, v in sample.items()}
 train(
1 change: 1 addition & 0 deletions sheeprl/algos/p2e_dv2/p2e_dv2_exploration.py
@@ -878,6 +878,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
     n_samples=n_samples,
     dtype=None,
     device=fabric.device,
+    from_numpy=cfg.buffer.from_numpy,
 )
 # Start training
 with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
1 change: 1 addition & 0 deletions sheeprl/algos/p2e_dv2/p2e_dv2_finetuning.py
@@ -380,6 +380,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):
     n_samples=n_samples,
     dtype=None,
     device=fabric.device,
+    from_numpy=cfg.buffer.from_numpy,
 )
 # Start training
 with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
1 change: 1 addition & 0 deletions sheeprl/algos/p2e_dv3/p2e_dv3_exploration.py
@@ -947,6 +947,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
     ),
     dtype=None,
     device=fabric.device,
+    from_numpy=cfg.buffer.from_numpy,
 )
 # Start training
 with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
1 change: 1 addition & 0 deletions sheeprl/algos/p2e_dv3/p2e_dv3_finetuning.py
@@ -379,6 +379,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):
     ),
     dtype=None,
     device=fabric.device,
+    from_numpy=cfg.buffer.from_numpy,
 )
 # Start training
 with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
2 changes: 1 addition & 1 deletion sheeprl/algos/ppo/ppo.py
@@ -343,7 +343,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
     fabric.print(f"Rank-0: policy_step={policy_step}, reward_env_{i}={ep_rew[-1]}")

     # Transform the data into PyTorch Tensors
-    local_data = rb.to_tensor(dtype=None, device=device)
+    local_data = rb.to_tensor(dtype=None, device=device, from_numpy=cfg.buffer.from_numpy)

     # Estimate returns with GAE (https://arxiv.org/abs/1506.02438)
     with torch.no_grad():
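This hunk sits directly above the "Estimate returns with GAE" step. For context, the recursion GAE performs can be sketched for a single environment (a textbook sketch with illustrative gamma/lam values, not sheeprl's vectorized implementation):

```python
import torch

def gae(rewards, values, next_value, dones, gamma=0.99, lam=0.95):
    # advantage_t = delta_t + gamma * lam * (1 - done_t) * advantage_{t+1},
    # where delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_t) - V(s_t)
    T = rewards.shape[0]
    advantages = torch.zeros_like(rewards)
    last_adv = 0.0
    next_v = next_value
    for t in reversed(range(T)):
        not_done = 1.0 - dones[t]
        delta = rewards[t] + gamma * next_v * not_done - values[t]
        last_adv = delta + gamma * lam * not_done * last_adv
        advantages[t] = last_adv
        next_v = values[t]
    return advantages, advantages + values  # advantages, returns

rewards = torch.tensor([1.0, 1.0, 1.0])
values = torch.tensor([0.5, 0.5, 0.5])
dones = torch.tensor([0.0, 0.0, 1.0])
adv, rets = gae(rewards, values, next_value=0.0, dones=dones)
```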
2 changes: 1 addition & 1 deletion sheeprl/algos/ppo/ppo_decoupled.py
@@ -264,7 +264,7 @@ def player(
     fabric.print(f"Rank-0: policy_step={policy_step}, reward_env_{i}={ep_rew[-1]}")

     # Transform the data into PyTorch Tensors
-    local_data = rb.to_tensor(dtype=None, device=device)
+    local_data = rb.to_tensor(dtype=None, device=device, from_numpy=cfg.buffer.from_numpy)

     # Estimate returns with GAE (https://arxiv.org/abs/1506.02438)
     normalized_obs = normalize_obs(next_obs, cfg.algo.cnn_keys.encoder, obs_keys)
2 changes: 1 addition & 1 deletion sheeprl/algos/ppo_recurrent/ppo_recurrent.py
@@ -372,7 +372,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
     fabric.print(f"Rank-0: policy_step={policy_step}, reward_env_{i}={ep_rew[-1]}")

     # Transform the data into PyTorch Tensors
-    local_data = rb.to_tensor(dtype=None, device=device)
+    local_data = rb.to_tensor(dtype=None, device=device, from_numpy=cfg.buffer.from_numpy)

     # Estimate returns with GAE (https://arxiv.org/abs/1506.02438)
     with torch.no_grad():
1 change: 1 addition & 0 deletions sheeprl/algos/sac/sac.py
@@ -286,6 +286,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
     sample_next_obs=cfg.buffer.sample_next_obs,
     dtype=None,
     device=device,
+    from_numpy=cfg.buffer.from_numpy,
 ) # [G*B]
 gathered_data: Dict[str, torch.Tensor] = fabric.all_gather(sample) # [World, G*B]
 for k, v in gathered_data.items():
1 change: 1 addition & 0 deletions sheeprl/algos/sac/sac_decoupled.py
@@ -235,6 +235,7 @@ def player(
     sample_next_obs=cfg.buffer.sample_next_obs,
     dtype=None,
     device=device,
+    from_numpy=cfg.buffer.from_numpy,
 )
 # chunks = {k1: [k1_chunk_1, k1_chunk_2, ...], k2: [k2_chunk_1, k2_chunk_2, ...]}
 chunks = {
1 change: 1 addition & 0 deletions sheeprl/algos/sac_ae/sac_ae.py
@@ -368,6 +368,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
     sample = rb.sample_tensors(
         training_steps * cfg.algo.per_rank_gradient_steps * cfg.algo.per_rank_batch_size,
         sample_next_obs=cfg.buffer.sample_next_obs,
+        from_numpy=cfg.buffer.from_numpy,
     ) # [G*B, 1]
     gathered_data = fabric.all_gather(sample) # [G*B, World, 1]
     flatten_dim = 3 if fabric.world_size > 1 else 2
3 changes: 2 additions & 1 deletion sheeprl/configs/buffer/default.yaml
@@ -1,3 +1,4 @@
 size: ???
 memmap: True
 validate_args: False
+from_numpy: False
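With the new key in the buffer config, an algorithm can read `cfg.buffer.from_numpy` and pick a conversion strategy. A hypothetical helper showing one way such a switch could work (the function name and branching are assumptions for illustration, not sheeprl's code):

```python
import numpy as np
import torch

def to_torch(array: np.ndarray, from_numpy: bool, device: str = "cpu") -> torch.Tensor:
    # Hypothetical switch: share memory with the NumPy buffer when asked,
    # otherwise defer to torch.as_tensor (which may copy if dtype/device differ).
    if from_numpy:
        return torch.from_numpy(array).to(device)
    return torch.as_tensor(array, device=device)

batch = np.ones((2, 3), dtype=np.float32)
print(to_torch(batch, from_numpy=True).shape)  # torch.Size([2, 3])
```

Defaulting the flag to `False` keeps the previous behavior, so existing configs are unaffected unless a user opts in.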