Lang Feng committed
Commit bae6790 · unverified · 1 Parent(s): 37e4ddd

add 'resources_per_worker' config for easily managing cpus/gpus of each env worker (#148)
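In short: every environment package previously pinned its Ray actor resources with a hard-coded @ray.remote(num_cpus=...) class decorator; this commit instead builds the remote class at construction time from config.env.resources_per_worker. A minimal runnable sketch of that pattern (EnvWorker is a stand-in name for AlfworldWorker, SokobanWorker, WebshopWorker, etc.):

import ray

class EnvWorker:
    # Stand-in for the per-package worker classes touched by this commit.
    def ping(self):
        return "ok"

ray.init(ignore_reinit_error=True)

# Before: resources were baked in, e.g. @ray.remote(num_cpus=0.2).
# After: they come from config, so users can tune CPU/GPU per worker.
resources_per_worker = {"num_cpus": 0.1, "num_gpus": 0}
RemoteEnvWorker = ray.remote(**resources_per_worker)(EnvWorker)

worker = RemoteEnvWorker.remote()
print(ray.get(worker.ping.remote()))  # -> "ok"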
agent_system/environments/env_manager.py CHANGED
@@ -22,6 +22,7 @@ import os
 from agent_system.environments.prompts import *
 from agent_system.environments.base import EnvironmentManagerBase, to_numpy
 from agent_system.memory import SimpleMemory
+from omegaconf import OmegaConf
 
 def parse_gamefile(infos):
     gamefile = []
@@ -518,10 +519,12 @@ def make_envs(config):
     if not isinstance(config.env.rollout.n, int):
         raise ValueError("config.env.rollout.n should be an integer")
     group_n = config.env.rollout.n if config.env.rollout.n > 0 else 1
+    resources_per_worker = OmegaConf.to_container(config.env.resources_per_worker, resolve=True)
+
     if "gym_cards" in config.env.env_name.lower():
         from agent_system.environments.env_package.gym_cards import build_gymcards_envs, gym_projection
-        _envs = build_gymcards_envs(env_name=config.env.env_name, seed=config.env.seed, env_num=config.data.train_batch_size, group_n=group_n, is_train=True)
-        _val_envs = build_gymcards_envs(env_name=config.env.env_name, seed=config.env.seed + 1000, env_num=config.data.val_batch_size, group_n=1, is_train=False)
+        _envs = build_gymcards_envs(env_name=config.env.env_name, seed=config.env.seed, env_num=config.data.train_batch_size, group_n=group_n, is_train=True, resources_per_worker=resources_per_worker)
+        _val_envs = build_gymcards_envs(env_name=config.env.env_name, seed=config.env.seed + 1000, env_num=config.data.val_batch_size, group_n=1, is_train=False, resources_per_worker=resources_per_worker)
 
         projection_f = partial(gym_projection, env_name=config.env.env_name)
         envs = GymCardEnvironmentManager(_envs, projection_f, config)
@@ -539,8 +542,8 @@ def make_envs(config):
         env_kwargs = {
             'eval_dataset': 'eval_in_distribution', # 'eval_in_distribution' or 'eval_out_of_distribution'
         }
-        _envs = build_alfworld_envs(alf_config_path, config.env.seed, config.data.train_batch_size, group_n, is_train=True, env_kwargs=env_kwargs)
-        _val_envs = build_alfworld_envs(alf_config_path, config.env.seed + 1000, config.data.val_batch_size, 1, is_train=False, env_kwargs=env_kwargs)
+        _envs = build_alfworld_envs(alf_config_path, config.env.seed, config.data.train_batch_size, group_n, is_train=True, env_kwargs=env_kwargs, resources_per_worker=resources_per_worker)
+        _val_envs = build_alfworld_envs(alf_config_path, config.env.seed + 1000, config.data.val_batch_size, 1, is_train=False, env_kwargs=env_kwargs, resources_per_worker=resources_per_worker)
 
         projection_f = partial(alfworld_projection)
         envs = AlfWorldEnvironmentManager(_envs, projection_f, config)
@@ -554,8 +557,8 @@ def make_envs(config):
             'max_steps': config.env.max_steps,
             'search_depth': config.env.sokoban.search_depth
         }
-        _envs = build_sokoban_envs(config.env.seed, config.data.train_batch_size, group_n, mode=config.env.sokoban.mode, is_train=True, env_kwargs=env_kwargs)
-        _val_envs = build_sokoban_envs(config.env.seed + 1000, config.data.val_batch_size, 1, mode=config.env.sokoban.mode, is_train=False, env_kwargs=env_kwargs)
+        _envs = build_sokoban_envs(config.env.seed, config.data.train_batch_size, group_n, mode=config.env.sokoban.mode, is_train=True, env_kwargs=env_kwargs, resources_per_worker=resources_per_worker)
+        _val_envs = build_sokoban_envs(config.env.seed + 1000, config.data.val_batch_size, 1, mode=config.env.sokoban.mode, is_train=False, env_kwargs=env_kwargs, resources_per_worker=resources_per_worker)
 
         projection_f = partial(sokoban_projection)
         envs = SokobanEnvironmentManager(_envs, projection_f, config)
@@ -576,8 +579,8 @@ def make_envs(config):
             'file_path': file_path,
             'attr_path': attr_path
         }
-        _envs = build_webshop_envs(seed=config.env.seed, env_num=config.data.train_batch_size, group_n=group_n, is_train=True, env_kwargs=env_kwargs)
-        _val_envs = build_webshop_envs(seed=config.env.seed + 1000, env_num=config.data.val_batch_size, group_n=1, is_train=False, env_kwargs=env_kwargs)
+        _envs = build_webshop_envs(seed=config.env.seed, env_num=config.data.train_batch_size, group_n=group_n, is_train=True, env_kwargs=env_kwargs, resources_per_worker=resources_per_worker)
+        _val_envs = build_webshop_envs(seed=config.env.seed + 1000, env_num=config.data.val_batch_size, group_n=1, is_train=False, env_kwargs=env_kwargs, resources_per_worker=resources_per_worker)
 
         projection_f = partial(webshop_projection)
         envs = WebshopEnvironmentManager(_envs, projection_f, config)
@@ -587,8 +590,8 @@ def make_envs(config):
         return envs, val_envs
     elif "appworld" in config.env.env_name.lower():
         from agent_system.environments.env_package.appworld import build_appworld_envs, appworld_projection
-        _envs = build_appworld_envs(dataset_name='train', seed=config.env.seed, env_num=config.data.train_batch_size, group_n=group_n, start_server_id=0)
-        _val_envs = build_appworld_envs(dataset_name='test_normal', seed=config.env.seed + 1000, env_num=config.data.val_batch_size, group_n=1, start_server_id=config.data.train_batch_size*group_n)
+        _envs = build_appworld_envs(dataset_name='train', seed=config.env.seed, env_num=config.data.train_batch_size, group_n=group_n, start_server_id=0, resources_per_worker=resources_per_worker)
+        _val_envs = build_appworld_envs(dataset_name='test_normal', seed=config.env.seed + 1000, env_num=config.data.val_batch_size, group_n=1, start_server_id=config.data.train_batch_size*group_n, resources_per_worker=resources_per_worker)
 
         projection_f = partial(appworld_projection)
         envs = AppWorldEnvironmentManager(_envs, projection_f, config)
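Why the OmegaConf.to_container(..., resolve=True) call above: config.env.resources_per_worker is a DictConfig node, and the env packages splat it into ray.remote(**resources_per_worker), which expects plain keyword arguments. A small sketch (the config literal here is a hypothetical stand-in for the YAML defaults added at the bottom of this commit):

from omegaconf import OmegaConf

cfg = OmegaConf.create({"env": {"resources_per_worker": {"num_cpus": 0.1, "num_gpus": 0}}})

# Convert the DictConfig node into a plain dict so it can be
# passed as **kwargs to ray.remote(...).
resources_per_worker = OmegaConf.to_container(cfg.env.resources_per_worker, resolve=True)
print(type(resources_per_worker), resources_per_worker)
# <class 'dict'> {'num_cpus': 0.1, 'num_gpus': 0}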
agent_system/environments/env_package/alfworld/envs.py CHANGED
@@ -52,7 +52,6 @@ def compute_reward(info, multi_modal=False):
     reward = 10.0 * float(info['won'])
     return reward
 
-@ray.remote(num_cpus=0.2)
 class AlfworldWorker:
     """
     Ray remote actor that replaces the worker function.
@@ -84,7 +83,7 @@ class AlfworldWorker:
         return image
 
 class AlfworldEnvs(gym.Env):
-    def __init__(self, alf_config_path, seed=0, env_num=1, group_n=1, is_train=True, env_kwargs={}):
+    def __init__(self, alf_config_path, seed, env_num, group_n, resources_per_worker, is_train=True, env_kwargs={}):
         super().__init__()
 
         # Initialize Ray if not already initialized
@@ -100,9 +99,10 @@ class AlfworldEnvs(gym.Env):
         self.group_n = group_n
 
         # Create Ray remote actors instead of processes
+        env_worker = ray.remote(**resources_per_worker)(AlfworldWorker)
         self.workers = []
         for i in range(self.num_processes):
-            worker = AlfworldWorker.remote(config, seed + (i // self.group_n), base_env)
+            worker = env_worker.remote(config, seed + (i // self.group_n), base_env)
             self.workers.append(worker)
 
         self.prev_admissible_commands = [None for _ in range(self.num_processes)]
@@ -202,5 +202,5 @@ class AlfworldEnvs(gym.Env):
         for worker in self.workers:
             ray.kill(worker)
 
-def build_alfworld_envs(alf_config_path, seed, env_num, group_n, is_train=True, env_kwargs={}):
-    return AlfworldEnvs(alf_config_path, seed, env_num, group_n, is_train, env_kwargs)
+def build_alfworld_envs(alf_config_path, seed, env_num, group_n, resources_per_worker, is_train=True, env_kwargs={}):
+    return AlfworldEnvs(alf_config_path, seed, env_num, group_n, resources_per_worker, is_train, env_kwargs)
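A side note on the untouched seeding logic above: seed + (i // self.group_n) gives every worker in the same rollout group the same seed, presumably so grouped environments reset to identical states (which GRPO/GiGPO-style grouping relies on). A quick check of the arithmetic:

# Seed assignment for env_num=4 tasks with group_n=2 rollouts each
# (illustrative values, not from the repo).
seed, env_num, group_n = 0, 4, 2
num_processes = env_num * group_n
print([seed + (i // group_n) for i in range(num_processes)])
# -> [0, 0, 1, 1, 2, 2, 3, 3]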
agent_system/environments/env_package/appworld/envs.py CHANGED
@@ -39,7 +39,6 @@ def load_available_ports(port_file="appworld_ports.ports"):
 
     return ports
 
-@ray.remote(num_cpus=0.1)
 class AppWorldWorker:
     """
     Ray Actor that holds an instance of AppWorld and operates the environment
@@ -115,6 +114,7 @@ class AppWorldEnvs:
                  env_num,
                  group_n,
                  start_server_id,
+                 resources_per_worker,
                  port_file="appworld_ports.ports"
                  ):
         super().__init__()
@@ -145,10 +145,11 @@ class AppWorldEnvs:
             ray.init()
 
         # Create Ray actors (workers)
+        env_worker = ray.remote(**resources_per_worker)(AppWorldWorker)
         self.workers = []
         for i in range(self.num_processes):
             port = self.available_ports[i]
-            worker = AppWorldWorker.remote(
+            worker = env_worker.remote(
                 worker_id=start_server_id + i,
                 max_interactions=self.max_interactions,
                 port=port
@@ -240,6 +241,7 @@ def build_appworld_envs(dataset_name="train",
                         env_num=1,
                         group_n=1,
                         start_server_id=0,
+                        resources_per_worker={"num_cpus": 0.1},
                         ):
 
     return AppWorldEnvs(
@@ -249,4 +251,5 @@ def build_appworld_envs(dataset_name="train",
         env_num=env_num,
         group_n=group_n,
         start_server_id=start_server_id,
+        resources_per_worker=resources_per_worker
     )
agent_system/environments/env_package/gym_cards/envs.py CHANGED
@@ -18,7 +18,7 @@ import ray
 import numpy as np
 from gym_cards.envs import Point24Env, EZPointEnv, BlackjackEnv, NumberLineEnv
 
-@ray.remote(num_cpus=0.2)
+
 class GymCardsWorker:
     """
     Ray remote actor that replaces the worker function.
@@ -66,6 +66,7 @@ class GymMultiProcessEnv(gym.Env):
                  seed=0,
                  env_num=1,
                  group_n=1,
+                 resources_per_worker={"num_cpus": 0.1},
                  is_train=True):
         super().__init__()
 
@@ -80,11 +81,12 @@ class GymMultiProcessEnv(gym.Env):
         self.num_processes = env_num * group_n
 
         np.random.seed(seed)
-
+
         # Create Ray remote actors instead of processes
+        env_worker = ray.remote(**resources_per_worker)(GymCardsWorker)
         self.workers = []
         for _ in range(self.num_processes):
-            worker = GymCardsWorker.remote(self.env_id)
+            worker = env_worker.remote(self.env_id)
             self.workers.append(worker)
 
     def step(self, actions):
@@ -162,6 +164,7 @@ def build_gymcards_envs(env_name,
                        seed,
                        env_num,
                        group_n,
+                       resources_per_worker,
                        is_train=True):
    """
    Externally exposed constructor function to create parallel Gym environments.
@@ -176,5 +179,6 @@ def build_gymcards_envs(env_name,
        seed=seed,
        env_num=env_num,
        group_n=group_n,
+       resources_per_worker=resources_per_worker,
        is_train=is_train,
    )
agent_system/environments/env_package/sokoban/envs.py CHANGED
@@ -18,7 +18,6 @@ import gym
 from agent_system.environments.env_package.sokoban.sokoban import SokobanEnv
 import numpy as np
 
-@ray.remote(num_cpus=0.2)
 class SokobanWorker:
     """
     Ray remote actor that replaces the worker function.
@@ -57,6 +56,7 @@ class SokobanMultiProcessEnv(gym.Env):
                  env_num=1,
                  group_n=1,
                  mode='rgb_array',
+                 resources_per_worker={"num_cpus": 0.1},
                  is_train=True,
                  env_kwargs=None):
         """
@@ -82,9 +82,10 @@ class SokobanMultiProcessEnv(gym.Env):
             env_kwargs = {}
 
         # Create Ray remote actors instead of processes
+        env_worker = ray.remote(**resources_per_worker)(SokobanWorker)
         self.workers = []
         for i in range(self.num_processes):
-            worker = SokobanWorker.remote(self.mode, env_kwargs)
+            worker = env_worker.remote(self.mode, env_kwargs)
             self.workers.append(worker)
 
     def step(self, actions):
@@ -178,6 +179,7 @@ def build_sokoban_envs(
         env_num=1,
         group_n=1,
         mode='rgb_array',
+        resources_per_worker={"num_cpus": 0.1},
         is_train=True,
         env_kwargs=None):
-    return SokobanMultiProcessEnv(seed, env_num, group_n, mode, is_train, env_kwargs=env_kwargs)
+    return SokobanMultiProcessEnv(seed, env_num, group_n, mode, resources_per_worker, is_train, env_kwargs=env_kwargs)
agent_system/environments/env_package/webshop/envs.py CHANGED
@@ -21,7 +21,6 @@ import numpy as np
 # Ray remote worker actor -----------------------------------------------------
 # -----------------------------------------------------------------------------
 
-@ray.remote(num_cpus=0.2)
 class WebshopWorker:
     """Ray remote actor that replaces the worker function.
     Each actor hosts a *WebAgentTextEnv* instance.
@@ -94,9 +93,10 @@ class WebshopMultiProcessEnv(gym.Env):
     """
     def __init__(
         self,
-        seed: int = 0,
-        env_num: int = 1,
-        group_n: int = 1,
+        seed: int,
+        env_num: int,
+        group_n: int,
+        resources_per_worker: dict,
         is_train: bool = True,
         env_kwargs: dict = None,
     ) -> None:
@@ -117,10 +117,10 @@ class WebshopMultiProcessEnv(gym.Env):
         self._env_kwargs = env_kwargs if env_kwargs is not None else {'observation_mode': 'text', 'num_products': None}
 
         # -------------------------- Ray actors setup --------------------------
+        env_worker = ray.remote(**resources_per_worker)(WebshopWorker)
         self._workers = []
-
         for i in range(self.num_processes):
-            worker = WebshopWorker.remote(seed + (i // self.group_n), self._env_kwargs)
+            worker = env_worker.remote(seed + (i // self.group_n), self._env_kwargs)
             self._workers.append(worker)
 
         # Get goals from the first worker
@@ -239,9 +239,10 @@
 # -----------------------------------------------------------------------------
 
 def build_webshop_envs(
-    seed: int = 0,
-    env_num: int = 1,
-    group_n: int = 1,
+    seed: int,
+    env_num: int,
+    group_n: int,
+    resources_per_worker: dict,
     is_train: bool = True,
     env_kwargs: dict = None,
 ):
@@ -250,6 +251,7 @@ def build_webshop_envs(
         seed=seed,
         env_num=env_num,
         group_n=group_n,
+        resources_per_worker=resources_per_worker,
         is_train=is_train,
         env_kwargs=env_kwargs,
     )
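All of the example scripts below thread a single shell variable, num_cpus_per_env_worker, into env.resources_per_worker.num_cpus. Ray treats num_cpus as a logical scheduling resource, so a fractional value controls how densely actors pack onto a node; a rough capacity sketch (the 16-logical-CPU node is an assumption, not from the repo):

# Back-of-the-envelope actor packing on an assumed 16-logical-CPU node.
num_cpus_per_env_worker = 0.1
total_logical_cpus = 16
print(int(total_logical_cpus / num_cpus_per_env_worker))
# -> 160 env workers schedulable concurrently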
examples/dapo_trainer/run_alfworld.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -60,6 +62,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=50 \
     env.rollout.n=${group_size} \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_alfworld' \
examples/dapo_trainer/run_webshop.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -60,6 +62,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=15 \
     env.rollout.n=${group_size} \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_webshop' \
examples/gigpo_dynamic_trainer/run_alfworld.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -64,6 +66,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=50 \
     env.rollout.n=$group_size \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_alfworld' \
examples/gigpo_dynamic_trainer/run_sokoban.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=32
 val_data_size=128
 group_size=8
@@ -66,6 +68,7 @@ python3 -m verl.trainer.main_ppo \
     env.max_steps=15 \
     env.rollout.n=$group_size \
    env.sokoban.mode='rgb_array' \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_sokoban' \
examples/gigpo_dynamic_trainer/run_webshop.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -64,6 +66,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=15 \
     env.rollout.n=$group_size \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_webshop' \
examples/gigpo_trainer/run_alfworld.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -56,6 +58,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=50 \
     env.rollout.n=$group_size \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_alfworld' \
examples/gigpo_trainer/run_alfworld_lora.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -56,6 +58,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=50 \
     env.rollout.n=$group_size \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_alfworld' \
examples/gigpo_trainer/run_blackjack.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=32
 val_data_size=128
 group_size=8
@@ -56,6 +58,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=15 \
     env.rollout.n=$group_size \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_blackjack' \
examples/gigpo_trainer/run_ezpoints.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -56,6 +58,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=8 \
     env.rollout.n=${group_size} \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_ezpoints' \
examples/gigpo_trainer/run_numberline.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -56,6 +58,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=10 \
     env.rollout.n=$group_size \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_numberLine' \
examples/gigpo_trainer/run_sokoban.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=32
 val_data_size=128
 group_size=8
@@ -57,6 +59,7 @@ python3 -m verl.trainer.main_ppo \
     env.max_steps=15 \
     env.rollout.n=$group_size \
     env.sokoban.mode='rgb_array' \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_sokoban' \
examples/gigpo_trainer/run_webshop.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -56,6 +58,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=15 \
     env.rollout.n=$group_size \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_webshop' \
examples/gigpo_trainer/run_webshop_lora.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -57,6 +59,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=15 \
     env.rollout.n=$group_size \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_webshop' \
examples/gigpo_trainer/run_webshop_qwen3.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -54,6 +56,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=15 \
     env.rollout.n=$group_size \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_webshop' \
examples/grpo_trainer/run_alfworld.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -52,6 +54,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=50 \
     env.rollout.n=$group_size \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_alfworld' \
examples/grpo_trainer/run_balckjack.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=32
 val_data_size=128
 group_size=8
@@ -52,6 +54,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=15 \
     env.rollout.n=$group_size \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_blackjack' \
examples/grpo_trainer/run_sokoban.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=32
 val_data_size=128
 group_size=8
@@ -53,6 +55,7 @@ python3 -m verl.trainer.main_ppo \
     env.max_steps=15 \
     env.rollout.n=$group_size \
     env.sokoban.mode='rgb_array' \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_sokoban' \
examples/grpo_trainer/run_webshop.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=16
 val_data_size=128
 group_size=8
@@ -52,6 +54,7 @@ python3 -m verl.trainer.main_ppo \
     env.seed=0 \
     env.max_steps=15 \
     env.rollout.n=$group_size \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_webshop' \
examples/ppo_trainer/run_alfworld.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=128 # match GRPO and GiGPO configuration (16 × 8)
 val_data_size=128
 
@@ -56,6 +58,7 @@ python3 -m verl.trainer.main_ppo \
     env.env_name=alfworld/AlfredTWEnv \
     env.seed=0 \
     env.max_steps=50 \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_alfworld' \
examples/ppo_trainer/run_webshop.sh CHANGED
@@ -2,6 +2,8 @@ set -x
 ENGINE=${1:-vllm}
 export VLLM_ATTENTION_BACKEND=XFORMERS
 
+num_cpus_per_env_worker=0.1 # The CPU resource allocated for each environment worker. If you want to use less CPU resources, you can decrease this value.
+
 train_data_size=128 # match GRPO and GiGPO configuration (16 × 8)
 val_data_size=128
 
@@ -56,6 +58,7 @@ python3 -m verl.trainer.main_ppo \
     env.env_name=Webshop \
     env.seed=0 \
     env.max_steps=15 \
+    env.resources_per_worker.num_cpus=$num_cpus_per_env_worker \
     trainer.critic_warmup=0 \
     trainer.logger=['console','wandb'] \
     trainer.project_name='verl_agent_webshop' \
verl/trainer/config/ppo_trainer.yaml CHANGED
@@ -288,6 +288,9 @@ env:
   seed: 0
   max_steps: 50
   history_length: 2
+  resources_per_worker: # resources for each env worker
+    num_cpus: 0.1
+    num_gpus: 0
   rollout:
     n: -1 # the group number of envs (for GRPO and GiGPO). -1 means disable env grouping.
 
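As a usage note, the dotted overrides in the scripts above merge onto these YAML defaults. A sketch of the equivalent OmegaConf behavior (the 0.05 override is a hypothetical value, not from the repo):

from omegaconf import OmegaConf

base = OmegaConf.create({"env": {"resources_per_worker": {"num_cpus": 0.1, "num_gpus": 0}}})
# Dotted CLI-style override, like env.resources_per_worker.num_cpus=$num_cpus_per_env_worker:
override = OmegaConf.from_dotlist(["env.resources_per_worker.num_cpus=0.05"])
merged = OmegaConf.merge(base, override)
print(merged.env.resources_per_worker)  # {'num_cpus': 0.05, 'num_gpus': 0}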