~/tmp/baselines/baselines/deepq/deepq.py in learn(env, network, seed, lr, total_timesteps, buffer_size, exploration_fraction, exploration_final_eps, train_freq, batch_size, print_freq, checkpoint_freq, checkpoint_path, learning_starts, gamma, target_network_update_freq, prioritized_replay, prioritized_replay_alpha, prioritized_replay_beta0, prioritized_replay_beta_iters, prioritized_replay_eps, param_noise, callback, load_path, **network_kwargs)
202 make_obs_ph=make_obs_ph,
203 q_func=q_func,
--> 204 num_actions=env.action_space.n,
205 optimizer=tf.train.AdamOptimizer(learning_rate=lr),
206 gamma=gamma,
AttributeError: 'list' object has no attribute 'n'
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
from baselines.run import get_learn_function
from multiagent.environment import MultiAgentEnv
import multiagent.scenarios as scenarios
# Hyperparameters shared by every algorithm under test.
common_kwargs = dict(total_timesteps=30000, network="mlp", gamma=1.0, seed=0)

# Per-algorithm overrides; merged on top of common_kwargs below, so a key
# present here (e.g. deepq's total_timesteps) wins over the shared value.
learn_kwargs = {
    'a2c': dict(nsteps=32, value_network='copy', lr=0.05),
    'acktr': dict(nsteps=32, value_network='copy'),
    'deepq': dict(total_timesteps=20000),
    'ppo2': dict(value_network='copy'),
    'trpo_mpi': {},
}

alg = "deepq"
# Dict-unpacking merge: overrides shadow the shared defaults.
kwargs = {**common_kwargs, **learn_kwargs[alg]}


def learn_fn(e):
    """Run the selected algorithm's learn() on environment *e* and return the model."""
    return get_learn_function(alg)(env=e, **kwargs)
def env_fn():
    """Construct a fresh simple_tag multi-agent environment.

    Loads the scenario module, builds its world, and wires the scenario's
    reset/reward/observation/benchmark callbacks into a MultiAgentEnv.
    """
    scn = scenarios.load("simple_tag.py").Scenario()
    return MultiAgentEnv(
        scn.make_world(),
        scn.reset_world,
        scn.reward,
        scn.observation,
        scn.benchmark_data,
    )
# Wrap the single env factory in a subprocess-based vectorized env.
env = SubprocVecEnv([env_fn])
# NOTE(review): this call raises the AttributeError shown in the traceback
# above — deepq's build_train reads env.action_space.n, but for a
# MultiAgentEnv the action_space appears to be a *list* of per-agent
# spaces rather than a single Discrete space; deepq does not support
# multi-agent action spaces out of the box — confirm before reusing.
model = learn_fn(env)