def get_3d_policy_kwargs(extractor_name):
    """Assemble the ``policy_kwargs`` dict for a point-cloud PPO policy.

    The feature extractor is the imagination-augmented PointNet
    (``PointNetImaginationExtractorGP``); the imagination observation keys
    are derived from the module-level ``IMG_CONFIG['relocate_goal_robot']``.

    :param extractor_name: name of the backbone passed through to the
        feature extractor.
    :return: dict suitable for the ``policy_kwargs`` argument of PPO.
    """
    imagination_keys = [
        f'imagination_{key}' for key in IMG_CONFIG['relocate_goal_robot'].keys()
    ]
    extractor_kwargs = {
        "pc_key": "relocate-point_cloud",
        "extractor_name": extractor_name,
        "imagination_keys": imagination_keys,
        "state_key": "state",
    }
    return {
        "features_extractor_class": PointNetImaginationExtractorGP,
        "features_extractor_kwargs": extractor_kwargs,
        "activation_fn": nn.ReLU,
    }
def training():
    """Build the relocate environment, construct a point-cloud PPO model,
    optionally warm-start / freeze the visual extractor, and train.

    NOTE(review): this function reads many module-level globals
    (``object_name``, ``rotation_reward_weight``, ``use_visual_obs``,
    ``args``, ``horizon``, ``seed``, ``extractor_name``, ``pretrain_path``,
    ``env_iter``). For reproducibility they should be passed as parameters
    or bundled in a config object — TODO.
    """
    env_params = dict(
        object_name=object_name,
        rotation_reward_weight=rotation_reward_weight,
        randomness_scale=1,
        use_visual_obs=use_visual_obs,
        use_gui=False,
    )
    # Run the simulation on GPU when one is made visible to the process.
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        env_params["device"] = "cuda"
    # BUG FIX: the environment was previously bound to `environment` while
    # every later statement referenced an undefined `env`; use one name.
    env = AllegroRelocateRLEnv(**env_params)

    model = PPO(
        "PointCloudPolicy", env, verbose=1,
        n_epochs=args.ep,
        # Per-worker rollout length so the total batch is args.n * horizon.
        n_steps=(args.n // args.workers) * horizon,
        learning_rate=args.lr,
        batch_size=args.bs,
        seed=seed,
        policy_kwargs=get_3d_policy_kwargs(extractor_name=extractor_name),
        # NOTE(review): min_lr/max_lr/adaptive_kl (and iter_start below) are
        # not standard stable-baselines3 PPO arguments — presumably a project
        # fork of PPO; confirm against the installed implementation.
        min_lr=args.lr,
        max_lr=args.lr,
        adaptive_kl=0.02,
        target_kl=0.2,
    )
    # Reset once up front; the returned observation is unused here, learn()
    # performs its own resets.
    env.reset()

    # Optionally warm-start the visual feature extractor from a pretrained
    # checkpoint. strict=False tolerates missing/unexpected keys.
    if pretrain_path is not None:
        state_dict: OrderedDict = torch.load(pretrain_path)
        model.policy.features_extractor.extractor.load_state_dict(state_dict, strict=False)
        print("load pretrained model: ", pretrain_path)

    # Resume the iteration counter from any timesteps already accumulated
    # (e.g. when continuing from a loaded model).
    rollout = int(model.num_timesteps / (horizon * args.n))

    if args.freeze:
        # Freeze the pretrained extractor so PPO only updates the policy head.
        model.policy.features_extractor.extractor.eval()
        for param in model.policy.features_extractor.extractor.parameters():
            param.requires_grad = False
        print("freeze model!")

    model.learn(
        total_timesteps=int(env_iter),
        reset_num_timesteps=False,
        iter_start=rollout,
        callback=None,
    )
# Review question: Is the training code well organized? Any advice for
# reproducing the experiment results?