# Assumed imports for this snippet (module paths follow the common
# a3c-super-mario-bros project layout; the later examples assume
# analogous imports for their own helpers):
import os
import shutil
import torch
import torch.multiprocessing as _mp

from src.env import create_train_env
from src.model import ActorCritic
from src.optimizer import GlobalAdam
from src.process import local_train


def train(opt):
    torch.manual_seed(123)
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)
    mp = _mp.get_context("spawn")
    # The environment is created here only to query the observation and
    # action dimensions; each worker builds its own environment instance.
    env, num_states, num_actions = create_train_env(opt.world, opt.stage,
                                                    opt.action_type)
    global_model = ActorCritic(num_states, num_actions)
    if opt.use_gpu:
        global_model.cuda()
    global_model.share_memory()
    if opt.load_from_previous_stage:
        if opt.stage == 1:
            previous_world = opt.world - 1
            previous_stage = 4
        else:
            previous_world = opt.world
            previous_stage = opt.stage - 1
        file_ = "{}/a3c_super_mario_bros_{}_{}".format(opt.saved_path,
                                                       previous_world,
                                                       previous_stage)
        if os.path.isfile(file_):
            global_model.load_state_dict(torch.load(file_))

    optimizer = GlobalAdam(global_model.parameters(), lr=opt.lr)
    # Single-process variant: run one trainer directly in this process
    # (the "spawn" context above is unused here).
    local_train(0, opt, global_model, optimizer, True)
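The opt namespace these functions read is usually built with argparse. Below is a minimal sketch covering the fields accessed above; every flag name and default is an assumption inferred from the attribute accesses, not taken verbatim from the original project:

import argparse

def get_args():
    # Hypothetical flag set matching the attributes train() reads.
    parser = argparse.ArgumentParser("A3C for Super Mario Bros")
    parser.add_argument("--world", type=int, default=1)
    parser.add_argument("--stage", type=int, default=1)
    parser.add_argument("--action_type", type=str, default="complex")
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument("--num_processes", type=int, default=6)
    parser.add_argument("--log_path", type=str, default="tensorboard/a3c")
    parser.add_argument("--saved_path", type=str, default="trained_models")
    parser.add_argument("--use_gpu", action="store_true")
    parser.add_argument("--load_from_previous_stage", action="store_true")
    return parser.parse_args()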
Example #2
def train(opt):
    torch.manual_seed(123)
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)
    mp = _mp.get_context("spawn")
    global_model = ActorCritic(num_inputs=3, num_actions=90)
    global_icm = IntrinsicCuriosityModule(num_inputs=3, num_actions=90)
    if opt.use_gpu:
        global_model.cuda()
        global_icm.cuda()
    global_model.share_memory()
    global_icm.share_memory()

    optimizer = GlobalAdam(list(global_model.parameters()) + list(global_icm.parameters()), lr=opt.lr)
    processes = []
    for index in range(opt.num_processes):
        if index == 0:
            # Worker 0 gets the extra True flag (the logging/saving worker).
            process = mp.Process(target=local_train, args=(index, opt, global_model, global_icm, optimizer, True))
        else:
            process = mp.Process(target=local_train, args=(index, opt, global_model, global_icm, optimizer))
        process.start()
        processes.append(process)
    for process in processes:
        process.join()
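GlobalAdam is not defined in these snippets. The usual A3C implementation subclasses torch.optim.Adam and pre-allocates its state in shared memory, so all worker processes update one set of moment buffers. A sketch under that assumption (the class in the source project may differ in detail):

import torch

class GlobalAdam(torch.optim.Adam):
    def __init__(self, params, lr):
        super().__init__(params, lr=lr)
        # Eagerly create Adam's per-parameter state and move it into
        # shared memory so every worker mutates the same buffers.
        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["step"] = torch.zeros(1).share_memory_()
                state["exp_avg"] = torch.zeros_like(p.data).share_memory_()
                state["exp_avg_sq"] = torch.zeros_like(p.data).share_memory_()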
Example #3
def train(opt):
    torch.manual_seed(123)
    opt.log_path = opt.log_path + "/" + opt.exp
    opt.saved_path = opt.saved_path + "/" + opt.exp
    opt.output_path = opt.output_path + "/" + opt.exp
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    # Make sure the checkpoint and output directories exist as well.
    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)
    if not os.path.isdir(opt.output_path):
        os.makedirs(opt.output_path)
    mp = _mp.get_context("spawn")
    global_model = ActorCritic(num_inputs=3, num_actions=opt.num_actions)
    global_icm = IntrinsicCuriosityModule(num_inputs=3, num_actions=opt.num_actions)

    if opt.resume_path:
        print("Load model from checkpoint: {}".format(opt.resume_path))
        global_model.load_state_dict(torch.load("{}/a3c".format(opt.resume_path)))
        global_icm.load_state_dict(torch.load("{}/icm".format(opt.resume_path)))

    if opt.use_gpu:
        global_model.cuda()
        global_icm.cuda()
    global_model.share_memory()
    global_icm.share_memory()

    optimizer = GlobalAdam(list(global_model.parameters()) + list(global_icm.parameters()), lr=opt.lr)
    processes = []
    for index in range(opt.num_processes):
        if index == 0:
            process = mp.Process(target=local_train, args=(index, opt, global_model, global_icm, optimizer, True))
        else:
            process = mp.Process(target=local_train, args=(index, opt, global_model, global_icm, optimizer))
        process.start()
        processes.append(process)
    for process in processes:
        process.join()
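The resume logic above implies a matching save convention inside the workers. A sketch of that save site (hypothetical; only the two file names are taken from the load calls):

# Inside local_train, e.g. at a periodic checkpoint:
torch.save(global_model.state_dict(), "{}/a3c".format(opt.saved_path))
torch.save(global_icm.state_dict(), "{}/icm".format(opt.saved_path))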
Example #4
def train(opt):
    # SEED is assumed to be a module-level constant, e.g. SEED = 123.
    torch.manual_seed(SEED)

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    mp = _mp.get_context("spawn")
    env, num_states, num_actions = create_train_env(opt.world, opt.stage,
                                                    opt.action_type)
    global_model = ActorCritic(num_states, num_actions)
    global_model.share_memory()

    if opt.load_from_previous_stage:
        if opt.stage == 1:
            previous_world = opt.world - 1
            previous_stage = 4
        else:
            previous_world = opt.world
            previous_stage = opt.stage - 1

        file_ = f"{opt.saved_path}/a3c_super_mario_bros_{previous_world}_{previous_stage}"
        if os.path.isfile(file_):
            global_model.load_state_dict(torch.load(file_))

    optimizer = GlobalAdam(global_model.parameters(), lr=opt.lr)
    processes = []

    for index in range(opt.num_processes):
        if index == 0:
            process = mp.Process(target=local_train,
                                 args=(index, opt, global_model, optimizer,
                                       True))
        else:
            process = mp.Process(target=local_train,
                                 args=(index, opt, global_model, optimizer))

        process.start()
        processes.append(process)

    process = mp.Process(target=local_test,
                         args=(opt.num_processes, opt, global_model))
    process.start()
    processes.append(process)

    for process in processes:
        process.join()
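local_train itself is not shown in any of these examples, but the core A3C synchronization step it performs is standard: each worker keeps a local copy of the network, computes gradients on its own rollouts, copies them onto the shared global parameters, and steps the shared optimizer. A condensed sketch of that step (push_gradients is a hypothetical helper name, not a function from the source project):

def push_gradients(local_model, global_model, optimizer):
    # Point each shared parameter's .grad at the worker's local gradient
    # (the classic A3C "ensure shared grads" pattern), then step the
    # shared optimizer and refresh the worker's weights.
    for local_p, global_p in zip(local_model.parameters(),
                                 global_model.parameters()):
        if global_p.grad is None:
            global_p._grad = local_p.grad
    optimizer.step()
    local_model.load_state_dict(global_model.state_dict())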
def shared_learn(args):
    # Keep each worker single-threaded so the processes do not
    # oversubscribe the CPU cores.
    os.environ['OMP_NUM_THREADS'] = '1'
    torch.manual_seed(123)
    # create path for logs
    if os.path.isdir(args.sum_path):
        shutil.rmtree(args.sum_path)
    os.makedirs(args.sum_path)
    if not os.path.isdir(args.trained_models_path):
        os.makedirs(args.trained_models_path)
    mp = _mp.get_context('spawn')

    # create initial mario environment
    env, num_states, num_actions = build_environment(args.world, args.stage)

    print('Num of states: {}'.format(num_states))    # e.g. 4
    print('Environment: {}'.format(env))
    print('Num of actions: {}'.format(num_actions))  # e.g. 12

    # check if cuda is available else cpu
    device = torch.device('cuda' if (
        args.use_cuda and torch.cuda.is_available()) else 'cpu')

    CAE_shared_model = Convolutional_AutoEncoder()
    A3C_shared_model = ActorCritic(num_states, num_actions)
    # when continuing on a new stage, warm-start from the saved model
    if args.new_stage:
        A3C_shared_model.load_state_dict(
            torch.load('{}/a3c_super_mario_bros_{}_{}_enc2'.format(
                args.trained_models_path, args.world, args.stage)))
        A3C_shared_model.eval()
    # GPU check
    if (args.use_cuda and torch.cuda.is_available()):
        A3C_shared_model.cuda()
        CAE_shared_model.cuda()
    # shares memory with worker instances
    CAE_shared_model.share_memory()

    A3C_shared_model.share_memory()

    print('A3C')
    print(A3C_shared_model)
    # initialize optimizers
    optimizer_cae = CAE_shared_model.createLossAndOptimizer(
        CAE_shared_model, 0.001)
    optimizer_a3c = SharedAdam(A3C_shared_model.parameters(), lr=args.lr)

    # processes
    workers = []

    # start train process (run for the set number of workers)
    for rank in range(args.num_processes):
        if rank == 0:
            # worker 0 also handles logging/saving (trailing True flag)
            worker = mp.Process(target=train_a3c,
                                args=(rank, args, optimizer_a3c,
                                      A3C_shared_model, CAE_shared_model,
                                      optimizer_cae, True))
        else:
            worker = mp.Process(target=train_a3c,
                                args=(rank, args, optimizer_a3c,
                                      A3C_shared_model, CAE_shared_model,
                                      optimizer_cae))
        worker.start()
        workers.append(worker)

    # test worker (runs on the next free rank)
    worker = mp.Process(target=test_a3c,
                        args=(args.num_processes, args, A3C_shared_model,
                              CAE_shared_model))
    worker.start()
    workers.append(worker)

    # join all processes
    for worker in workers:
        worker.join()
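Because every variant above uses the "spawn" start method, the entry point must be wrapped in a main guard; otherwise each child process re-executes the module top level and spawns workers of its own. A minimal launcher, reusing the hypothetical get_args sketched after the first example:

if __name__ == "__main__":
    opt = get_args()
    train(opt)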