Example #1
def train(shared_model, shared_optimizer, rank, args, info):
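    """One A3C worker: repeatedly sync with the shared model, roll out
    args.steps transitions, and push the resulting gradients back through
    the shared optimizer."""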
    env = gym.make(args.env)
    env.seed(args.seed + rank)

    torch.manual_seed(args.seed + rank)
    model = ActorCritic(num_actions=args.num_actions)
    state = torch.tensor(prepro(env.reset()))

    start_time = last_disp_time = time.time()
    episode_length = 0
    episode_reward = 0
    episode_loss = 0
    done = True

    while info['frames'][0] <= 8e7 or args.test:
        model.load_state_dict(shared_model.state_dict())

        if done:
            hx = torch.zeros(1, 256)
        else:
            hx = hx.detach()
        values = []
        logps = []
        actions = []
        rewards = []

        for step in range(args.steps):
            episode_length += 1
            value, logit, hx = model((state.view(1, 1, 80, 80), hx))
            logp = F.log_softmax(logit, dim=-1)

            action = torch.exp(logp).multinomial(num_samples=1).data[0]
            state, reward, done, _ = env.step(action.numpy()[0])
            # if args.test:
            #     env.render()

            state = torch.tensor(prepro(state))
            episode_reward += reward
            reward = np.clip(reward, -1, 1)
            done = done or episode_length >= 1e4

            info['frames'].add_(1)
            num_frames = int(info['frames'].item())
            if num_frames % 4e6 == 0:
                torch.save(
                    shared_model.state_dict(), args.save_dir +
                    'model.{:.0f}.tar'.format(num_frames / 1e6))

            if done:
                info['episodes'] += 1
                if info['episodes'][0] == 1:
                    interp = 1
                else:
                    interp = 1 - args.horizon
                info['run_epr'].mul_(1 - interp).add_(interp * episode_reward)
                info['run_loss'].mul_(1 - interp).add_(interp * episode_loss)

            if rank == 0 and time.time() - last_disp_time > 60:
                elapsed = time.strftime("%Hh %Mm %Ss",
                                        time.gmtime(time.time() - start_time))
                printlog(
                    args.save_dir,
                    'time {}, episodes {:.0f}, frames {:.1f}M, mean episode_reward {:.2f}, run loss {:.2f}'
                    .format(elapsed, info['episodes'].item(), num_frames / 1e6,
                            info['run_epr'].item(), info['run_loss'].item()))
                last_disp_time = time.time()

            if done:
                episode_length, episode_reward, episode_loss = 0, 0, 0
                state = torch.tensor(prepro(env.reset()))

            values.append(value)
            logps.append(logp)
            actions.append(action)
            rewards.append(reward)

        if done:
            next_value = torch.zeros(1, 1)
        else:
            next_value = model((state.unsqueeze(0), hx))[0]
        values.append(next_value.detach())

        loss = cost_func(args, torch.cat(values), torch.cat(logps),
                         torch.cat(actions), np.asarray(rewards))
        episode_loss += loss.item()
        shared_optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 40)

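        # point the shared model's empty gradient slots at this worker's
        # freshly computed gradients so shared_optimizer.step() applies them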
        for param, shared_param in zip(model.parameters(),
                                       shared_model.parameters()):
            if shared_param.grad is None:
                shared_param._grad = param.grad
        shared_optimizer.step()
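The loop above relies on two helpers the excerpt never defines: prepro, which must turn a raw frame into the 1x80x80 tensor behind state.view(1, 1, 80, 80), and cost_func, which turns a rollout into a scalar loss. Below is a minimal sketch of plausible implementations, assuming an Atari-style 210x160x3 frame, generalized advantage estimation, and gamma/tau hyperparameters on args; none of these are confirmed by the excerpt.

import cv2
import numpy as np
import torch
from scipy.signal import lfilter

def prepro(img):
    # crop the playfield, grayscale, downscale to 80x80, normalize to [0, 1]
    gray = img[35:195].mean(axis=2).astype(np.float32)
    small = cv2.resize(gray, (80, 80), interpolation=cv2.INTER_LINEAR)
    return small.reshape(1, 80, 80) / 255.0

def discount(x, gamma):
    # discounted cumulative sum, right to left: y[t] = x[t] + gamma * y[t+1]
    return lfilter([1], [1, -gamma], x[::-1])[::-1]

def cost_func(args, values, logps, actions, rewards):
    # values: (T+1, 1) including the bootstrap value; rewards: numpy, (T,)
    np_values = values.view(-1).data.numpy()

    # generalized advantage estimation from one-step TD residuals
    delta_t = rewards + args.gamma * np_values[1:] - np_values[:-1]
    advantages = discount(delta_t, args.gamma * args.tau)
    logpys = logps.gather(1, actions.view(-1, 1))
    policy_loss = -(logpys.view(-1) * torch.FloatTensor(advantages.copy())).sum()

    # L2 loss between value estimates and discounted returns,
    # bootstrapping the last reward from the final value estimate
    rewards[-1] += args.gamma * np_values[-1]
    discounted_r = torch.tensor(discount(rewards, args.gamma).copy(),
                                dtype=torch.float32)
    value_loss = 0.5 * (discounted_r - values[:-1, 0]).pow(2).sum()

    # entropy bonus (subtracted) encourages exploration
    entropy_loss = (-logps * torch.exp(logps)).sum()
    return policy_loss + 0.5 * value_loss - 0.01 * entropy_loss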
Example #2
    mp.set_start_method('spawn')
    args = get_args()
    args.save_dir = 'a3c-{}/'.format(args.env.lower())
    if args.test:
        args.processes = 1
        args.lr = 0
    args.num_actions = gym.make(args.env).action_space.n
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    torch.manual_seed(args.seed)
    shared_model = ActorCritic(num_actions=args.num_actions).share_memory()
    shared_optimizer = SharedAdam(shared_model.parameters(), lr=args.lr)

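    # scalar training statistics in shared memory so every worker process
    # can read and update them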
    info = {
        k: torch.DoubleTensor([0]).share_memory_()
        for k in ['run_epr', 'run_loss', 'episodes', 'frames']
    }
    info['frames'] += shared_model.try_load(args.save_dir) * 1e6
    if int(info['frames'].item()) == 0:
        printlog(args.save_dir, '', mode='w')

    processes = []
    for rank in range(args.processes):
        p = mp.Process(target=train,
                       args=(shared_model, shared_optimizer, rank, args, info))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
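SharedAdam is likewise not shown. A common implementation is an Adam variant whose per-parameter state lives in shared memory, so all worker processes update the same moment estimates. Here is a minimal sketch of that idea, offered as an assumption rather than the actual class used above:

import math
import torch

class SharedAdam(torch.optim.Adam):
    """Adam whose per-parameter state lives in shared memory."""

    def __init__(self, params, lr=1e-4, betas=(0.9, 0.999), eps=1e-8):
        super(SharedAdam, self).__init__(params, lr=lr, betas=betas, eps=eps)
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1).share_memory_()
                state['exp_avg'] = torch.zeros_like(p.data).share_memory_()
                state['exp_avg_sq'] = torch.zeros_like(p.data).share_memory_()

    def step(self, closure=None):
        loss = closure() if closure is not None else None
        for group in self.param_groups:
            beta1, beta2 = group['betas']
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]
                state['step'] += 1
                # biased first and second moment estimates, updated in place
                state['exp_avg'].mul_(beta1).add_(grad, alpha=1 - beta1)
                state['exp_avg_sq'].mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                # bias-corrected step size
                t = state['step'].item()
                step_size = group['lr'] * math.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)
                denom = state['exp_avg_sq'].sqrt().add_(group['eps'])
                p.data.addcdiv_(state['exp_avg'], denom, value=-step_size)
        return loss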
Example #3
import os
import time
import utils

def test():
    try:
        utils.printlog(" print %s" % "test")
        return 0
    except Exception as e:
        utils.printlog("exception here: %s" % e)
        return 1
Example #4
    data_len = len(train_data)+len(val_data)
    tr_txt = []
    tr_label = []
    val_txt = []
    val_label = []

    for data_t in train_data:
        tr_txt.append(data_t.comment_text)
        tr_label.append(data_t.target)

    for data_v in val_data:
        val_txt.append(data_v.comment_text)
        val_label.append(data_v.target)


    utils.printlog("cheak split: tr data {}\tval data {}\t total : {}\t".format(len(tr_label), len(val_label), data_len))

    tokenizer = BertTokenizer.from_pretrained(opt.bert_model)
    convert_text2token = utils.ConvertText2Token(seq_length=opt.seq_len, tokenizer=tokenizer)

    train_dataset = Toxicdataset(
            textlist=tr_txt,
            labellist=tr_label,
            tokenizer=convert_text2token
        )

    dl_tr = DataLoader(
        train_dataset,
        batch_size=opt.train_batch_size,
        sampler=RandomSampler(train_dataset),
        num_workers=opt.workers,
Example #5
                         pin_memory=True)

    num_labels = 2  # number of classes

    if opt.mode == 'test':

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        model = torch.load(opt.save_path + 'best')

        model.to(device)

        global_step = 0

        utils.printlog("***** Running training *****")
        utils.printlog("  Num examples = {:d}".format(len(test_txt)))
        utils.printlog("  Batch size = {:d}".format(opt.test_batch_size))

        predictions = []
        model.eval()

        nb_test_examples, nb_test_steps = 0, 0
        with torch.no_grad():
            for step, batch in enumerate(
                    tqdm(dl_test, position=1, desc="Test Iteration")):
                batch = tuple(t.to(device) for t in batch)
                tokens_tensor, segments_tensors = batch
                logits = model(tokens_tensor, segments_tensors, labels=None)

                #ss = torch.nn.Softmax(1)
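                # assumed continuation (the excerpt is truncated; following
                # the Softmax hint above): turn logits into class predictions
                # probs = torch.nn.functional.softmax(logits, dim=1)
                # predictions.extend(probs.argmax(dim=1).cpu().tolist())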