def train(shared_model, shared_optimizer, rank, args, info):
    """A3C worker loop: roll out `args.steps`-step segments, compute the
    actor-critic loss, and push gradients into the shared model.

    Args:
        shared_model: ActorCritic whose parameters live in shared memory;
            every worker reads weights from it and hands gradients back
            (Hogwild-style, no locking).
        shared_optimizer: optimizer bound to shared_model's parameters.
        rank: worker index; offsets the RNG seeds and gates console
            logging (only rank 0 prints).
        args: parsed CLI namespace (env, seed, steps, horizon, save_dir,
            num_actions, test, ...).
        info: dict of shared-memory scalar tensors ('frames', 'episodes',
            'run_epr', 'run_loss') used for cross-process bookkeeping.
    """
    env = gym.make(args.env)
    env.seed(args.seed + rank)  # per-worker seed so rollouts decorrelate
    torch.manual_seed(args.seed + rank)
    model = ActorCritic(num_actions=args.num_actions)  # worker-local copy
    state = torch.tensor(prepro(env.reset()))
    start_time = last_disp_time = time.time()
    episode_length = 0
    episode_reward = 0
    episode_loss = 0
    done = True  # forces a fresh recurrent state on the first iteration
    # Train until 80M frames have been consumed globally (unless testing).
    while info['frames'][0] <= 8e7 or args.test:
        # Re-sync local weights from the shared model before each rollout.
        model.load_state_dict(shared_model.state_dict())
        if done:
            hx = torch.zeros(1, 256)  # reset hidden state at episode start
        else:
            hx = hx.detach()  # truncate backprop at the segment boundary
        values = []
        logps = []
        actions = []
        rewards = []
        # Collect one rollout segment of args.steps environment transitions.
        for step in range(args.steps):
            episode_length += 1
            value, logit, hx = model((state.view(1, 1, 80, 80), hx))
            logp = F.log_softmax(logit, dim=-1)
            # Sample an action from the current policy distribution.
            action = torch.exp(logp).multinomial(num_samples=1).data[0]
            state, reward, done, _ = env.step(action.numpy()[0])
            # if args.test:
            #     env.render()
            state = torch.tensor(prepro(state))
            episode_reward += reward
            reward = np.clip(reward, -1, 1)  # clipped reward feeds the loss
            done = done or episode_length >= 1e4  # hard episode-length cap
            info['frames'].add_(1)  # shared, unlocked counter (Hogwild-style)
            num_frames = int(info['frames'].item())
            # NOTE(review): every worker bumps the shared counter, so a given
            # worker may never observe an exact multiple of 4e6 and a
            # checkpoint can be skipped -- confirm this is acceptable.
            if num_frames % 4e6 == 0:
                torch.save(
                    shared_model.state_dict(),
                    args.save_dir + 'model.{:.0f}.tar'.format(num_frames / 1e6))
            if done:
                info['episodes'] += 1
                # First episode seeds the running stats; afterwards use an
                # exponential moving average with factor (1 - args.horizon).
                if info['episodes'][0] == 1:
                    interp = 1
                else:
                    interp = 1 - args.horizon
                info['run_epr'].mul_(1 - interp).add_(interp * episode_reward)
                info['run_loss'].mul_(1 - interp).add_(interp * episode_loss)
            # Rank 0 prints progress at most once per minute.
            if rank == 0 and time.time() - last_disp_time > 60:
                elapsed = time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time))
                printlog(
                    args.save_dir,
                    'time {}, episodes {:.0f}, frames {:.1f}M, mean episode_reward {:.2f}, run loss {:.2f}'
                    .format(elapsed, info['episodes'].item(), num_frames / 1e6,
                            info['run_epr'].item(), info['run_loss'].item()))
                last_disp_time = time.time()
            if done:  # start a fresh episode
                episode_length, episode_reward, episode_loss = 0, 0, 0
                state = torch.tensor(prepro(env.reset()))
            values.append(value)
            logps.append(logp)
            actions.append(action)
            rewards.append(reward)
        # Bootstrap value for the segment tail: zero if the episode ended,
        # otherwise the critic's estimate V(s_T) for the current state.
        if done:
            next_value = torch.zeros(1, 1)
        else:
            next_value = model((state.unsqueeze(0), hx))[0]
        values.append(next_value.detach())
        loss = cost_func(args, torch.cat(values), torch.cat(logps),
                         torch.cat(actions), np.asarray(rewards))
        episode_loss += loss.item()
        shared_optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 40)
        # Hand locally-computed gradients to the shared parameters (only when
        # the shared slot is empty) so the shared optimizer can apply them.
        for param, shared_param in zip(model.parameters(), shared_model.parameters()):
            if shared_param.grad is None:
                shared_param._grad = param.grad
        shared_optimizer.step()
# Entry point: configure the run, then launch one training process per worker.
mp.set_start_method('spawn')
args = get_args()
args.save_dir = 'a3c-{}/'.format(args.env.lower())  # one checkpoint dir per env
if args.test:
    # Evaluation mode: a single worker and zero learning rate, so the
    # shared weights are never modified.
    args.processes = 1
    args.lr = 0
args.num_actions = gym.make(args.env).action_space.n
if not os.path.exists(args.save_dir):
    os.makedirs(args.save_dir)
torch.manual_seed(args.seed)

# Model, optimizer state, and bookkeeping tensors all live in shared memory,
# so every spawned worker operates on the very same storage.
shared_model = ActorCritic(num_actions=args.num_actions).share_memory()
shared_optimizer = SharedAdam(shared_model.parameters(), lr=args.lr)
info = {
    stat: torch.DoubleTensor([0]).share_memory_()
    for stat in ['run_epr', 'run_loss', 'episodes', 'frames']
}
# Resume the frame counter from a checkpoint if one is found (M frames -> frames).
info['frames'] += shared_model.try_load(args.save_dir) * 1e6
if int(info['frames'].item()) == 0:
    printlog(args.save_dir, '', mode='w')  # fresh run: truncate the log file

# mp.Process construction has no side effects; spawning happens at start().
processes = [
    mp.Process(target=train,
               args=(shared_model, shared_optimizer, worker_rank, args, info))
    for worker_rank in range(args.processes)
]
for worker in processes:
    worker.start()
for worker in processes:
    worker.join()
import os
import time
import datetime  # was `import date`: no such stdlib module; datetime is intended

import utils


def test():
    """Smoke-test the project's logging helper.

    Attempts a single printlog call and reports success/failure as an
    exit-code-style int.

    Returns:
        0 if utils.printlog succeeded, 1 if it raised.
    """
    # Original was invalid pseudo-code (Java-style `void`/braces, Python-2
    # `except Exception, e`, and a `pringlog` typo); rewritten as valid Python.
    try:
        utils.printlog(" print %s" % "test")
        return 0
    except Exception:
        # Best-effort failure report through the same helper; the bound
        # exception was never used in the original, so it is not captured.
        utils.printlog("exception here")
        return 1
# ---- Split bookkeeping: flatten train/val records into parallel text/label lists ----
data_len = len(train_data) + len(val_data)
tr_txt = []
tr_label = []
val_txt = []
val_label = []
for i, data_t in enumerate(train_data):
    tr_txt.append(data_t.comment_text)
    tr_label.append(data_t.target)
for j, data_v in enumerate(val_data):
    val_txt.append(data_v.comment_text)
    val_label.append(data_v.target)
# NOTE(review): "cheak" is a typo for "check" in the log message below
# (runtime string, deliberately left untouched in this documentation pass).
utils.printlog("cheak split: tr data {}\tval data {}\t total : {}\t".format(
    len(tr_label), len(val_label), data_len))

# Tokenization: wrap the pretrained BERT tokenizer in the project's
# fixed-sequence-length converter, then build the training DataLoader.
tokenizer = BertTokenizer.from_pretrained(opt.bert_model)
ConvertText2Token = utils.ConvertText2Token(seq_length=opt.seq_len, tokenizer=tokenizer)
train_dataset = Toxicdataset(
    textlist=tr_txt,
    labellist=tr_label,
    tokenizer=ConvertText2Token
)
dl_tr = DataLoader(
    train_dataset,
    batch_size=opt.train_batch_size,
    sampler=RandomSampler(train_dataset),
    num_workers=opt.workers,
    pin_memory=True)

num_labels = 2  # class number (presumably binary toxic / non-toxic -- confirm)
if opt.mode == 'test':
    # Evaluation path: load the best checkpoint and run inference only.
    use_gpu = torch.cuda.is_available()  # NOTE(review): computed but unused here
    model = torch.load(opt.save_path + 'best')
    model.to(device)
    global_step = 0
    nb_test_steps = 0  # NOTE(review): re-initialized again a few lines below
    # NOTE(review): banner and tqdm desc say "training"/"Train" although this
    # is the test branch -- runtime strings left untouched in this doc pass.
    utils.printlog("***** Running training *****")
    utils.printlog(" Num examples = {:d}".format(len(test_txt)))
    utils.printlog(" Batch size = {:d}".format(opt.test_batch_size))
    predictions = []
    model.eval()  # disable dropout / batch-norm updates for inference
    nb_test_examples, nb_test_steps = 0, 0
    with torch.no_grad():  # no gradient tracking needed at test time
        for step, batch in enumerate(
                tqdm(dl_test, position=1, desc="Train Iteration")):
            batch = tuple(t.to(device) for t in batch)
            tokens_tensor, segments_tensors = batch
            logits = model(tokens_tensor, segments_tensors, labels=None)
            #ss = torch.nn.Softmax(1)