import glob
import os
import time
from multiprocessing import Process

import cma
import numpy as np
import torch
import torch.nn.functional as F
from joblib import Parallel, delayed
from torch.utils.data import ConcatDataset, DataLoader

# Project-local names (Logger, log_dir, cfg, Controller, VAE, RNNModel,
# NumpyData, SeqData, flatten_controller, deflatten_controller, l2_reg,
# extract, load_npz) are assumed to be importable from elsewhere in the repo.


def run_multi(func, args, name, i, repeat):
    # Run func(*args) `repeat` times back to back, logging each run's
    # result to a per-invocation log file.
    log_name = os.path.join(log_dir, "%d.%s.log" % (i, name))
    log = Logger(strm=open(log_name, "w"))
    for j in range(repeat):
        t = func(*args)
        t.start()
        t.join()
        log.log(t.get())
    del log
def run_multi(func, name, i):
    # Variant of run_multi: launch `i` jobs concurrently, then join them in
    # order and log each job's result.
    log_name = os.path.join(log_dir, '%d.%s.log' % (i, name))
    log = Logger(strm=open(log_name, 'w'))
    li = []
    for j in range(i):
        li.append(func(j))
    for j in range(i):
        li[j].start()
    for j in range(i):
        li[j].join()
        log.log(li[j].get())
    del log
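# Both run_multi helpers expect `func` to return a thread-like object exposing
# start()/join() plus a get() that yields the run's result. A minimal sketch of
# such a wrapper (ResultThread and `evaluate` are hypothetical names, not part
# of this codebase):
import threading


class ResultThread(threading.Thread):
    """Thread that keeps its target's return value for retrieval via get()."""

    def __init__(self, target, *args):
        super().__init__()
        self._fn = target
        self._args = args
        self._result = None

    def run(self):
        self._result = self._fn(*self._args)

    def get(self):
        # Only meaningful after join().
        return self._result

# Usage sketch with the second run_multi overload:
#   run_multi(lambda j: ResultThread(evaluate, j), 'eval', 8)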
def master(comm):
    # CMA-ES master: broadcast candidate controller parameters to MPI workers,
    # collect episode rewards, and update the search distribution.
    logger = Logger("{}/es_train_{}.log".format(cfg.logger_save_dir, cfg.timestr))
    logger.log(cfg.info)

    controller = Controller()
    es = cma.CMAEvolutionStrategy(flatten_controller(controller), cfg.es_sigma,
                                  {'popsize': cfg.population_size})

    for step in range(cfg.es_steps):
        solutions = es.ask()
        # One solution per worker; worker ranks start at 1 (rank 0 is the master).
        for idx, solution in enumerate(solutions):
            comm.send(solution, dest=idx + 1, tag=1)

        check = np.ones(cfg.num_workers)
        rewards = []
        for idx in range(cfg.num_workers):
            reward = comm.recv(source=idx + 1, tag=2)
            rewards.append(reward)
            check[idx] = 0
        assert check.sum() == 0
        assert len(rewards) == cfg.num_workers

        # CMA-ES minimizes, so negate rewards and add an L2 penalty.
        r_cost = -np.array(rewards)
        reg_cost = l2_reg(solutions)
        cost = reg_cost + r_cost
        es.tell(solutions, cost.tolist())

        sigma = es.result[6]  # per-coordinate stds of the search distribution
        rms_var = np.mean(sigma * sigma)
        info = ("Step {:d}\t Max_R {:4f}\t Mean_R {:4f}\t Min_R {:4f}\t "
                "RMS_Var {:4f}\t Reg_Cost {:4f}\t R_Cost {:4f}").format(
                    step, max(rewards), np.mean(rewards), min(rewards),
                    rms_var, r_cost.mean(), reg_cost.mean())
        logger.log(info)

        if step % 25 == 0:
            # Snapshot both the current distribution mean and the best-so-far
            # solution.
            current_param = es.result[5]  # xfavorite: distribution mean
            current_controller = deflatten_controller(current_param)
            save_path = "{}/controller_curr_{}_step_{:05d}.pth".format(
                cfg.model_save_dir, cfg.timestr, step)
            torch.save({'model': current_controller.state_dict()}, save_path)

            best_param = es.result[0]  # xbest: best evaluated solution
            best_controller = deflatten_controller(best_param)
            save_path = "{}/controller_best_{}_step_{:05d}.pth".format(
                cfg.model_save_dir, cfg.timestr, step)
            torch.save({'model': best_controller.state_dict()}, save_path)
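# The master above implies a matching worker loop on ranks 1..num_workers:
# receive a flat parameter vector (tag=1), evaluate it, and send the episode
# reward back (tag=2). A minimal sketch, assuming a hypothetical
# rollout(controller) helper that returns the total episode reward:
def worker(comm):
    while True:  # the master never sends a stop signal in this section
        solution = comm.recv(source=0, tag=1)        # parameters from master
        controller = deflatten_controller(solution)  # rebuild the network
        reward = rollout(controller)                 # hypothetical evaluator
        comm.send(reward, dest=0, tag=2)             # report fitness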
def vae_extract():
    # Shard the recorded rollouts across four processes and run `extract`
    # on each shard in parallel.
    logger = Logger("{}/vae_extract_{}.log".format(cfg.logger_save_dir, cfg.timestr))
    logger.log(cfg.info)
    print("Loading Dataset")
    data_list = glob.glob(cfg.seq_save_dir + '/*.npz')
    data_list.sort()
    N = len(data_list) // 4
    procs = []
    for idx in range(4):
        p = Process(target=extract, args=(data_list[idx * N:(idx + 1) * N], idx, N))
        procs.append(p)
        p.start()
        time.sleep(1)  # stagger start-up so workers don't contend at once
    for p in procs:
        p.join()
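# `extract` (the Process target above) is defined elsewhere; its role is to
# encode each rollout's frames through the trained VAE and write the latents
# to cfg.seq_extract_dir for rnn_train. A rough sketch only: the checkpoint
# path cfg.vae_save_ckpt, the npz keys, and the encode() method are all
# assumptions, not confirmed by this section.
def extract(file_list, idx, N):
    vae = VAE().cuda().eval()
    vae.load_state_dict(torch.load(cfg.vae_save_ckpt)['model'])  # path assumed
    with torch.no_grad():
        for path in file_list:
            data = np.load(path)
            imgs = torch.from_numpy(data['obs']).float().cuda() / 255.0  # key assumed
            mu, logvar = vae.encode(imgs)  # encode() is an assumed method
            out = os.path.join(cfg.seq_extract_dir, os.path.basename(path))
            np.savez(out, mu=mu.cpu().numpy(), logvar=logvar.cpu().numpy(),
                     actions=data['actions'], rewards=data['rewards'],
                     dones=data['dones'])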
def vae_train():
    # Train the VAE on raw frames pooled from all recorded rollouts.
    logger = Logger("{}/vae_train_{}.log".format(cfg.logger_save_dir, cfg.timestr))
    logger.log(cfg.info)
    logger.log("Loading Dataset")
    data_list = glob.glob(cfg.seq_save_dir + '/*.npz')
    datas = Parallel(n_jobs=cfg.num_cpus, verbose=1)(
        delayed(load_npz)(f) for f in data_list)
    datasets = [NumpyData(x) for x in datas]
    total_data = ConcatDataset(datasets)
    train_data_loader = DataLoader(total_data, batch_size=cfg.vae_batch_size,
                                   shuffle=True, num_workers=10, pin_memory=False)
    print('Total frames: {}'.format(len(total_data)))

    model = torch.nn.DataParallel(VAE()).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.vae_lr)

    for epoch in range(cfg.vae_num_epoch):
        current_loss = 0
        now = time.time()
        for idx, imgs in enumerate(train_data_loader):
            data_duration = time.time() - now
            now = time.time()
            imgs = imgs.float().cuda() / 255.0
            mu, logvar, imgs_rc, z = model(imgs)

            # Reconstruction loss: per-image sum of squared errors.
            r_loss = (imgs_rc - imgs).pow(2).view(imgs.size(0), -1).sum(dim=1).mean()
            # KL against N(0, I), clamped from below (the free-bits trick) so
            # the per-sample KL never drops under vae_kl_tolerance nats per
            # latent dimension.
            kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1)
            min_kl = torch.zeros_like(kl_loss) + cfg.vae_kl_tolerance * cfg.vae_z_size
            kl_loss = torch.max(kl_loss, min_kl).mean()
            loss = r_loss + kl_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            current_loss += loss.item() * imgs.size(0)
            model_duration = time.time() - now
            total_duration = data_duration + model_duration
            if idx % 10 == 0:
                info = ("Epoch {:2d}\t Step [{:5d}/{:5d}]\t Loss {:6.3f}\t "
                        "R_Loss {:6.3f}\t KL_Loss {:6.3f}\t Maxvar {:6.3f}\t "
                        "Speed {:6.3f}\t Time: [{:.5f}/{:.5f}]\t").format(
                            epoch, idx, len(train_data_loader), loss.item(),
                            r_loss.item(), kl_loss.item(), logvar.max().item(),
                            imgs.size(0) / total_duration, data_duration,
                            total_duration)
                logger.log(info)
            now = time.time()

        to_save_data = {'model': model.module.state_dict(), 'loss': current_loss}
        to_save_path = '{}/vae_{}_e{:03d}.pth'.format(cfg.model_save_dir,
                                                      cfg.timestr, epoch)
        torch.save(to_save_data, to_save_path)
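# Both vae_train and rnn_train lean on a `load_npz` helper that is not shown
# in this section. A minimal sketch consistent with both call sites: it must
# return the arrays of one .npz file in a fixed order, so that the
# `zip(*datas)` transpose in rnn_train below works.
def load_npz(path):
    # np.load returns a lazy NpzFile; copy the arrays out before it closes.
    with np.load(path) as data:
        return tuple(data[key] for key in data.files)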
def rnn_train():
    # Train the MDN-RNN on latent sequences produced by vae_extract.
    logger = Logger("{}/rnn_train_{}.log".format(cfg.logger_save_dir, cfg.timestr))
    logger.log(cfg.info)
    data_list = glob.glob(cfg.seq_extract_dir + '/*.npz')
    datas = Parallel(n_jobs=cfg.num_cpus, verbose=1)(
        delayed(load_npz)(f) for f in data_list)

    model = torch.nn.DataParallel(RNNModel()).cuda()
    optimizer = torch.optim.Adam(model.parameters())

    global_step = 0
    for epoch in range(cfg.rnn_num_epoch):
        np.random.shuffle(datas)
        data = map(np.concatenate, zip(*datas))
        dataset = SeqData(*data)
        dataloader = DataLoader(dataset, batch_size=cfg.rnn_batch_size,
                                shuffle=False)
        for idx, idata in enumerate(dataloader):  # mu, logvar, actions, rewards, dones
            now = time.time()
            lr = adjust_learning_rate(optimizer, global_step)
            idata = [x.cuda() for x in idata]

            # Sample z from the stored VAE posterior (reparameterization trick).
            z = idata[0] + torch.exp(idata[1] / 2.0) * torch.randn_like(idata[1])
            target_z = z[:, 1:, :].contiguous().view(-1, 1)
            target_d = idata[-1][:, 1:].float()
            if z.size(0) != cfg.rnn_batch_size:
                continue

            logmix, mu, logstd, done_p = model(z, idata[2], idata[4])

            # logmix = F.log_softmax(logmix), written out for numerical stability.
            logmix_max = logmix.max(dim=1, keepdim=True)[0]
            logmix_reduce_logsumexp = (logmix - logmix_max).exp().sum(
                dim=1, keepdim=True).log() + logmix_max
            logmix = logmix - logmix_reduce_logsumexp

            # Per-component Gaussian log-likelihood of target_z, then
            # log-sum-exp over mixture components.
            v = logmix - 0.5 * ((target_z - mu) / torch.exp(logstd)) ** 2 \
                - logstd - cfg.logsqrt2pi
            v_max = v.max(dim=1, keepdim=True)[0]
            v = (v - v_max).exp().sum(dim=1).log() + v_max.squeeze()

            # Maximize the likelihood, i.e. minimize the negative log-likelihood.
            z_loss = -v.mean()
            # Termination loss, with `done` steps up-weighted by rnn_r_loss_w.
            r_loss = F.binary_cross_entropy_with_logits(done_p, target_d,
                                                        reduction='none')
            r_factor = torch.ones_like(r_loss) + target_d * cfg.rnn_r_loss_w
            r_loss = torch.mean(r_loss * r_factor)
            loss = z_loss + r_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            global_step += 1
            duration = time.time() - now
            if idx % 10 == 0:
                info = ("Epoch {:2d}\t Step [{:5d}/{:5d}]\t Z_Loss {:5.3f}\t "
                        "R_Loss {:5.3f}\t Loss {:5.3f}\t LR {:.5f}\t "
                        "Speed {:5.2f}").format(
                            epoch, idx, len(dataloader), z_loss.item(),
                            r_loss.item(), loss.item(), lr,
                            cfg.rnn_batch_size / duration)
                logger.log(info)

        if epoch % 10 == 0:
            to_save_data = {'model': model.module.state_dict()}
            to_save_path = '{}/rnn_{}_e{:03d}.pth'.format(
                cfg.model_save_dir, cfg.timestr, epoch)
            torch.save(to_save_data, to_save_path)
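# rnn_train calls `adjust_learning_rate`, which is defined elsewhere. A
# plausible sketch, assuming exponential decay from cfg.rnn_lr_max down to
# cfg.rnn_lr_min with rate cfg.rnn_lr_decay (all three config names are
# assumptions); it sets the optimizer's lr and returns it for logging:
def adjust_learning_rate(optimizer, global_step):
    lr = (cfg.rnn_lr_max - cfg.rnn_lr_min) * cfg.rnn_lr_decay ** global_step \
        + cfg.rnn_lr_min
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr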