def _report_rouge(self, predictions, references):
    a_lst = []
    predictions = list(predictions)
    references = list(references)
    for i, p in enumerate(predictions):
        a_lst.append((p, references[i]))

    pool = Pool(24)
    rouge_scores = {"r1": [], "r2": [], "rl": []}
    for d in tqdm(pool.imap(_multi_rg, a_lst), total=len(a_lst)):
        if d is not None:
            rouge_scores["r1"].append(d[0])
            rouge_scores["r2"].append(d[1])
            rouge_scores["rl"].append(d[2])
    pool.close()
    pool.join()

    r1 = np.mean(rouge_scores["r1"])
    r2 = np.mean(rouge_scores["r2"])
    rl = np.mean(rouge_scores["rl"])

    if len(self.args.log_folds) > 0:
        with open(self.args.log_folds, mode='a') as f:
            f.write("{:.4f}\t{:.4f}\t{:.4f}".format(r1 / 100, r2 / 100, rl / 100))
            f.write('\n')

    logger.info("Metric\tScore\t95% CI")
    logger.info("ROUGE-1\t{:.2f}\t({:.2f},{:.2f})".format(r1 * 100, 0, 0))
    logger.info("ROUGE-2\t{:.2f}\t({:.2f},{:.2f})".format(r2 * 100, 0, 0))
    logger.info("ROUGE-L\t{:.2f}\t({:.2f},{:.2f})".format(rl * 100, 0, 0))
    logger.info("Data path: %s" % self.args.bert_data_path)
    logger.info("Model path: %s" % self.args.model_path)
    return r1, r2, rl
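The `_multi_rg` worker is not shown above; a minimal sketch of a worker consistent with its usage (takes a `(prediction, reference)` pair, returns three scores or `None`), assuming the `rouge_score` package as the backend — this is an illustration, not the original implementation:

# Hypothetical worker sketch (not the original _multi_rg): maps a
# (prediction, reference) pair to (ROUGE-1, ROUGE-2, ROUGE-L) F1 scores.
from rouge_score import rouge_scorer

_scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

def _multi_rg(pair):
    prediction, reference = pair
    try:
        scores = _scorer.score(reference, prediction)
        return (scores['rouge1'].fmeasure,
                scores['rouge2'].fmeasure,
                scores['rougeL'].fmeasure)
    except Exception:
        return None  # the caller skips None results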
def async_build_examples(
        self, data_type: str,
        dials: List[Tuple[str, dict]]) -> Tuple[list, list]:
    """Use multiprocessing to process raw dialogue data.

    Args:
        data_type: train, dev or test
        dials: raw dialogues data

    Returns:
        new examples built by all processes
    """
    neg_examples = Manager().list()
    pos_examples = Manager().list()
    dials4single_process = (len(dials) - 1) // self.config['num_processes'] + 1
    print(f'Each process handles {dials4single_process} dials ...')
    pool = Pool(self.config['num_processes'])
    for i in range(self.config['num_processes']):
        pool.apply_async(func=self.iter_dials,
                         args=(dials[dials4single_process * i:
                                     dials4single_process * (i + 1)],
                               data_type, pos_examples, neg_examples, i))
    pool.close()
    pool.join()
    pos_examples = list(pos_examples)
    neg_examples = list(neg_examples)
    return neg_examples, pos_examples
def step(self, items_seq, items_ori, items_batch=None, boxes_batch=None):
    if items_batch is not None and boxes_batch is not None:
        self.items_batch = items_batch
        self.boxes_batch = boxes_batch
        self.batch_indx = list(range(self.BATCH_SIZE))
        self.expected_items_n = [self.ITEMS_SEQ_LN] * self.BATCH_SIZE
        self.all_outs = {i: [] for i in range(self.BATCH_SIZE)}
        self.current_level = 0
        self.items_batch_alligned = None
        self.boxes_batch_alligned = None

    items_seq_ = torch.LongTensor(items_seq).transpose(1, 0).expand(
        self.INPUT_SIZE, self.ITEMS_SEQ_LN, self.BATCH_SIZE).transpose(2, 0)
    items_ori_ = items_ori[torch.arange(self.BATCH_SIZE).expand(
        self.ITEMS_SEQ_LN, self.BATCH_SIZE).transpose(1, 0),
        torch.LongTensor(items_seq).expand(self.BATCH_SIZE, self.ITEMS_SEQ_LN)]
    self.items_batch_alligned = self.items_batch[[
        self.base_indx_items, items_seq_, items_ori_
    ]]
    lookup_sm = self.boxes_batch.expand(
        self.ITEMS_SEQ_LN, self.BATCH_SIZE, self.ITEMS_SEQ_LN,
        self.INPUT_SIZE).transpose(1, 0) - self.items_batch_alligned.unsqueeze(2)
    validities = (lookup_sm >= 0).all(3).any(2).tolist()

    # keep only the items that still fit into at least one box
    new_seq = []
    for i, j in zip(items_seq, validities):
        new_seq.append([i[k] for k in range(len(i)) if j[k]])
    self.batch_indx = [i for i in self.batch_indx if len(new_seq[i]) > 0]
    items_seq = [i for i in new_seq if len(i) > 0]

    zp = list(
        zip(self.batch_indx, self.items_batch[self.batch_indx],
            self.boxes_batch[self.batch_indx], items_seq,
            items_ori[self.batch_indx]))
    p = Pool(10)
    out = p.map(self.target_func, zp)
    p.close()
    p.join()
    out = [pickle.loads(i) for i in out]
    out_series = pd.Series(out)
    _ = out_series.apply(lambda x: self.dict_update(x))
    # out = [i for i in out if i[1] < i[2]]
    self.batch_indx = [i[0] for i in out]
    self.current_level += 1
    items_seq = [i[5] for i in out]
    all_rewards = [i[-1] * i[-2] for i in out]
    # filled_items_indx = {i: [i[2] for i in j] for i, j in self.all_outs.items() if len(j) > 0}
    # filled_items_HUs = {i: [i[7] for i in j if len(i[7]) > 0] for i, j in self.all_outs.items()}
    # all_rewards = [self.calc_reward(self.items_batch[i], i, filled_items_indx, filled_items_HUs) for i in range(self.BATCH_SIZE)]
    return all_rewards
def final_experiments():
    for i in range(2, 5):
        x_train, train, x_valid, valid, x_test, test, name = get_dataset(i)
        print("Running for", name, "dataset")
        x_train, e_train, t_train, x_valid, e_valid, t_valid, x_test, e_test, t_test = \
            scale_data_to_torch(x_train, train, x_valid, valid, x_test, test)
        print("Dataset loaded and scaled")
        risk_set, risk_set_valid, risk_set_test = compute_risk_set(
            t_train, t_valid, t_test)
        print("Risk set computed")
        data_dict = {
            "x_train": x_train, "e_train": e_train, "t_train": t_train,
            "x_valid": x_valid, "e_valid": e_valid, "t_valid": t_valid,
            "x_test": x_test, "e_test": e_test, "t_test": t_test,
            "risk_set": risk_set, "risk_set_valid": risk_set_valid,
            "risk_set_test": risk_set_test
        }
        n_in = x_train.shape[1]
        linear_models = [2, 5, 10, 12]
        learning_rates = [1e-4, 1e-3]
        layer_sizes = [[n_in], [n_in, n_in], [n_in, n_in, n_in], [n_in, 20, 15]]
        data = [data_dict]
        hyperparams = [(linear_model, learning_rate, layer_size, seed, d)
                       for layer_size in layer_sizes
                       for learning_rate in learning_rates
                       for linear_model in linear_models
                       for seed in range(3)
                       for d in data]
        print("Hyperparams initialized")
        p = Pool(50)
        print("Pool created")
        output = p.map(run_experiment, hyperparams)
        p.close()
        p.join()
        print("Models trained. Writing to file")
        filename = name + "_results.pkl"
        with open(filename, "wb") as f:
            pkl.dump(output, f)
        print(name, "done")
        print("")
def mul_infer(args):
    setuplogging()
    set_start_method('spawn', force=True)
    root_data_dir = os.path.join(args.root_data_dir, 'testdata')
    checkpoint = torch.load(os.path.join(args.model_dir, args.load_ckpt_name),
                            map_location=torch.device('cpu'))
    subcategory_dict = checkpoint['subcategory_dict']
    category_dict = checkpoint['category_dict']
    logging.info('load ckpt: {}'.format(args.load_ckpt_name))

    check_preprocess_result(args, root_data_dir, mode='test',
                            category=category_dict, subcategory=subcategory_dict)
    logging.info('finish the preprocess of docfeatures')

    docid_features, category_dict, subcategory_dict = read_news(args, root_data_dir)

    news_index = {}
    news_feature = []
    cnt = 0
    for k, v in docid_features.items():
        news_index[k] = cnt
        news_feature.append(v)
        cnt += 1

    news_num = len(news_feature)
    logging.info('news_num:{}'.format(news_num))

    # split the news into world_size chunks; the last rank takes the remainder
    pool = Pool(processes=args.world_size)
    results = []
    sigle_size = news_num // args.world_size
    for rank in range(args.world_size):
        start = sigle_size * rank
        end = sigle_size * (rank + 1)
        if rank == args.world_size - 1:
            end = news_num
        local_features = news_feature[start:end]
        result = pool.apply_async(sigle_process_infer,
                                  args=(rank, local_features, checkpoint, args))
        results.append(result)
    pool.close()
    pool.join()
    results = [x.get() for x in results]
    news_vecs = np.concatenate(results, 0)
    return news_index, news_vecs
def runInParallel(args):
    cur_args = []
    for i in range(args.r):
        cur_args.append({'seed': base_seed + i})
    print('total to run', cur_args, 'nProc', args.nproc)
    if args.nproc > 1:
        pool = Pool(processes=int(args.nproc))
        pool.map(single_run_algorithm, cur_args)
        pool.close()
        pool.join()
    else:
        for cur_arg in cur_args:
            single_run_algorithm(cur_arg)
def forward(self, images):
    parameters = []
    num_workers = self.num_workers
    pool = Pool(num_workers)
    for bx in range(len(images)):
        bx_params = [bx, images[bx], True]
        parameters.append(bx_params)
    predictions = pool.map(worker_distance_transform, parameters)
    predictions = torch.stack(predictions)
    pool.close()
    pool.join()
    return predictions
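The worker itself is not part of this snippet; a hypothetical `worker_distance_transform` consistent with the `[bx, image, True]` parameter triples above, built on `scipy.ndimage.distance_transform_edt` purely as an assumption:

# Hypothetical worker sketch (the original worker_distance_transform is not
# shown): receives (index, image, normalize) and returns a distance-map tensor.
import torch
from scipy.ndimage import distance_transform_edt

def worker_distance_transform(params):
    bx, image, normalize = params
    mask = image.cpu().numpy() > 0           # foreground mask
    dist = distance_transform_edt(mask)      # Euclidean distance to background
    if normalize and dist.max() > 0:
        dist = dist / dist.max()
    return torch.from_numpy(dist).float()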
def propagate(nnf, feat_A, feat_AP, feat_B, feat_BP, patch_size,
              iters=2, rand_search_radius=200):
    print("\tpatch_size:{}; num_iters:{}; rand_search_radius:{}".format(
        patch_size, iters, rand_search_radius))

    nnd = np.zeros(nnf.shape[:2])
    A_size = feat_A.shape[:2]
    B_size = feat_B.shape[:2]

    # initialize the distance map for the current nearest-neighbour field
    for ay in range(A_size[0]):
        for ax in range(A_size[1]):
            by, bx = nnf[ay, ax]
            nnd[ay, ax] = cal_dist(ay, ax, by, bx, feat_A, feat_AP,
                                   feat_B, feat_BP, A_size, B_size, patch_size)

    manager = mp.Manager()
    q = manager.Queue(A_size[1] * A_size[0])
    cpus = min(mp.cpu_count(), A_size[0] // 20 + 1)
    for i in range(iters):
        p = Pool(cpus)
        ay_start = 0
        while ay_start < A_size[0]:
            ax_start = 0
            while ax_start < A_size[1]:
                p.apply_async(pixelmatch,
                              args=(q, ax_start, ay_start, cpus, nnf, nnd,
                                    A_size, B_size, feat_A, feat_AP,
                                    feat_B, feat_BP, patch_size,
                                    rand_search_radius))
                ax_start += A_size[1] // cpus + 1
            ay_start += A_size[0] // cpus + 1
        p.close()
        p.join()

        # merge the per-tile results back into the field
        while not q.empty():
            ax, ay, xbest, ybest, dbest = q.get()
            nnf[ay, ax] = np.array([ybest, xbest])
            nnd[ay, ax] = dbest

    return nnf, nnd
def create_episodes(
    self,
    n_episodes: int,
    n_processes: int,
    mcts_samples: int,
    mcts_temp: float,
    mcts_cpuct: int,
    mcts_observation_weight: float,
    model: Model,
) -> List[Tuple[List[ObservationType], List[np.ndarray], int, Summary]]:
    pool = Pool(n_processes)
    res = pool.starmap(
        self._generator.perform_episode,
        [[mcts_samples, mcts_temp, mcts_cpuct, mcts_observation_weight, model]]
        * n_episodes,
    )
    # starmap blocks until every episode has finished, so a graceful
    # close/join is all that is needed here
    pool.close()
    pool.join()
    return res
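For reference, a minimal sketch of the standard `Pool` shutdown semantics that `torch.multiprocessing` inherits from the stdlib — `close()` drains gracefully, `terminate()` aborts, and `join()` must follow one of them:

# Minimal sketch of Pool shutdown semantics: close() lets queued work finish,
# terminate() kills workers immediately; join() waits for workers to exit.
from torch.multiprocessing import Pool

def square(x):
    return x * x

if __name__ == '__main__':
    pool = Pool(2)
    results = pool.map(square, range(8))  # blocks until all tasks are done
    pool.close()   # no more tasks will be submitted
    pool.join()    # wait for the worker processes to exit
    print(results)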
def save(self):
    try:
        mp.set_start_method('spawn')
    except RuntimeError:
        pass
    pool = Pool(processes=4)
    self.labels = []
    item = 0
    for label_name in label_map.keys():
        images = self._listdir(os.path.join(self.path, label_name))
        for i in range(len(images)):
            rows, cols = [], []
            files = glob.glob(
                os.path.join(self.path, label_name, images[i],
                             str(self.magnify), '*.jpeg'))
            for file in files:
                filename = os.path.basename(file)
                nums = filename.split('_')
                row, col = int(nums[0]), int(nums[1])
                rows.append(row)
                cols.append(col)
            num_row = max(rows) - min(rows) + 1
            num_col = max(cols) - min(cols) + 1
            patches = np.chararray((num_row, num_col), itemsize=1024)
            for file in files:
                filename = os.path.basename(file)
                nums = filename.split('_')
                row, col = int(nums[0]), int(nums[1])
                patches[row - min(rows), col - min(cols)] = file
            self.labels.append(label_map[label_name])
            # Save feature vector
            pool.apply_async(self.doit,
                             args=(item, patches, num_row, num_col),
                             error_callback=self.print_error)
            item += 1
    # Save labels
    torch.save(self.labels, self._get_label_file())
    pool.close()
    pool.join()
    print('done')
def fit(self, train_data, train_label):
    a = time()
    pool = Pool(16)
    results = []
    for i in range(self.round):
        print(i)
        # bootstrap sample (bagging): draw N indices with replacement
        bag_index = np.random.choice(np.arange(train_label.shape[0]),
                                     train_label.shape[0])
        data = train_data[bag_index]
        label = train_label[bag_index]
        results.append(
            pool.apply_async(self.parallel_fit,
                             args=(self.clf[i], data, label)))
    pool.close()
    pool.join()
    for i, result in enumerate(results):
        self.clf[i] = result.get()
        print('Class %d cost %.1f seconds' % (i, time() - a))
class MetricTester:
    """Class used for efficiently running a lot of parametrized tests in ddp mode.
    Makes sure that ddp is only setup once and that the pool of processes is
    used for all tests.

    All tests should subclass from this and implement a new method called
    `test_metric_name` where the method `self.run_metric_test` is called inside.
    """

    atol = 1e-8

    def setup_class(self):
        """Setup the metric class. This will spawn the pool of workers that are
        used for metric testing and setup_ddp
        """
        self.poolSize = NUM_PROCESSES
        self.pool = Pool(processes=self.poolSize)
        self.pool.starmap(setup_ddp, [(rank, self.poolSize) for rank in range(self.poolSize)])

    def teardown_class(self):
        """Close pool of workers"""
        self.pool.close()
        self.pool.join()

    def run_functional_metric_test(
        self,
        preds: Tensor,
        target: Tensor,
        metric_functional: Callable,
        sk_metric: Callable,
        metric_args: dict = None,
        **kwargs_update,
    ):
        """Main method that should be used for testing functions. Call this inside
        the testing method.

        Args:
            preds: torch tensor with predictions
            target: torch tensor with targets
            metric_functional: lightning metric class that should be tested
            sk_metric: callable function that is used for comparison
            metric_args: dict with additional arguments used for class initialization
            kwargs_update: Additional keyword arguments that will be passed with preds and
                target when running update on the metric.
        """
        _functional_test(
            preds=preds,
            target=target,
            metric_functional=metric_functional,
            sk_metric=sk_metric,
            metric_args=metric_args,
            atol=self.atol,
            **kwargs_update,
        )

    def run_class_metric_test(
        self,
        ddp: bool,
        preds: Tensor,
        target: Tensor,
        metric_class: Metric,
        sk_metric: Callable,
        dist_sync_on_step: bool,
        metric_args: dict = None,
        check_dist_sync_on_step: bool = True,
        check_batch: bool = True,
        **kwargs_update,
    ):
        """Main method that should be used for testing class. Call this inside
        testing methods.

        Args:
            ddp: bool, if running in ddp mode or not
            preds: torch tensor with predictions
            target: torch tensor with targets
            metric_class: lightning metric class that should be tested
            sk_metric: callable function that is used for comparison
            dist_sync_on_step: bool, if true will synchronize metric state across
                processes at each ``forward()``
            metric_args: dict with additional arguments used for class initialization
            check_dist_sync_on_step: bool, if true will check if the metric is also
                correctly calculated per batch per device (and not just at the end)
            check_batch: bool, if true will check if the metric is also correctly
                calculated across devices for each batch (and not just at the end)
            kwargs_update: Additional keyword arguments that will be passed with preds and
                target when running update on the metric.
""" if not metric_args: metric_args = {} if ddp: if sys.platform == "win32": pytest.skip("DDP not supported on windows") self.pool.starmap( partial( _class_test, preds=preds, target=target, metric_class=metric_class, sk_metric=sk_metric, dist_sync_on_step=dist_sync_on_step, metric_args=metric_args, check_dist_sync_on_step=check_dist_sync_on_step, check_batch=check_batch, atol=self.atol, **kwargs_update, ), [(rank, self.poolSize) for rank in range(self.poolSize)], ) else: _class_test( 0, 1, preds=preds, target=target, metric_class=metric_class, sk_metric=sk_metric, dist_sync_on_step=dist_sync_on_step, metric_args=metric_args, check_dist_sync_on_step=check_dist_sync_on_step, check_batch=check_batch, atol=self.atol, **kwargs_update, ) def run_precision_test_cpu( self, preds: torch.Tensor, target: torch.Tensor, metric_module: Metric, metric_functional: Callable, metric_args: dict = {}, ): """Test if an metric can be used with half precision tensors on cpu Args: preds: torch tensor with predictions target: torch tensor with targets metric_module: the metric module to test metric_functional: the metric functional to test metric_args: dict with additional arguments used for class initialization """ _assert_half_support(metric_module(**metric_args), partial(metric_functional, **metric_args), preds, target, device="cpu") def run_precision_test_gpu( self, preds: torch.Tensor, target: torch.Tensor, metric_module: Metric, metric_functional: Callable, metric_args: dict = {}, ): """Test if an metric can be used with half precision tensors on gpu Args: preds: torch tensor with predictions target: torch tensor with targets metric_module: the metric module to test metric_functional: the metric functional to test metric_args: dict with additional arguments used for class initialization """ _assert_half_support(metric_module(**metric_args), partial(metric_functional, **metric_args), preds, target, device="cuda")
str) + ".jpg" return df path = untar_data(URLs.FOOD) train_path = path / 'train.txt' test_path = path / 'test.txt' def load_data(index): train_df = filelist2df(train_path) test_df = filelist2df(test_path) food = DataBlock(blocks=(ImageBlock, CategoryBlock), get_x=ColReader(1, pref=path / 'images'), splitter=RandomSplitter(), get_y=ColReader(cols=0), item_tfms=Resize(224)) dls = food.dataloaders(train_df.values, bs=64) if __name__ == '__main__': set_start_method('spawn', force=True) try: pool = Pool(8) pool.map(load_data, [1, 2, 3, 4, 5, 6, 7, 8]) except KeyboardInterrupt: exit() finally: pool.terminate() pool.join()
        if len(pair_name_list[i].split('-')) == 3:
            tgtname, domainID, tplname = pair_name_list[i].split('-')
            tgtname = "%s-%s" % (tgtname, domainID)
        else:
            tgtname, tplname = pair_name_list[i].split('-')
        observations = torch.load(os.path.join(
            s1_path, "%s-%s.DRNF.Score.pkl" % (tplname, tgtname)),
            pickle_module=pickle)
        observations = observations.float()
        pool0.apply_async(generateAlign,
                          args=(tplname + tpl_type, tgtname + tgt_type,
                                observations, transitions),
                          callback=getoutput_init)
    pool0.close()
    pool0.join()
    pbar.close()
    print("finish initial alignment in %.2fs" % (time.time() - start))

    # empty the cache
    if args.s1 == "" and args.s2 == '':
        del observation, featdata, seqX, seqY, maskX, maskY
        del obsmodel, model1
        del data_generator, AlignmentSet
        with torch.cuda.device(GPU):
            torch.cuda.empty_cache()
        del obs_group, crf_group, crfmodel

    if args.s2 == '':
        NDTAlignmentSet = NDTAlignmentDataSet(
def train(model, src_vocab, trg_vocab, optim_wrapper, train_iter, vldt_iter,
          loss_function):
    global opt, min_loss, max_bleu
    subprocess_pool = Pool(2)
    model.train()
    print('start training!!!', id(model))
    for epoch in range(opt.epoch, opt.nepoch):  # TODO
        cur_epoch = epoch + 1
        total_loss = 0
        print('############### epoch = %d ###############\n' % cur_epoch)
        for batch_idx, batch in enumerate(train_iter, start=1):
            sorted_batch = sort_batch(batch)
            src_raw = sorted_batch[0]
            trg_raw = sorted_batch[1]
            # convert source and target sentences to word indices
            src = batch_str2idx_with_flag(src_raw, src_vocab, unk=UNK, pad=PAD, sos=SOS, eos=EOS)
            f_trg = batch_str2idx_with_flag(trg_raw, trg_vocab, unk=UNK, pad=PAD, sos=SOS, eos=EOS)
            src, f_trg = to_Tensor(src, f_trg, tensor_type=torch.LongTensor, cuda=opt.cuda)
            src_mask = get_batch_mask(src, src_vocab, PAD)
            f_trg_mask = get_batch_mask(f_trg, trg_vocab, PAD)
            '''
            # b_trg = batch_str2idx_with_flag(trg_raw, trg_vocab, unk=UNK, pad=PAD, sos=SOS, eos=EOS, reverse=True)  # reversed target-side batch, unused for now
            # src, f_trg, b_trg = to_Tensor(src, f_trg, b_trg, tensor_type=torch.LongTensor, cuda=opt.cuda)
            # b_trg_mask = get_batch_mask(b_trg, trg_vocab, PAD)
            '''
            y_prob = model(src, src_mask, f_trg, f_trg_mask)
            # --------------------------------------- TODO
            f_trg = torch.cat(
                (f_trg, torch.LongTensor([[dec_pad] for _ in range(int(f_trg.size(0)))])), 1)
            loss = loss_function(y_prob.transpose(1, 2), f_trg[:, 1:])
            total_loss = total_loss + float(loss)
            loss.backward()
            # ----------------------------------------
            if batch_idx % opt.interval == 0:
                total_loss = total_loss / opt.interval
                if total_loss < min_loss:
                    print('& epoch = %d batch_idx = %d min_loss = %f &\n' % (cur_epoch, batch_idx / opt.interval, total_loss))
                    min_loss = total_loss
                    save_min_loss_model(model, opt.checkpoint_dir, batch_idx / opt.interval, cur_epoch, min_loss, info='Transformer_min_loss_model')
                else:
                    print('- batch_idx = %d, loss = %f -\n' % (batch_idx / opt.interval, total_loss))
                # torch.nn.utils.clip_grad_norm_(model.parameters(), opt.max_norm, norm_type=2)  # clip gradients before the update (L2 norm by default)
                optim_wrapper.step()
                optim_wrapper.zero_grad()
                total_loss = 0
                optim_wrapper.update_lr_per_step()
                '''
                # uncomment to evaluate dev-set BLEU in an extra CPU process:
                # from epoch 4 on, spawn a subprocess every opt.vldt_freq batches
                if cur_epoch >= 4 and (batch_idx * opt.interval) % opt.vldt_freq == 0:
                    cpu_model = copy.deepcopy(model).cpu()
                    subprocess_pool.apply_async(evaluate,
                                                args=(opt, cpu_model, src_vocab, trg_vocab, vldt_iter, batch_idx, cur_epoch),
                                                callback=my_callback)
                '''
                if (batch_idx / opt.interval) % 100 == 0:
                    print('- epoch = %d, min_loss = %f -\n' % (cur_epoch, min_loss))
                    # ---------------------------------------
                    sentences = []
                    for i in range(5):
                        sentence = []
                        for j in range(y_prob.size(1)):
                            sentence.append(int(y_prob[i][j].argmax()))
                        sentences.append(sentence)
                    sentences = batch_idx2str(sentences, trg_vocab)
                    for i in range(5):
                        print('source:')
                        print(' '.join(src_raw[i]))
                        print('ref:')
                        print(' '.join(trg_raw[i]))
                        print('pred:')
                        print(' '.join(sentences[i]))
                        print('---------------------')
                    # ---------------------------------------
        optim_wrapper.zero_grad()
        optim_wrapper.update_lr_per_epoch()
        save_checkpoint_model(model, opt.checkpoint_dir, cur_epoch, info='Transformer_checkpoint_model')
        print('$ min_loss: %f, max_bleu: %f $\n' % (min_loss, max_bleu))
    # close the pool and wait for dev-set BLEU evaluation to finish
    subprocess_pool.close()
    subprocess_pool.join()
class MetricTester:
    """Class used for efficiently running a lot of parametrized tests in ddp mode.
    Makes sure that ddp is only setup once and that the pool of processes is
    used for all tests.

    All tests should subclass from this and implement a new method called
    `test_metric_name` where the method `self.run_metric_test` is called inside.
    """

    atol = 1e-8

    def setup_class(self):
        """Setup the metric class. This will spawn the pool of workers that are
        used for metric testing and setup_ddp
        """
        self.poolSize = NUM_PROCESSES
        self.pool = Pool(processes=self.poolSize)
        self.pool.starmap(setup_ddp, [(rank, self.poolSize) for rank in range(self.poolSize)])

    def teardown_class(self):
        """Close pool of workers"""
        self.pool.close()
        self.pool.join()

    def run_functional_metric_test(
        self,
        preds: Tensor,
        target: Tensor,
        metric_functional: Callable,
        sk_metric: Callable,
        metric_args: dict = None,
        fragment_kwargs: bool = False,
        **kwargs_update,
    ):
        """Main method that should be used for testing functions. Call this inside
        the testing method.

        Args:
            preds: torch tensor with predictions
            target: torch tensor with targets
            metric_functional: lightning metric class that should be tested
            sk_metric: callable function that is used for comparison
            metric_args: dict with additional arguments used for class initialization
            fragment_kwargs: whether tensors in kwargs should be divided as `preds` and
                `target` among processes
            kwargs_update: Additional keyword arguments that will be passed with preds and
                target when running update on the metric.
        """
        device = 'cuda' if (torch.cuda.is_available() and torch.cuda.device_count() > 0) else 'cpu'

        _functional_test(
            preds=preds,
            target=target,
            metric_functional=metric_functional,
            sk_metric=sk_metric,
            metric_args=metric_args,
            atol=self.atol,
            device=device,
            fragment_kwargs=fragment_kwargs,
            **kwargs_update,
        )

    def run_class_metric_test(
        self,
        ddp: bool,
        preds: Tensor,
        target: Tensor,
        metric_class: Metric,
        sk_metric: Callable,
        dist_sync_on_step: bool,
        metric_args: dict = None,
        check_dist_sync_on_step: bool = True,
        check_batch: bool = True,
        fragment_kwargs: bool = False,
        check_scriptable: bool = True,
        **kwargs_update,
    ):
        """Main method that should be used for testing class. Call this inside
        testing methods.

        Args:
            ddp: bool, if running in ddp mode or not
            preds: torch tensor with predictions
            target: torch tensor with targets
            metric_class: lightning metric class that should be tested
            sk_metric: callable function that is used for comparison
            dist_sync_on_step: bool, if true will synchronize metric state across
                processes at each ``forward()``
            metric_args: dict with additional arguments used for class initialization
            check_dist_sync_on_step: bool, if true will check if the metric is also
                correctly calculated per batch per device (and not just at the end)
            check_batch: bool, if true will check if the metric is also correctly
                calculated across devices for each batch (and not just at the end)
            fragment_kwargs: whether tensors in kwargs should be divided as `preds` and
                `target` among processes
            kwargs_update: Additional keyword arguments that will be passed with preds and
                target when running update on the metric.
""" if not metric_args: metric_args = {} if ddp: if sys.platform == "win32": pytest.skip("DDP not supported on windows") self.pool.starmap( partial( _class_test, preds=preds, target=target, metric_class=metric_class, sk_metric=sk_metric, dist_sync_on_step=dist_sync_on_step, metric_args=metric_args, check_dist_sync_on_step=check_dist_sync_on_step, check_batch=check_batch, atol=self.atol, fragment_kwargs=fragment_kwargs, check_scriptable=check_scriptable, **kwargs_update, ), [(rank, self.poolSize) for rank in range(self.poolSize)], ) else: device = 'cuda' if (torch.cuda.is_available() and torch.cuda.device_count() > 0) else 'cpu' _class_test( rank=0, worldsize=1, preds=preds, target=target, metric_class=metric_class, sk_metric=sk_metric, dist_sync_on_step=dist_sync_on_step, metric_args=metric_args, check_dist_sync_on_step=check_dist_sync_on_step, check_batch=check_batch, atol=self.atol, device=device, fragment_kwargs=fragment_kwargs, check_scriptable=check_scriptable, **kwargs_update, ) def run_precision_test_cpu( self, preds: Tensor, target: Tensor, metric_module: Metric, metric_functional: Callable, metric_args: dict = None, **kwargs_update, ): """Test if a metric can be used with half precision tensors on cpu Args: preds: torch tensor with predictions target: torch tensor with targets metric_module: the metric module to test metric_functional: the metric functional to test metric_args: dict with additional arguments used for class initialization kwargs_update: Additional keyword arguments that will be passed with preds and target when running update on the metric. """ metric_args = metric_args or {} _assert_half_support(metric_module(**metric_args), metric_functional, preds, target, device="cpu", **kwargs_update) def run_precision_test_gpu( self, preds: Tensor, target: Tensor, metric_module: Metric, metric_functional: Callable, metric_args: dict = None, **kwargs_update, ): """Test if a metric can be used with half precision tensors on gpu Args: preds: torch tensor with predictions target: torch tensor with targets metric_module: the metric module to test metric_functional: the metric functional to test metric_args: dict with additional arguments used for class initialization kwargs_update: Additional keyword arguments that will be passed with preds and target when running update on the metric. """ metric_args = metric_args or {} _assert_half_support(metric_module(**metric_args), metric_functional, preds, target, device="cuda", **kwargs_update) def run_differentiability_test( self, preds: Tensor, target: Tensor, metric_module: Metric, metric_functional: Callable, metric_args: dict = None, ): """Test if a metric is differentiable or not Args: preds: torch tensor with predictions target: torch tensor with targets metric_module: the metric module to test metric_args: dict with additional arguments used for class initialization """ metric_args = metric_args or {} # only floating point tensors can require grad metric = metric_module(**metric_args) if preds.is_floating_point(): preds.requires_grad = True out = metric(preds[0], target[0]) # metrics can return list of values if isinstance(out, list): assert all(metric.is_differentiable == o.requires_grad for o in out) else: assert metric.is_differentiable == out.requires_grad if metric.is_differentiable: # check for numerical correctness assert torch.autograd.gradcheck( partial(metric_functional, **metric_args), (preds[0].double(), target[0])) # reset as else it will carry over to other tests preds.requires_grad = False
import torch
from torch.multiprocessing import Pool, Process, set_start_method
from torch.autograd import Variable
import numpy as np
from scipy.ndimage import zoom


def get_pred(args):
    img = args[0]
    scale = args[1]
    # feed input data (volatile=True dates this snippet to PyTorch < 0.4)
    input_img = Variable(torch.from_numpy(img), volatile=True).cuda()
    return input_img


if __name__ == '__main__':
    try:
        set_start_method('spawn')
    except RuntimeError:
        pass

    img = np.float32(np.random.randint(0, 2, (300, 300, 3)))
    scales = [1, 2, 3, 4, 5]
    scale_list = []
    for scale in scales:
        scale_list.append([img, scale])
    multi_pool = Pool(processes=5)
    predictions = multi_pool.map(get_pred, scale_list)
    multi_pool.close()
    multi_pool.join()
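The `set_start_method('spawn')` guard is what makes CUDA use inside pool workers legal, since CUDA is not fork-safe. A minimal sketch of the same pattern on modern PyTorch (plain tensors replace the deprecated `Variable`; names here are illustrative):

# Minimal modern-PyTorch sketch: workers that touch the GPU require the
# 'spawn' start method, because a forked child inherits a broken CUDA context.
import torch
from torch.multiprocessing import Pool, set_start_method

def to_gpu(x):
    # each spawned worker initializes CUDA in its own process
    return (x.cuda() * 2).cpu() if torch.cuda.is_available() else x * 2

if __name__ == '__main__':
    set_start_method('spawn', force=True)
    tensors = [torch.ones(4) for _ in range(5)]
    with Pool(2) as pool:
        out = pool.map(to_gpu, tensors)
    print(out)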
class DataLoader():
    def __init__(self, args):
        self.dir_bin = args.dir_bin
        line_load_list = self.dir_bin + 'line_load_list.t7'
        vocab_file = self.dir_bin + 'vocab.t7'
        assert os.path.isfile(self.dir_bin + 'specM.bin')
        assert os.path.isfile(self.dir_bin + 'specL.bin')
        assert os.path.isfile(self.dir_bin + 'text.bin')

        self.batch_size = args.batch_size
        self.trunc_size = args.trunc_size
        self.r_factor = args.r_factor
        self.dec_out_size = args.dec_out_size
        self.post_out_size = args.post_out_size
        self.shuffle_data = True if args.shuffle_data == 1 else False
        self.iter_per_epoch = None
        self.is_subbatch_end = True
        self.curr_split = None
        self.vocab_size = None
        self.process = None
        self.queue = Queue(maxsize=args.load_queue_size)
        self.n_workers = args.n_workers
        self.use_gpu = args.use_gpu
        self.num_gpu = len(args.gpu) if len(args.gpu) > 0 else 1
        self.pinned_memory = True if args.pinned_memory == 1 and self.use_gpu else False

        self.vocab_size = self.get_num_vocab(vocab_file)
        text_limit = args.text_limit
        wave_limit = args.wave_limit

        # col1: idx / col2: wave_length / col3: text_length
        # col4: offset_M / col5: offset_L / col6: offset_T
        self.load_list = torch.load(line_load_list)
        spec_len_list = self.load_list[:, 1].clone()
        text_len_list = self.load_list[:, 2].clone()

        # exclude files whose wave length exceeds wave_limit
        sort_length, sort_idx = spec_len_list.sort()
        text_len_list = torch.gather(text_len_list, 0, sort_idx)
        sort_idx = sort_idx.view(-1, 1).expand_as(self.load_list)
        self.load_list = torch.gather(self.load_list, 0, sort_idx)
        end_idx = sort_length.le(wave_limit).sum()
        spec_len_list = sort_length[:end_idx]
        text_len_list = text_len_list[:end_idx]
        self.load_list = self.load_list[:end_idx]

        # exclude files whose text length exceeds text_limit
        sort_length, sort_idx = text_len_list.sort()
        spec_len_list = torch.gather(spec_len_list, 0, sort_idx)
        sort_idx = sort_idx.view(-1, 1).expand_as(self.load_list)
        self.load_list = torch.gather(self.load_list, 0, sort_idx)
        end_idx = sort_length.le(text_limit).sum()
        end_idx = end_idx - (end_idx % self.batch_size)  # drop residual data
        text_len_list = sort_length[:end_idx]
        spec_len_list = spec_len_list[:end_idx]
        self.load_list = self.load_list[:end_idx]

        # sort by wave length
        _, sort_idx = spec_len_list.sort(0, descending=True)
        text_len_list = torch.gather(text_len_list, 0, sort_idx)
        sort_idx = sort_idx.view(-1, 1).expand_as(self.load_list)
        self.load_list = torch.gather(self.load_list, 0, sort_idx)

        # sort by text length in each batch (PackedSequence requires it)
        num_batches_per_epoch = self.load_list.size(0) // self.batch_size
        text_len_list = text_len_list.view(num_batches_per_epoch, -1)
        self.load_list = self.load_list.view(num_batches_per_epoch, -1,
                                             self.load_list.size(1))
        sort_length, sort_idx = text_len_list.sort(1, descending=True)
        sort_idx = sort_idx.view(num_batches_per_epoch, -1, 1).expand_as(self.load_list)
        self.load_list = torch.gather(self.load_list, 1, sort_idx)

        # shuffle while preserving order in a batch
        if self.shuffle_data:
            _, sort_idx = torch.randn(num_batches_per_epoch).sort()
            sort_idx = sort_idx.view(-1, 1, 1).expand_as(self.load_list)
            self.load_list = torch.gather(self.load_list, 0, sort_idx)  # nbpe x N x 6

        self.load_list = self.load_list.long()

        # compute the number of iterations needed
        spec_len_list = spec_len_list.view(num_batches_per_epoch, -1)
        spec_len_list, _ = spec_len_list.div(self.trunc_size).ceil().max(1)
        self.iter_per_epoch = int(spec_len_list.sum())

        # set split cursor
        self.split_sizes = {'train': self.load_list.size(0), 'val': -1, 'test': -1}
        self.split_cursor = {'train': 0, 'val': 0, 'test': 0}

    def next_batch(self, split):
        T, idx = self.trunc_size, self.split_cursor[split]

        # seek and load data from raw files
        if self.is_subbatch_end:
            self.is_subbatch_end = False
            self.subbatch_cursor = 0

            if self.curr_split != split:
                self.curr_split = split
                if self.process is not None:
                    self.process.terminate()
                self.process = Process(target=self.start_async_loader,
                                       args=(split, self.split_cursor[split]))
                self.process.start()

            self.len_text, self.len_wave, self.curr_text, self.curr_specM, self.curr_specL = self.queue.get()
            self.split_cursor[split] = (idx + 1) % self.split_sizes[split]
            self.subbatch_max_len = self.len_wave.max()

        # Variables to return
        # +1 to length of y to consider shifting for target y
        subbatch_len_text = [x for x in self.len_text]
        subbatch_len_wave = [min(x, T) for x in self.len_wave]
        x_text = self.curr_text
        y_specM = self.curr_specM[:, self.subbatch_cursor:self.subbatch_cursor + max(subbatch_len_wave) + 1].contiguous()
        y_specL = self.curr_specL[:, self.subbatch_cursor:self.subbatch_cursor + max(subbatch_len_wave) + 1].contiguous()

        if self.use_gpu:
            if self.pinned_memory:
                x_text = x_text.pin_memory()
                y_specM = y_specM.pin_memory()
                y_specL = y_specL.pin_memory()
            x_text = x_text.cuda()
            y_specM = y_specM.cuda()
            y_specL = y_specL.cuda()

        # advance split_cursor or move on to the next batch
        if self.subbatch_cursor + T < self.subbatch_max_len:
            self.subbatch_cursor = self.subbatch_cursor + T
            self.len_wave.sub_(T).clamp_(min=0)
        else:
            self.is_subbatch_end = True

        # don't compute for empty batch elements
        if subbatch_len_wave.count(0) > 0:
            self.len_wave_mask = [idx for idx, l in enumerate(subbatch_len_wave) if l > 0]
            self.len_wave_mask = torch.LongTensor(self.len_wave_mask)
            if self.use_gpu:
                self.len_wave_mask = self.len_wave_mask.cuda()
            x_text = torch.index_select(x_text, 0, self.len_wave_mask)
            y_specM = torch.index_select(y_specM, 0, self.len_wave_mask)
            y_specL = torch.index_select(y_specL, 0, self.len_wave_mask)
            subbatch_len_text = [subbatch_len_text[idx] for idx in self.len_wave_mask]
            subbatch_len_wave = [subbatch_len_wave[idx] for idx in self.len_wave_mask]
        else:
            self.len_wave_mask = None

        return x_text, y_specM, y_specL, subbatch_len_wave, subbatch_len_text

    def start_async_loader(self, split, load_start_idx):
        # load batches to the queue asynchronously since it is a bottleneck
        N, r = self.batch_size, self.r_factor
        load_curr_idx = load_start_idx

        while True:
            data_T, data_M, data_L, len_T, len_M = ([None for _ in range(N)] for _ in range(5))

            # deploy workers to load data
            self.pool = Pool(self.n_workers)
            partial_func = partial(load_data_and_length, self.dir_bin,
                                   self.load_list[load_curr_idx])
            results = self.pool.map_async(func=partial_func, iterable=range(N))
            self.pool.close()
            self.pool.join()

            for result in results.get():
                data_M[result[0]] = result[1]
                data_L[result[0]] = result[2]
                data_T[result[0]] = result[3]
                len_T[result[0]] = result[4]
                len_M[result[0]] = result[5]

            # TODO: output size is not accurate..
            len_text = torch.IntTensor(len_T)
            len_wave = torch.Tensor(len_M).div(r).ceil().mul(r).int()  # consider r_factor
            curr_text = torch.LongTensor(N, len_text.max()).fill_(0)  # null-padding at tail
            curr_specM = torch.Tensor(N, len_wave.max() + 1, self.dec_out_size).fill_(0)  # null-padding at tail
            curr_specL = torch.Tensor(N, len_wave.max() + 1, self.post_out_size).fill_(0)  # null-padding at tail

            # fill the template tensors
            for j in range(N):
                curr_text[j, 0:data_T[j].size(0)].copy_(data_T[j])
                curr_specM[j, 0:data_M[j].size(0)].copy_(data_M[j])
                curr_specL[j, 0:data_L[j].size(0)].copy_(data_L[j])

            self.queue.put((len_text, len_wave, curr_text, curr_specM, curr_specL))
            load_curr_idx = (load_curr_idx + 1) % self.split_sizes[split]

    def mask_prev_h(self, prev_h):
        if self.len_wave_mask is not None:
            if self.use_gpu:
                self.len_wave_mask = self.len_wave_mask.cuda()
            h_att, h_dec1, h_dec2 = prev_h
            h_att = torch.index_select(h_att.data, 1, self.len_wave_mask)  # dim 1 is the batch index
            h_dec1 = torch.index_select(h_dec1.data, 1, self.len_wave_mask)
            h_dec2 = torch.index_select(h_dec2.data, 1, self.len_wave_mask)
            prev_h = (Variable(h_att), Variable(h_dec1), Variable(h_dec2))
        else:
            prev_h = prev_h
        return prev_h

    def get_num_vocab(self, vocab_file=None):
        if self.vocab_size:
            return self.vocab_size
        else:
            vocab_dict = torch.load(vocab_file)
            return len(vocab_dict) + 1  # +1 to consider null-padding
def train(model, src_vocab, trg_vocab, optim_wrapper, train_iter, vldt_iter):
    global opt, min_loss, max_bleu
    subprocess_pool = Pool(2)
    # start training
    model.train()
    print('!!!train', id(model))
    for epoch in range(opt.epoch, opt.nepoch):
        cur_epoch = epoch + 1
        total_loss = 0
        print('############### epoch = %d ###############\n' % cur_epoch)
        for batch_idx, batch in enumerate(train_iter, start=1):
            sorted_batch = sort_batch(batch)
            src_raw = sorted_batch[0]
            trg_raw = sorted_batch[1]
            # convert source and target sentences to word indices
            src = batch_str2idx_with_flag(src_raw, src_vocab, unk=UNK, pad=PAD, sos=SOS, eos=EOS)
            f_trg = batch_str2idx_with_flag(trg_raw, trg_vocab, unk=UNK, pad=PAD, sos=SOS, eos=EOS)
            src, f_trg = to_Tensor(src, f_trg, tensor_type=torch.LongTensor, cuda=opt.cuda)
            src_mask = get_batch_mask(src, src_vocab, PAD)
            f_trg_mask = get_batch_mask(f_trg, trg_vocab, PAD)
            '''
            # b_trg = batch_str2idx_with_flag(trg_raw, trg_vocab, unk=UNK, pad=PAD, sos=SOS, eos=EOS, reverse=True)  # reversed target-side batch, unused for now
            # src, f_trg, b_trg = to_Tensor(src, f_trg, b_trg, tensor_type=torch.LongTensor, cuda=opt.cuda)
            # b_trg_mask = get_batch_mask(b_trg, trg_vocab, PAD)
            '''
            loss = model(src, src_mask, f_trg, f_trg_mask)  # TODO
            total_loss = total_loss + float(loss)
            loss.backward()
            if batch_idx % opt.interval == 0:
                total_loss = total_loss / opt.interval
                if total_loss < min_loss:
                    print('& epoch = %d batch_idx = %d min_loss = %f &\n' % (cur_epoch, batch_idx / opt.interval, total_loss))
                    min_loss = total_loss
                    save_min_loss_model(model, opt.checkpoint_dir, batch_idx / opt.interval, cur_epoch, min_loss, info='RNNSearch_min_loss_model')
                else:
                    print('- batch_idx = %d, loss = %f -\n' % (batch_idx / opt.interval, total_loss))
                # clip gradients before the parameter update (L2 norm by default)
                torch.nn.utils.clip_grad_norm_(model.parameters(), opt.max_norm, norm_type=2)
                optim_wrapper.step()
                optim_wrapper.zero_grad()
                total_loss = 0
                optim_wrapper.update_lr_per_step()
                '''
                # uncomment to evaluate dev-set BLEU in an extra CPU process:
                # from epoch 4 on, spawn a subprocess every opt.vldt_freq batches
                if cur_epoch >= 4 and (batch_idx * opt.interval) % opt.vldt_freq == 0:
                    cpu_model = copy.deepcopy(model).cpu()
                    subprocess_pool.apply_async(evaluate,
                                                args=(opt, cpu_model, src_vocab, trg_vocab, vldt_iter, batch_idx, cur_epoch),
                                                callback=my_callback)
                '''
        optim_wrapper.zero_grad()
        optim_wrapper.update_lr_per_epoch()
        save_checkpoint_model(model, opt.checkpoint_dir, cur_epoch, info='RNNSearch_checkpoint_model')
        print('$ min_loss: %f, max_bleu: %f $\n' % (min_loss, max_bleu))
    # close the pool and wait for dev-set BLEU evaluation to finish
    subprocess_pool.close()
    subprocess_pool.join()
class MetricTester:
    """Class used for efficiently running a lot of parametrized tests in ddp mode.
    Makes sure that ddp is only setup once and that the pool of processes is
    used for all tests.

    All tests should subclass from this and implement a new method called
    `test_metric_name` where the method `self.run_metric_test` is called inside.
    """

    def setup_class(self):
        """Setup the metric class. This will spawn the pool of workers that are
        used for metric testing and setup_ddp
        """
        try:
            set_start_method('spawn')
        except RuntimeError:
            pass
        self.poolSize = NUM_PROCESSES
        self.pool = Pool(processes=self.poolSize)
        self.pool.starmap(setup_ddp, [(rank, self.poolSize) for rank in range(self.poolSize)])

    def teardown_class(self):
        """Close pool of workers"""
        self.pool.close()
        self.pool.join()

    def run_metric_test(
        self,
        ddp: bool,
        preds: torch.Tensor,
        target: torch.Tensor,
        metric_class: Metric,
        sk_metric: Callable,
        dist_sync_on_step: bool,
        metric_args: dict = None,
        check_dist_sync_on_step: bool = True,
        check_batch: bool = True,
    ):
        """Main method that should be used for testing. Call this inside testing
        methods.

        Args:
            ddp: bool, if running in ddp mode or not
            preds: torch tensor with predictions
            target: torch tensor with targets
            metric_class: lightning metric class that should be tested
            sk_metric: callable function that is used for comparison
            dist_sync_on_step: bool, if true will synchronize metric state across
                processes at each ``forward()``
            metric_args: dict with additional arguments used for class initialization
            check_dist_sync_on_step: bool, if true will check if the metric is also
                correctly calculated per batch per device (and not just at the end)
            check_batch: bool, if true will check if the metric is also correctly
                calculated across devices for each batch (and not just at the end)
        """
        metric_args = metric_args or {}  # avoid a mutable default argument
        if ddp:
            if sys.platform == "win32":
                pytest.skip("DDP not supported on windows")

            self.pool.starmap(
                partial(
                    _compute_batch,
                    preds=preds,
                    target=target,
                    metric_class=metric_class,
                    sk_metric=sk_metric,
                    dist_sync_on_step=dist_sync_on_step,
                    metric_args=metric_args,
                    check_dist_sync_on_step=check_dist_sync_on_step,
                    check_batch=check_batch,
                ),
                [(rank, self.poolSize) for rank in range(self.poolSize)],
            )
        else:
            _compute_batch(
                0,
                1,
                preds=preds,
                target=target,
                metric_class=metric_class,
                sk_metric=sk_metric,
                dist_sync_on_step=dist_sync_on_step,
                metric_args=metric_args,
                check_dist_sync_on_step=check_dist_sync_on_step,
                check_batch=check_batch,
            )
def eval_(self, X, *args):
    """
    Evaluate a number of DARTS architectures in parallel. X should be a list
    of Genotypes defined by the DARTS API.
    """
    from math import ceil
    n_parallel = min(len(X), self.n_gpu)
    res = []
    diag_stats = []
    if n_parallel == 0:
        raise ValueError("No GPUs available!")
    elif n_parallel == 1:
        for i, genotype in enumerate(X):
            t = DARTSTrainer(self.data_path, self.save_path, genotype,
                             self.dataset, cutout=self.cutout,
                             auxiliary_tower=self.auxiliary,
                             epochs=self.epochs,
                             eval_policy=self.query_policy)
            print('Start training: ', i + 1, "/ ", len(X))
            try:
                t.train()  # bottleneck
                result = t.retrieve()
                res.append(1. - result[0] / 100.)  # turn accuracy into error
                diag_stats.append(result[1])
            except Exception as e:
                logging.error("An error occurred in the current architecture. "
                              "Assigning nan to the arch. The error is:")
                logging.error(e)
                res.append(np.nan)
                diag_stats.append(None)
    else:
        gpu_ids = range(n_parallel)
        num_reps = ceil(len(X) / float(n_parallel))
        for i in range(num_reps):
            # select the number of parallel archs to evaluate
            x = X[i * n_parallel:min((i + 1) * n_parallel, len(X))]
            selected_gpus = gpu_ids[:len(x)]
            other_arg = [self.data_path, self.save_path, self.dataset,
                         self.cutout, self.epochs, self.query_policy]
            args = list(map(list, zip(x, selected_gpus)))
            args = [a + other_arg for a in args]
            pool = Pool(processes=len(x))
            current_res = pool.starmap(parallel_eval, args)
            pool.close()
            pool.join()
            # filter out the negative results due to errors
            res.extend([i for i in current_res if i >= 0])
    res = np.array(res).flatten()
    if self.log_scale:
        res = np.log(res)
    if self.negative:
        res = -res
    return res, diag_stats
def train(self, data, labels, val_data=None, val_labels=None, warm_start=False):
    """
    :param data:
    :param labels:
    :param val_data:
    :param val_labels:
    :param warm_start:
    :return:
    """
    # initialize variables
    data = torch.from_numpy(data).float()
    labels = torch.from_numpy(labels).char()
    if val_data is None:
        train_data, val_data = data, data
        train_labels, val_labels = labels, labels
    else:
        train_data = data
        train_labels = labels
        val_data = torch.from_numpy(val_data).float()
        val_labels = torch.from_numpy(val_labels).char()
    orig_cols = train_data.size(1)

    # count classes and record their indices
    self.plus_row_index = []
    self.minus_row_index = []
    self.orig_minus = 0
    self.orig_plus = 0
    for idx, value in enumerate(train_labels):
        if value == 1:
            self.orig_plus += 1
            self.plus_row_index.append(idx)
        else:
            self.orig_minus += 1
            self.minus_row_index.append(idx)

    # balanced pick of rows and cols
    plus = max(2, int(self.orig_plus * self.nrows))
    minus = max(2, int(self.orig_minus * self.nrows))
    num_cols = max(min(5, orig_cols), int(self.nfeatures * orig_cols))

    # initialize upper-triangle matrix and reference index
    rows_sum = plus + minus
    if self.adv_train:
        rows_sum = rows_sum * 2
        # plus = plus * 2
        # minus = minus * 2
    self.yp = torch.ones((self.width, rows_sum), dtype=torch.int8)
    self.ref_full_index = torch.repeat_interleave(
        torch.arange(self.updated_features * self.step.shape[0]).view((-1, 1)),
        rows_sum, dim=1)

    # multi-process
    c = self.round // self.num_gpus
    results = []
    logs = {}
    for r in range(c + 1):
        pool = Pool(self.n_jobs)
        results = []
        for t in range(min(self.n_jobs, self.round - r * self.num_gpus)):
            if warm_start and self.w_index != []:
                column_indices = self.w_index[r * self.num_gpus + t]
                w1 = self.w1[:, :, r * self.num_gpus + t]
                w2 = self.w2[:, r * self.num_gpus + t]
            else:
                column_indices = np.random.choice(np.arange(orig_cols),
                                                  num_cols, replace=False)
                # column_indices = np.arange(orig_cols)
                self.w_index.append(column_indices)
            results.append(
                pool.apply_async(self.single_run,
                                 args=(train_data, train_labels, plus, minus,
                                       val_data, val_labels, column_indices,
                                       t % self.num_gpus)))
        pool.close()
        pool.join()
        for i, result in enumerate(results):
            temp_w, temp_b, temp_obj = result.get()
            # logs['vote%d_train' % i] = train_log
            # logs['vote%d_test' % i] = test_log
            if warm_start:
                self.w[:, i] = temp_w
                self.b[:, i] = temp_b
                self.obj[i] = temp_obj
            else:
                self.w.append(temp_w.view((-1, 1)))
                self.b.append(temp_b.view((1, 1)))
                self.obj.append(temp_obj)
        del pool, results

    if warm_start is False:
        self.w = torch.cat(self.w, dim=1)
        self.b = torch.cat(self.b, dim=1)
        self.obj = torch.Tensor(self.obj)
    best_index = self.obj.argmax()
    self.best_acc = self.obj[best_index]
    self.best_w = self.w[:, best_index]
    self.best_b = self.b[:, best_index]
    self.best_w_index = self.w_index[best_index]
    del self.yp, self.ref_full_index
    return
def train(self, data, labels, val_data=None, val_labels=None, warm_start=False):
    """
    :param data:
    :param labels:
    :param val_data:
    :param val_labels:
    :param warm_start:
    :return:
    """
    # initialize variables
    data = torch.from_numpy(data).float()
    labels = torch.from_numpy(labels).char()
    if val_data is None:
        # rows = labels.shape[0]
        # index = np.random.permutation(rows)
        # val_size = rows // 5
        # val_data = data[index[:val_size]]
        # val_labels = labels[index[:val_size]]
        # train_data = data[index[val_size:]]
        # train_labels = labels[index[val_size:]]
        train_data, val_data = data, data
        train_labels, val_labels = labels, labels
    else:
        train_data = data
        train_labels = labels
        val_data = torch.from_numpy(val_data).float()
        val_labels = torch.from_numpy(val_labels).char()
    orig_cols = train_data.size(1)

    # count classes and record their indices
    self.plus_row_index = []
    self.minus_row_index = []
    self.orig_minus = 0
    self.orig_plus = 0
    for idx, value in enumerate(train_labels):
        if value == 1:
            self.orig_plus += 1
            self.plus_row_index.append(idx)
        else:
            self.orig_minus += 1
            self.minus_row_index.append(idx)

    # balanced pick of rows and cols
    plus = max(2, int(self.orig_plus * self.nrows))
    minus = max(2, int(self.orig_minus * self.nrows))
    num_cols = max(min(5, orig_cols), int(self.nfeatures * orig_cols))

    # initialize upper-triangle matrix and reference index
    rows_sum = plus + minus
    if self.adv_train:
        rows_sum = rows_sum * 2
        # plus = plus * 2
        # minus = minus * 2
    self.yp = torch.ones((rows_sum, rows_sum), dtype=torch.int8).triu_(0)
    self.ref_full_index = torch.repeat_interleave(
        torch.arange(self.updated_features * self.step.shape[0]).view((-1, 1)),
        rows_sum, dim=1)

    # multi-process
    pool = Pool(self.n_jobs)
    results = []
    for r in range(self.round):
        if warm_start and self.w_index != []:
            column_indices = self.w_index[r]
            w = self.w[:, r]
        else:
            column_indices = np.random.choice(np.arange(orig_cols), num_cols,
                                              replace=False)
            self.w_index.append(column_indices)
            w = np.random.uniform(-1, 1, size=(num_cols,)).astype(np.float32)
        results.append(
            pool.apply_async(
                self.single_run_adv if self.adv_train else self.single_run,
                args=(train_data, train_labels, plus, minus, val_data,
                      val_labels, w, column_indices, r % self.num_gpus)))
    pool.close()
    pool.join()
    for i, result in enumerate(results):
        temp_w, temp_b, temp_obj = result.get()
        if warm_start:
            self.w[:, i] = temp_w
            self.b[:, i] = temp_b
            self.obj[i] = temp_obj
        else:
            self.w.append(temp_w.view((-1, 1)))
            self.b.append(temp_b.view((1, 1)))
            self.obj.append(temp_obj)

    if warm_start is False:
        self.w = torch.cat(self.w, dim=1)
        self.b = torch.cat(self.b, dim=1)
        self.obj = torch.Tensor(self.obj)
    best_index = self.obj.argmax()
    self.best_acc = self.obj[best_index]
    self.best_w = self.w[:, best_index]
    self.best_b = self.b[:, best_index]
    self.best_w_index = self.w_index[best_index]
def main():
    opt = parse_args()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    random.seed(opt.seed)
    numpy.random.seed(opt.seed)
    torch.manual_seed(opt.seed)

    data_path = 'traffic-data/state-action-cost/data_i80_v0'
    dataloader = DataLoader(None, opt, 'i80')
    (forward_model, value_function, policy_network_il,
     policy_network_mper, data_stats) = load_models(opt, data_path, device)
    splits = torch.load(path.join(data_path, 'splits.pth'))

    if opt.u_reg > 0.0:
        forward_model.train()
        forward_model.opt.u_hinge = opt.u_hinge
        if hasattr(forward_model, 'value_function'):
            forward_model.value_function.train()
        planning.estimate_uncertainty_stats(forward_model, dataloader,
                                            n_batches=50, npred=opt.npred)

    gym.envs.registration.register(
        id='I-80-v1',
        entry_point='map_i80_ctrl:ControlledI80',
        kwargs=dict(
            fps=10,
            nb_states=opt.ncond,
            display=False,
            delta_t=0.1,
            store_simulator_video=opt.save_sim_video,
            show_frame_count=False,
        )
    )

    print('Building the environment (loading data, if any)')
    env_names = {
        'i80': 'I-80-v1',
    }
    env = gym.make(env_names[opt.map])

    plan_file = build_plan_file_name(opt)
    print(f'[saving to {path.join(opt.save_dir, plan_file)}]')

    # different performance metrics
    time_travelled, distance_travelled, road_completed = [], [], []
    collided, offscreen = [], []
    # values saved for later inspection
    action_sequences, state_sequences, cost_sequences = [], [], []
    image_sequences = []

    writer = utils.create_tensorboard_writer(opt)

    n_test = len(splits['test_indx'])

    set_start_method('spawn')
    pool = Pool(opt.num_processes)

    async_results = []
    time_started = time.time()
    total_images = 0

    for j in range(n_test):
        car_path = dataloader.ids[splits['test_indx'][j]]
        timeslot, car_id = utils.parse_car_path(car_path)
        car_sizes = torch.tensor(
            dataloader.car_sizes[sorted(list(dataloader.car_sizes.keys()))[timeslot]][car_id]
        )[None, :]
        async_results.append(
            pool.apply_async(
                process_one_episode,
                (opt, env, car_path, forward_model, policy_network_il,
                 data_stats, plan_file, j, car_sizes)
            )
        )

    for j in range(n_test):
        simulation_result = async_results[j].get()

        time_travelled.append(simulation_result.time_travelled)
        distance_travelled.append(simulation_result.distance_travelled)
        road_completed.append(simulation_result.road_completed)
        action_sequences.append(torch.from_numpy(simulation_result.action_sequence))
        state_sequences.append(torch.from_numpy(simulation_result.state_sequence))
        # image_sequences.append(torch.from_numpy(simulation_result.image_sequence))
        cost_sequences.append(simulation_result.cost_sequence)
        total_images += time_travelled[-1]
        collided.append(simulation_result.has_collided)
        offscreen.append(simulation_result.off_screen)

        log_string = ' | '.join((
            f'ep: {j + 1:3d}/{n_test}',
            f'time: {time_travelled[-1]}',
            f'distance: {distance_travelled[-1]:.0f}',
            f'success: {road_completed[-1]:d}',
            f'mean time: {torch.Tensor(time_travelled).mean():.0f}',
            f'mean distance: {torch.Tensor(distance_travelled).mean():.0f}',
            f'mean success: {torch.Tensor(road_completed).mean():.3f}',
        ))
        print(log_string)
        utils.log(path.join(opt.save_dir, f'{plan_file}.log'), log_string)

        if writer is not None:
            # writer.add_video(
            #     f'Video/success={simulation_result.road_completed:d}_{j}',
            #     simulation_result.images.unsqueeze(0), j
            # )
            writer.add_scalar('ByEpisode/Success',
                              simulation_result.road_completed, j)
            writer.add_scalar('ByEpisode/Collision',
                              simulation_result.has_collided, j)
            writer.add_scalar('ByEpisode/OffScreen',
                              simulation_result.off_screen, j)
            writer.add_scalar('ByEpisode/Distance',
                              simulation_result.distance_travelled, j)

    pool.close()
    pool.join()

    diff_time = time.time() - time_started
    print('avg time travelled per second is', total_images / diff_time)

    torch.save({"road_completed": road_completed,
                "collided": collided,
                "offscreen": offscreen},
               path.join(opt.save_dir, f'{plan_file}.others'))
    torch.save(action_sequences, path.join(opt.save_dir, f'{plan_file}.actions'))
    torch.save(state_sequences, path.join(opt.save_dir, f'{plan_file}.states'))
    # torch.save(image_sequences, path.join(opt.save_dir, f'{plan_file}.images'))
    torch.save(cost_sequences, path.join(opt.save_dir, f'{plan_file}.costs'))

    if writer is not None:
        writer.close()
            for i in range(0, table_prep_params['MAX_ROW_LEN'] - rows):
                table.append(
                    [['<PAD>'] * table_prep_params['LENGTH_PER_CELL']] *
                    table_prep_params['MAX_COL_LEN'])
            return table

        def table_words2index(tables):
            w2i = {w: i for i, w in enumerate(vocab)}
            for i, t in enumerate(tables):
                tables[i] = np.vectorize(lambda y: w2i[y])(np.array(t)).tolist()
            return tables

        p = Pool(processes=40)
        X = p.map(pad_table, X)
        p.close()
        p.join()
        X = table_words2index(X)
        X = np.array(X)
        print(X.shape)
        savepkl('./data/xp_2D_10-50_pad.pkl', X)
    else:
        device = torch.device(f"cuda:{1}" if torch.cuda.is_available() else 'cpu')
        dataset = T2VDataset(X, y, vocab, device, config)
        dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
        X_, y_ = next(iter(dataloader))
        print(X_.shape, y_.shape)
    print(time.time() - start)
def train(self, data, labels, val_data=None, val_labels=None, warm_start=False):
    """
    :param data:
    :param labels:
    :param val_data:
    :param val_labels:
    :param warm_start:
    :return:
    """
    # initialize variables
    data = torch.from_numpy(data).float()
    labels = torch.from_numpy(labels).char()
    if val_data is None:
        train_data, val_data = data, data
        train_labels, val_labels = labels, labels
    else:
        train_data = data
        train_labels = labels
        val_data = torch.from_numpy(val_data).float()
        val_labels = torch.from_numpy(val_labels).char()
    orig_cols = train_data.size(1)

    # count classes and record their indices
    self.plus_row_index = []
    self.minus_row_index = []
    self.orig_minus = 0
    self.orig_plus = 0
    for idx, value in enumerate(train_labels):
        if value == 1:
            self.orig_plus += 1
            self.plus_row_index.append(idx)
        else:
            self.orig_minus += 1
            self.minus_row_index.append(idx)

    # balanced pick of rows and cols
    plus = max(2, int(self.orig_plus * self.nrows))
    minus = max(2, int(self.orig_minus * self.nrows))
    num_cols = max(min(5, orig_cols), int(self.nfeatures * orig_cols))

    # initialize upper-triangle matrix and reference indices
    rows_sum = plus + minus
    self.yp = torch.ones((self.width, rows_sum), dtype=torch.int8)
    self.ref_full_index1 = torch.repeat_interleave(
        torch.arange(self.updated_features * self.step.shape[0]).view((-1, 1)),
        rows_sum, dim=1)
    self.ref_full_index2 = torch.repeat_interleave(
        torch.arange(self.hidden_nodes * self.step.shape[0]).view((-1, 1)),
        rows_sum, dim=1)

    # multi-process
    c = self.round // self.num_gpus
    for r in range(c + 1):
        pool = Pool(self.n_jobs)
        results = []
        for t in range(min(self.n_jobs, self.round - r * self.num_gpus)):
            if warm_start and self.w_index != []:
                column_indices = self.w_index[r * self.num_gpus + t]
                w1 = self.w1[:, :, r * self.num_gpus + t]
                w2 = self.w2[:, r * self.num_gpus + t]
            else:
                column_indices = np.random.choice(np.arange(orig_cols),
                                                  num_cols, replace=False)
                # column_indices = np.arange(orig_cols)
                self.w_index.append(column_indices)
            results.append(
                pool.apply_async(self.single_run,
                                 args=(train_data, train_labels, plus, minus,
                                       val_data, val_labels, column_indices,
                                       t % self.num_gpus)))
        pool.close()
        pool.join()
        df = pd.DataFrame(columns=[])
        for i, result in enumerate(results):
            temp_w1, temp_b1, temp_w2, temp_b2, temp_obj, uba, ba = result.get()
            df['vote %d imbalanced acc' % i] = uba
            df['vote %d balanced acc' % i] = ba
            # temp_w1, temp_b1, temp_w2, temp_b2, temp_obj = self.single_run(train_data, train_labels, plus, minus, val_data, val_labels, w1, w2, column_indices, r % self.num_gpus)
            if warm_start:
                self.w1[:, :, i] = temp_w1
                self.w2[:, i] = temp_w2
                self.b1[:, i] = temp_b1
                self.b2[i] = temp_b2
                self.obj[i] = temp_obj
            else:
                self.w1.append(temp_w1)
                self.w2.append(temp_w2)
                self.b1.append(temp_b1)
                self.b2.append(temp_b2)
                self.obj.append(temp_obj)
        del pool, results
    df.to_csv('v15.csv', index=False)

    if warm_start is False:
        self.w1 = torch.stack(self.w1, dim=2)
        self.w2 = torch.stack(self.w2, dim=1)
        self.b1 = torch.stack(self.b1, dim=1)
        self.b2 = torch.Tensor(self.b2)
        self.obj = torch.Tensor(self.obj)
    best_index = self.obj.argmax()
    self.best_acc = self.obj[best_index]
    self.best_w1 = self.w1[:, :, best_index]
    self.best_w2 = self.w2[:, best_index]
    self.best_b1 = self.b1[:, best_index]
    self.best_b2 = self.b2[best_index]
    self.best_w_index = self.w_index[best_index]
    return
class Boundaryloss(object):
    def __init__(self,
                 dense_crf: DenseCRF,
                 eps: float = 1e-5,
                 crf_num_workers: int = 4):
        """Compute boundary loss

        Args:
            dense_crf: DenseCRF functor
            eps: min prob allowed when clamping probs
            crf_num_workers: number of workers for the parallel CRF
        """
        self.dense_crf = dense_crf
        self.eps = eps
        self.crf_num_workers = crf_num_workers

        self.crf_pool = Pool(self.crf_num_workers)

    def crf(self, imgs, probs):
        np_imgs = BGR2RGB(imgs.cpu().numpy().astype(np.uint8).transpose(0, 2, 3, 1))  # (N, H, W, C)
        np_probs = probs.detach().cpu().numpy()  # (N, C, H, W)

        # scale imgs to the probs shape
        scaled_imgs = nd.zoom(np_imgs,
                              (1.0, np_probs.shape[2] / np_imgs.shape[1],
                               np_probs.shape[3] / np_imgs.shape[2], 1.0),
                              order=1)

        # CRF
        crf_probs = self.crf_pool.starmap(self.dense_crf, zip(scaled_imgs, np_probs))
        crf_prob = np.stack(crf_probs, axis=0)

        # clamp the smoothed probs
        # TODO: can this be removed?
        crf_prob[crf_prob < self.eps] = self.eps
        crf_prob = crf_prob / np.sum(crf_prob, axis=1, keepdims=True)

        # to Tensor
        return torch.from_numpy(crf_prob).float().cuda(probs.get_device())

    def clamp_softmax(self, score, dim=1):
        probs = torch.clamp(F.softmax(score, dim), self.eps, 1)
        probs = probs / torch.sum(probs, dim=dim, keepdim=True)
        return probs

    def __call__(self, images, score_map, out_prob=False
                 ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
        """Compute the constrain-to-boundary loss

        Args:
            images: (N, 3, H, W) RGB img
            score_map: (N, C, H, W) score map
            out_prob: if True, also return the smoothed predicted probs (default: False)

        Returns:
            constrain-to-boundary loss
        """
        probs = self.clamp_softmax(score_map)
        smooth_probs = self.crf(images, probs)
        # compute the KL divergence
        # TODO: is the clamp needed?
        loss = torch.mean(
            torch.sum(smooth_probs *
                      torch.log(torch.clamp(smooth_probs / probs, 0.05, 20)),
                      dim=1))
        if out_prob:
            return loss, smooth_probs
        return loss

    def __del__(self):
        self.crf_pool.close()
        self.crf_pool.join()
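A hypothetical usage sketch of `Boundaryloss`; the `DenseCRF` construction is an assumption, since its signature is not shown here:

# Hypothetical usage sketch (the DenseCRF constructor is assumed, not shown
# above): the functor maps an (img, prob) pair to a CRF-smoothed prob map.
# crf = DenseCRF(...)
# boundary_loss = Boundaryloss(crf, eps=1e-5, crf_num_workers=4)
# images:    (N, 3, H, W) image tensor in 0-255 range
# score_map: (N, C, h, w) raw network logits
# loss = boundary_loss(images, score_map)
# loss.backward()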