def train_loop(self, train_loader, test_loader, model):
    # Called by train_cifar; trains the supernet from scratch.
    best_top1 = 0.0
    for epoch in range(self.epochs):
        logging.info("Learning Rate: {:.4f}".format(
            self.optimizer.param_groups[0]["lr"]))
        self.writer.add_scalar("learning_rate/weights",
                               self.optimizer.param_groups[0]["lr"], epoch)
        logging.info("Start to train for epoch {}".format(epoch))
        self._training_step(model, train_loader, epoch,
                            info_for_logger="_train_step_", scratch=True)
        if self.CONFIG.lr_scheduler == "step":
            self.scheduler.step()
        top1_avg = self._validate(model, test_loader, epoch, scratch=True)
        if best_top1 < top1_avg:
            logging.info("Best top1 acc by now. Save model")
            best_top1 = top1_avg
            save(model, self.optimizer, self.CONFIG.path_to_save_scratch)
    logging.info("The Best top1 acc : {}".format(best_top1))
    return best_top1

def load_pair_tvt_splits():
    dir = join(get_save_path(), 'pairs_tvt_split')
    train_ratio = int(FLAGS.tvt_ratio[0] * 100)
    val_ratio = int(FLAGS.tvt_ratio[1] * 100)
    test_ratio = 100 - train_ratio - val_ratio
    ensure_train_connectivity_str = 'ensure_train_connectivity_{}'.format(
        str(FLAGS.ensure_train_connectivity).lower())
    num_folds = 1 if FLAGS.cross_val is None else FLAGS.num_folds
    sfn = '{}_{}_seed_{}_folds_{}_train_{}_val_{}_test_{}_num_negative_pairs_' \
          '{}_{}_feat_size_{}_{}'.format(
              FLAGS.dataset, FLAGS.random_seed, num_folds,
              train_ratio, val_ratio, test_ratio,
              ensure_train_connectivity_str,
              FLAGS.num_negative_samples if FLAGS.negative_sample else 0,
              '_'.join(get_flags_with_prefix_as_list('node_fe', FLAGS)),
              FLAGS.feat_size,
              '_'.join([node_feat.replace('_', '')
                        for node_feat in FLAGS.node_feats]))
    tp = join(dir, sfn)
    rtn = load(tp)
    if rtn:
        tvt_pairs_dict = rtn
    else:
        tvt_pairs_dict = _load_pair_tvt_splits_helper()
        save(tvt_pairs_dict, tp)
    return tvt_pairs_dict

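# load_pair_tvt_splits above (and load_dataset below) follow the same
# disk-caching pattern around the shared load()/save() helpers: try the cache
# path first, otherwise compute and persist. A minimal sketch of that pattern;
# cache_or_compute and compute_fn are illustrative names, not part of the
# original code, and it assumes load() returns a falsy value on a cache miss,
# as the functions above rely on:
def cache_or_compute(cache_path, compute_fn):
    cached = load(cache_path)
    if cached:
        return cached
    result = compute_fn()
    save(result, cache_path)
    return result
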
def load_dataset(dataset_name, tvt, node_feats, edge_feats):
    if tvt not in ['train', 'val', 'test', 'all']:
        raise ValueError('Unknown tvt specifier {}'.format(tvt))
    name_list = list((dataset_name, tvt))
    name_list.append('_'.join(
        [node_feat.replace('_', '') for node_feat in node_feats]))
    f_name = '_'.join(name_list)
    f_path = join(get_save_path(), 'dataset', f_name)
    ld = load(f_path)
    if ld:
        dataset = BiGNNDataset(None, None, None, None, None, None, None,
                               None, ld)
    else:
        try:
            dataset = load_raw_interaction_data(dataset_name, node_feats,
                                                edge_feats, tvt)
        except Exception as e:
            print(e)
            raise FileNotFoundError(f'Please get {f_name} from google drive')
        gc.collect()
        save(dataset.__dict__, f_path)
    return dataset

def save_ranking_mat(self, true_m, pred_m, info):
    p = join(self.get_log_dir(), '{}_ranking_mats'.format(info))
    print("in save_ranking_mat")
    save({
        'true_m': true_m.__dict__,
        'pred_m': pred_m.__dict__
    }, p, print_msg=False)

def save_pairs_with_results(self, pairs, info, set_name="validation"):
    p = join(self.get_log_dir(), '{}_pairs'.format(info))
    print("in save_pairs_with_results")
    save(
        {
            '{}_data_pairs'.format(set_name): self._shrink_space_pairs(pairs),
        }, p, print_msg=False)

def search_train_loop(self, val_loader, model, generator):
    tau = 5
    best_top1 = 0.0
    best_loss = 10000.0  # assumed initial value (matches the sibling search loops in this file); the original read best_loss without initializing it
    for epoch in range(self.epochs):
        logging.info("Start to train for search epoch {}".format(epoch))
        logging.info("Tau: {}".format(tau))
        self._generator_training_step(generator, model, val_loader, epoch,
                                      tau, info_for_logger="_gen_train_step")
        top1_avg, _ = self.generator_validate(
            generator, model, val_loader, epoch, tau, sample=True,
            info_for_logger="_gen_val_step_")
        evaluate_metric, total_loss, kendall_tau = evaluate_generator(
            generator, self.backbone_pool, self.lookup_table, self.low_macs,
            self.high_macs, self.alpha, self.loss_penalty)
        logging.info("Total loss : {}".format(total_loss))
        if best_loss > total_loss:
            logging.info("Best loss by now: {} Tau: {}. Save model".format(
                total_loss, kendall_tau))
            best_loss = total_loss
            save_generator_evaluate_metric(evaluate_metric,
                                           self.path_to_generator_eval)
            save(generator, self.g_optimizer, self.path_to_save_generator)
        if top1_avg > best_top1 and total_loss < 0.4:
            logging.info("Best top1-avg by now: {}. Save model".format(
                top1_avg))
            best_top1 = top1_avg
            save(generator, self.g_optimizer, self.path_to_best_avg_generator)
            save(generator, self.g_optimizer,
                 "./logs/generator/{}.pth".format(total_loss))
        tau *= self.tau_decay
    logging.info("Best loss: {}".format(best_loss))
    save(generator, self.g_optimizer, self.path_to_fianl_generator)

def train_loop(self, train_loader, val_loader, test_loader, model):
    best_top1 = 0.0
    for epoch in range(self.train_epochs):
        self.writer.add_scalar("learning_rate/weights",
                               self.optimizer.param_groups[0]["lr"], epoch)
        self.logger.info("Start to train for epoch %d" % epoch)
        self._training_step(model, train_loader, self.optimizer, epoch,
                            info_for_logger="_train_step_")
        if val_loader is not None:
            self._training_step(model, val_loader, self.optimizer, epoch,
                                info_for_logger="_train_step_")
        top1_avg = self._validate(model, test_loader, epoch)
        self.block_acc.append(top1_avg)
        if best_top1 < top1_avg:
            best_top1 = top1_avg
            self.logger.info("Best top1 acc by now. Save model")
            save(model, self.path_to_save_model)

def train_loop(self, train_loader, test_loader, model, fold):
    best_f1 = 0.0
    for epoch in range(self.epochs):
        logging.info("Learning Rate: {:.4f}".format(
            self.optimizer.param_groups[0]["lr"]))
        logging.info("Start to train for epoch {}".format(epoch))
        self._training_step(model, train_loader, epoch,
                            info_for_logger="_train_step_")
        f1_avg, error_index = self.validate(model, test_loader, epoch)
        if best_f1 < f1_avg:
            logging.info("Best f1 score by now. Save model")
            best_f1 = f1_avg
            save(model, self.optimizer,
                 self.CONFIG.path_to_save_model[:-4] + "_{}".format(fold) +
                 self.CONFIG.path_to_save_model[-4:])
    logging.info("The Best f1 score : {}".format(best_f1))

def search_train_loop(self, generator):
    self.epochs = self.warmup_epochs + self.search_epochs
    # Training generator
    best_loss = 10000.0
    best_top1 = 0
    tau = 5
    for epoch in range(self.warmup_epochs, self.search_epochs):
        logging.info("Start to train for search epoch {}".format(epoch))
        logging.info("Tau: {}".format(tau))
        # self._generator_training_step(generator, val_loader, epoch, tau,
        #                               info_for_logger="_gen_train_step")
        # NOTE: the call above is disabled; `val_loader` is not in scope in
        # this variant, and the generator is trained inline below through the
        # accuracy predictor.
        # ================ Train ============================================
        num_generator_steps = 1  # placeholder: the original `range()` call had no bound
        for i in range(num_generator_steps):
            # Training generator
            arch_param, hardware_constraint = self.set_arch_param(generator,
                                                                  tau=tau)
            # ============== evaluate flops ===============================
            gen_flops = self.flops_table.predict_arch_param_efficiency(
                arch_param)
            hc_loss = cal_hc_loss(gen_flops.cuda(),
                                  hardware_constraint.item(),
                                  self.CONFIG.alpha, self.CONFIG.loss_penalty)
            # =============================================================
            self.g_optimizer.zero_grad()
            # ============== predict top1 accuracy ========================
            top1_avg = self.accuracy_predictor(arch_param)
            ce_loss = -1 * top1_avg
            # =============================================================
            loss = ce_loss + hc_loss
            logging.info("HC loss : {}".format(hc_loss))
            loss.backward()
            self.g_optimizer.step()
            self.g_optimizer.zero_grad()
        # ====================================================================
        # ============== Valid ===============================================
        hardware_constraint, arch_param = self._get_arch_param(
            generator, hardware_constraint, valid=True)
        arch_param = self.calculate_one_hot(arch_param)
        arch_param, hardware_constraint = self.set_arch_param(
            generator, hardware_constraint=hardware_constraint,
            arch_param=arch_param)  # `model` argument dropped: not in scope here
        # ============== evaluate flops ==================================
        gen_flops = self.flops_table.predict_arch_param_efficiency(arch_param)
        hc_loss = cal_hc_loss(gen_flops.cuda(), hardware_constraint.item(),
                              self.CONFIG.alpha, self.CONFIG.loss_penalty)
        # ================================================================
        # ============== predict top1 accuracy ===========================
        top1_avg = self.accuracy_predictor(arch_param)
        logging.info("Valid : Top-1 avg : {}".format(top1_avg))
        # ================================================================
        # ====================================================================
        # ============== Evaluate ============================================
        total_loss = 0
        evaluate_metric = {"gen_flops": [], "true_flops": []}
        for flops in range(self.CONFIG.low_macs, self.CONFIG.high_macs, 10):
            hardware_constraint = torch.tensor(flops, dtype=torch.float32)
            hardware_constraint = hardware_constraint.view(-1, 1)
            hardware_constraint = hardware_constraint.to(self.device)
            normalize_hardware_constraint = min_max_normalize(
                self.CONFIG.high_macs, self.CONFIG.low_macs,
                hardware_constraint)
            noise = torch.randn(*self.backbone.shape)
            noise = noise.to(self.device)
            noise *= 0  # zero the noise for deterministic evaluation
            arch_param = generator(self.backbone,
                                   normalize_hardware_constraint, noise)
            # ============== evaluate flops ===============================
            gen_flops = self.flops_table.predict_arch_param_efficiency(
                arch_param)
            hc_loss = cal_hc_loss(gen_flops.cuda(),
                                  hardware_constraint.item(),
                                  self.CONFIG.alpha, self.CONFIG.loss_penalty)
            # =============================================================
            evaluate_metric["gen_flops"].append(gen_flops)
            evaluate_metric["true_flops"].append(flops)
            total_loss += hc_loss.item()
        kendall_tau, _ = stats.kendalltau(evaluate_metric["gen_flops"],
                                          evaluate_metric["true_flops"])
        # ====================================================================
        logging.info("Total loss : {}".format(total_loss))
        if best_loss > total_loss:
            logging.info("Best loss by now: {} Tau: {}. Save model".format(
                total_loss, kendall_tau))
            best_loss = total_loss
            save_generator_evaluate_metric(evaluate_metric,
                                           self.CONFIG.path_to_generator_eval)
            save(generator, self.g_optimizer,
                 self.CONFIG.path_to_save_generator)
        if top1_avg > best_top1 and total_loss < 0.4:
            logging.info("Best top1-avg by now: {}. Save model".format(
                top1_avg))
            best_top1 = top1_avg
            save(generator, self.g_optimizer,
                 self.CONFIG.path_to_best_avg_generator)
            save(generator, self.g_optimizer,
                 "./logs/generator/{}.pth".format(total_loss))
        tau *= self.CONFIG.tau_decay
        self.noise_weight = (self.noise_weight * self.CONFIG.noise_decay
                             if self.noise_weight > 0.0001 else 0)
        logging.info("Noise weight : {}".format(self.noise_weight))
    logging.info("Best loss: {}".format(best_loss))
    save(generator, self.g_optimizer, self.CONFIG.path_to_fianl_generator)

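# min_max_normalize is called above as
# min_max_normalize(self.CONFIG.high_macs, self.CONFIG.low_macs, value).
# A minimal sketch consistent with that (max, min, value) argument order; this
# is an assumed implementation, not the repository's own:
def min_max_normalize(max_value, min_value, value):
    # Scale a raw MAC count into [0, 1] over the known search range.
    return (value - min_value) / (max_value - min_value)
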
def main():
    '''
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    '''
    np.random.seed(args.seed)
    if args.gpu == -1:
        device = torch.device('cpu')
    else:
        device = torch.device('cuda:{}'.format(args.gpu))
    cudnn.benchmark = True
    # Seed the CPU random number generator so results are reproducible.
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()  # loss function: cross-entropy
    criterion = criterion.to(device)
    model = Network(args.gpu, args.init_channels, dataset_classes,
                    args.layers, criterion)
    model = model.to(device)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_data = MyDataset(args=args, subset='train')
    valid_data = MyDataset(args=args, subset='valid')
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True,
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False,
        pin_memory=True, num_workers=2)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    architect = Architect(model, args)

    f_arch = open(os.path.join(args.save, 'arch.txt'), 'a')
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)
        # Pick the alphas and turn the continuous architecture back into a
        # discrete one.
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))
        # training
        train_acc, train_obj, train_fscores, train_MIoU = train(
            train_queue, valid_queue, model, architect, criterion, optimizer,
            lr)
        logging.info('train_acc %f _fscores %f _MIoU %f',
                     train_acc, train_fscores, train_MIoU)
        # validation
        valid_acc, valid_obj, valid_fscores, valid_MIoU = infer(
            valid_queue, model, criterion)
        logging.info('valid_acc %f _fscores %f _MIoU %f',
                     valid_acc, valid_fscores, valid_MIoU)
        utils.save(model, os.path.join(args.save, 'weights.pt'))
        f_arch.write(str(F.softmax(model.arch_parameters()[0], -1)))
    f_arch.close()

def train(label, phi, t_label, t_phi, cfg):
    # writer = SummaryWriter()
    train_label, validate_label, _, _ = train_test_split(
        label.label, test_size=cfg.tv_value, random_state=20, shuffle=True)
    train_dataset = ds.SnapshotDataset(phi, train_label)
    validate_dataset = ds.SnapshotDataset(phi, validate_label)
    t_dataset = ds.SnapshotDataset(t_phi, t_label)
    phi = phi.to(cfg.device)
    model = End2end(phi, cfg)
    print(sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = model.to(cfg.device)
    optimizer = util.get_optimizer(cfg.o_name, model, cfg.learning_rate)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', 0.5,
                                               cfg.scheduler)
    loss_func = get_loss(cfg)

    # with writer as w:
    #     dummy_x = torch.zeros_like(label[0].unsqueeze(0))
    #     dummy_y = torch.zeros_like(label[0, 0].unsqueeze(0))
    #     w.add_graph(model, (dummy_x, dummy_y, phi))

    losses = []
    val_losses = []
    best_val_loss = 1
    best_psnr = 0
    accumulation_steps = cfg.poor
    train_data_loader = DataLoader(train_dataset, batch_size=cfg.batch,
                                   shuffle=True, drop_last=True)
    validate_data_loader = DataLoader(validate_dataset,
                                      batch_size=math.floor(cfg.batch / 2),
                                      shuffle=False, drop_last=True)
    for ep in range(cfg.epoch):
        optimizer.zero_grad()
        for ep_i, batch in enumerate(train_data_loader):
            label, y = batch
            initial = y.repeat(cfg.frame, 1, 1, 1).permute(1, 0, 2, 3).mul(
                phi.cpu()).div(phi.cpu().sum(0) + 0.0001)
            initial = initial.to(cfg.device)
            y = y.to(cfg.device)
            label = label.to(cfg.device)
            model.train()
            layers, symmetric = model(initial, y, phi)
            net_output = layers[-1]
            loss = loss_func(layers, label, symmetric)
            loss.backward()
            if (ep_i + 1) % accumulation_steps == 0:
                print("ep", ep, "ep_i ", ep_i, "loss ", loss.item())
                optimizer.step()
                optimizer.zero_grad()
        with torch.no_grad():
            losses.append(loss.item())
            val_loss = torch.zeros([1])
            for v_ep_i, v_batch in enumerate(validate_data_loader):
                v_label, v_y = v_batch
                v_initial = v_y.repeat(cfg.frame, 1, 1, 1).permute(
                    1, 0, 2, 3).mul(phi.cpu()).div(phi.cpu().sum(0) + 0.0001)
                v_initial = v_initial.to(cfg.device)
                v_y = v_y.to(cfg.device)
                v_label = v_label.to(cfg.device)
                model.eval()
                v_layers, symmetric = model(v_initial, v_y, phi)
                net_output = v_layers[-1]
                val_loss += loss_func(v_layers, v_label, symmetric)
            scheduler.step(val_loss)
            val_losses.append(val_loss.item())
            print("ep ", ep, "loss ", loss.item(), "val loss ", val_loss,
                  "lr", optimizer.param_groups[0]['lr'], "time ", time())
            if ep % cfg.store == 0:
                best_val_loss = val_loss
                best_img = np.clip(net_output.detach().cpu().numpy(), 0,
                                   1).astype(np.float64)
                best_psnr = compare_psnr(v_label.cpu().numpy(), best_img)
                print("PSNR: ", np.round(best_psnr, 2))
                util.save(model, best_psnr, best_img, v_label.cpu().numpy(),
                          cfg)

    t_phi = t_phi.to(cfg.device)
    data_loader = DataLoader(t_dataset, batch_size=t_label.shape[0],
                             shuffle=False)
    label, y = next(iter(data_loader))
    initial = y.repeat(cfg.frame, 1, 1, 1).permute(1, 0, 2, 3).mul(
        t_phi.cpu()).div(t_phi.cpu().sum(0) + 0.0001)
    initial = initial.to(cfg.device)
    y = y.to(cfg.device)
    layers, _ = model(initial, y, t_phi)
    net_output = layers[-1].detach().cpu().numpy()
    psnr = compare_psnr(label.numpy(),
                        np.clip(net_output, 0, 1).astype(np.float64))
    return model, psnr, net_output

parser.add_argument('--batch', type=int, default=8)
parser.add_argument('--phase', type=int, default=2)
parser.add_argument('--share', type=bool, default=False)
parser.add_argument('--poor', type=int, default=1)
parser.add_argument('--scheduler', type=int, default=5)
parser.add_argument('--tv_value', type=float, default=0.9)
parser.add_argument('--store', type=int, default=20)
args = parser.parse_args()

if args.use_gpu:
    if args.device is None:
        args.device = util.getbestgpu()
else:
    args.device = 'cpu'

train_file, test_file, mask_file, _, _, _ = config.general(args.name)
t_label, t_phi = ds.load_test_data(test_file, mask_file, False)
if args.name == "Traffic":
    label, phi = ds.load_train_data(train_file, mask_file, False)
else:
    label, phi = ds.load_train_data(train_file, mask_file, True)
print(label.shape)

start = time()
model, psnr, reconstruction = train(label, phi, t_label, t_phi, args)
end = time()
t = end - start
print("PSNR {}, Training Time: {}".format(psnr, t))
util.save(model, psnr, reconstruction, t_label.cpu().numpy(), args)

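# A hypothetical invocation of the script above (the file name train.py and
# the --name/--use_gpu/--device flags are assumed from references in the code;
# only the flags shown in the excerpt are certain):
#   python train.py --name Traffic --batch 8 --poor 1 --store 20
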
def search(args):
    logging.info('start load dataset')
    train_data, test_data, x_shape, class_num = get_src_dataset(
        args.data_path, args.name)
    x_shape[0] = args.batch_size
    search_loader, _, _ = get_search_loader(train_data, test_data, args.name,
                                            args.split, args.workers,
                                            args.batch_size)
    logging.info('dataset loaded')

    model = Network(args.name, x_shape, class_num, args)
    model = model.cuda()
    flop, param = get_model_infos(model, x_shape)
    logging.info('Params={:.2f} MB, FLOPs={:.2f} M'.format(param, flop))

    w_optimizer, w_scheduler, criterion = get_opt_scheduler(
        model.get_weights(), args.base_optm, args.base_lr, args.base_decay,
        args.base_scheduler, args.epoch)
    criterion = criterion.cuda()
    if args.arch_optm == 'Adam':
        a_optimizer = optim.Adam(model.get_alphas(), args.arch_lr,
                                 weight_decay=args.arch_decay)
    else:
        raise ValueError
    logging.info('w-optimizer : {:}'.format(w_optimizer))
    logging.info('a-optimizer : {:}'.format(a_optimizer))
    logging.info('w-scheduler : {:}'.format(w_scheduler))
    logging.info('criterion : {:}'.format(criterion))
    logging.info('classifier:\n{:}'.format(model.classifier))

    best_acc = 0
    time_str = ''
    for epoch in range(1, args.epoch + 1):
        # Linearly anneal the temperature tau from max_tau down to min_tau.
        new_tau = args.max_tau - (args.max_tau - args.min_tau) * epoch / (args.epoch - 1)
        model.set_tau(new_tau)
        logging.info('epoch:{:} LR:{:.6f} tau:{:.6f} need time {:}'.format(
            epoch, w_scheduler.get_lr()[0], new_tau, time_str))
        if args.name in ['cifar10', 'cifar100']:
            model.set_drop_path_prob(args.drop_path_prob * epoch / args.epoch)
        epoch_str = '[{:03d}/{:03d}]'.format(epoch, args.epoch)
        # A, B = model.show_alphas()
        # logging.info(A)
        # logging.info(B)
        s_time = time.time()
        base_top1, base_top5, base_loss, arch_top1, arch_top5, arch_loss = search_train(
            search_loader, model, criterion, w_optimizer, a_optimizer,
            epoch_str, args.print_frequency, args.grad_clip)
        batch_time = (time.time() - s_time) * (args.epoch - epoch)
        m, s = divmod(batch_time, 60)
        h, m = divmod(m, 60)
        time_str = "%d:%02d:%02d" % (h, m, s)
        train_str = 'train set - epoch:' + epoch_str + ' result Loss:'
        val_str = ' val set - epoch:' + epoch_str + ' result Loss:'
        logging.info(train_str + '{:.6f} Pre@1 : {:.5f}% Pre@5:{:.5f}%'.format(
            base_loss, base_top1, base_top5))
        logging.info(val_str + '{:.6f} Pre@1 : {:.5f}% Pre@5:{:.5f}%'.format(
            arch_loss, arch_top1, arch_top5))
        if arch_top1 > best_acc:
            best_acc = arch_top1
            logging.info(
                'find the best model. best acc is {:.5f}%'.format(best_acc))
            logging.info('Save it to {:}'.format(
                os.path.join(args.save, 'best.pt')))
            save(model, os.path.join(args.save, 'best.pt'))
            model.get_genotype()
        w_scheduler.step()
    logging.info('best acc is {:.5f}%'.format(best_acc))

def train(args):
    logging.info('start load dataset')
    train_data, test_data, x_shape, class_num = get_src_dataset(
        args.data_path, args.name)
    _, train_loader, valid_loader = get_search_loader(train_data, test_data,
                                                      args.name, args.split,
                                                      args.workers,
                                                      args.batch_size)
    logging.info('dataset loaded')

    model = Network(args.name, x_shape, class_num, args)
    model = model.cuda()
    flop, param = get_model_infos(model, x_shape)
    logging.info('Params={:.2f} MB, FLOPs={:.2f} M'.format(param, flop))

    optimizer, scheduler, criterion = get_opt_scheduler(
        model.get_weights(), args.optimizer, args.lr, args.weight_decay,
        args.scheduler, args.epoch)
    criterion = criterion.cuda()
    logging.info('optimizer : {:}'.format(optimizer))
    logging.info('scheduler : {:}'.format(scheduler))
    logging.info('criterion : {:}'.format(criterion))
    logging.info('classifier:\n{:}'.format(model.classifier))

    need_time = AverageMeter()
    time_str = ''
    best_acc = 0
    for epoch in range(1, args.epoch + 1):
        logging.info('epoch:{:} LR:{:.6f} need time {:}'.format(
            epoch, scheduler.get_lr()[0], time_str))
        if args.name in ['cifar10', 'cifar100']:
            model.set_drop_path_prob(args.drop_path_prob * epoch / args.epoch)
        epoch_str = '[{:03d}/{:03d}]'.format(epoch, args.epoch)
        s_time = time.time()
        train_top1, train_top5, train_loss = model_train(
            train_loader, model, criterion, optimizer, epoch_str,
            args.print_frequency, args.grad_clip)
        val_top1, val_top5, val_loss = mode_val(valid_loader, model,
                                                criterion, epoch_str,
                                                args.print_frequency)
        need_time.update(time.time() - s_time)
        m, s = divmod(need_time.avg * (args.epoch - epoch), 60)
        h, m = divmod(m, 60)
        time_str = "%d:%02d:%02d" % (h, m, s)
        train_str = 'train set - epoch:' + epoch_str + ' result Loss:'
        val_str = ' val set - epoch:' + epoch_str + ' result Loss:'
        logging.info(train_str + '{:.6f} Pre@1 : {:.5f}% Pre@5:{:.5f}%'.format(
            train_loss, train_top1, train_top5))
        logging.info(val_str + '{:.6f} Pre@1 : {:.5f}% Pre@5:{:.5f}%'.format(
            val_loss, val_top1, val_top5))
        if val_top1 > best_acc:
            best_acc = val_top1
            logging.info(
                'find the best model. best acc is {:.5f}%'.format(best_acc))
            logging.info('Save it to {:}'.format(
                os.path.join(args.save, 'best.pt')))
            save(model, os.path.join(args.save, 'best.pt'))
        scheduler.step()
    logging.info('best acc is {:.5f}%'.format(best_acc))

def save_global_eval_result_dict(self, global_result_dict):
    p = join(self.get_log_dir(), 'global_result_dict')
    print("in save_global_eval_result_dict")
    save(global_result_dict, p, print_msg=False)

def train(train_queue, valid_queue, model, architect, criterion, optimizer,
          lr):
    objs = utils.AvgrageMeter()  # tracks the running loss
    accs = utils.AvgrageMeter()
    MIoUs = utils.AvgrageMeter()
    fscores = utils.AvgrageMeter()

    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if args.gpu == -1:
        device = torch.device('cpu')
    else:
        device = torch.device('cuda:{}'.format(args.gpu))

    # Each step draws one batch; the batch size is 64 (256 data pairs).
    for step, (input, target) in enumerate(train_queue):

if os.path.isdir(snapshot_dir):  # assumed outer condition; the excerpt began mid-block, and the else branch treats snapshot_dir as a single checkpoint path
    ckpt = tf.train.get_checkpoint_state(snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')
else:
    load(loader, sess, snapshot_dir)

print('Start training ...')
_step, _loss, _summaries = 0, None, None
while _step < iterations:
    try:
        _, _step, _g_loss, _p_psnr, _summaries = \
            sess.run([g_train_op, g_step, g_loss, train_positive_psnr,
                      summary_op])
        if _step % 10 == 0:
            print('Iteration = {}, global loss = {:.6f}, positive psnr = '
                  '{:.6f}'.format(_step, _g_loss, _p_psnr))
        if _step % 100 == 0:
            summary_writer.add_summary(_summaries, global_step=_step)
            print('Save summaries...')
        if _step % model_save_freq == 0:
            save(saver, sess, snapshot_dir, _step)
    except tf.errors.OutOfRangeError:
        print('Finish successfully!')
        save(saver, sess, snapshot_dir, _step)
        break

def main():
    seed = util.prepare(args)
    if not cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    CIFAR_CLASSES = 10
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    cuda.manual_seed(seed)
    cuda.set_device(args.gpu)
    cudnn.benchmark = False
    cudnn.deterministic = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    logging.info('hidden_layers:{:}'.format(args.hidden_layers))
    logging.info('first_neurons:{:}'.format(args.first_neurons))
    logging.info('change:{:}'.format(args.change))
    logging.info('activate_func:{:}'.format(args.activate_func))
    logging.info('opt:{:}'.format(args.opt))
    logging.info('cross_link:{:}'.format(args.cross_link))

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype, args)
    model = model.cuda()
    logging.info("param size = %fMB", util.count_parameters_in_MB(model))
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = util.get_data_transforms_cifar10(args)
    train_data = datasets.CIFAR10(root=args.data, train=True, download=False,
                                  transform=train_transform)
    valid_data = datasets.CIFAR10(root=args.data, train=False, download=False,
                                  transform=valid_transform)
    train_queue = DataLoader(train_data, batch_size=args.batch_size,
                             shuffle=True, pin_memory=True, num_workers=1)
    valid_queue = DataLoader(valid_data, batch_size=args.batch_size,
                             shuffle=False, pin_memory=True, num_workers=1)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs)

    best_acc = 0
    for epoch in range(args.epochs):
        logging.info('epoch %d lr %.6f', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        epoch_str = '[{:03d}/{:03d}]'.format(epoch, args.epochs)
        train_acc, train_obj = train(train_queue, model, criterion, optimizer,
                                     epoch_str)
        logging.info('train_acc %.2f', train_acc)
        valid_acc, valid_obj = infer(valid_queue, model, criterion, epoch_str)
        logging.info('valid_acc %.2f', valid_acc)
        if valid_acc > best_acc:
            logging.info('find the best model. Save it to {:}'.format(
                os.path.join(args.save, 'best.pt')))
            util.save(model, os.path.join(args.save, 'best.pt'))
            best_acc = valid_acc
        scheduler.step()
    logging.info('best acc is {:}'.format(best_acc))

def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", count_parameters_in_MB(model))
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = get_data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=2,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=1)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=2,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True, num_workers=1)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(args.epochs), eta_min=args.learning_rate_min)
    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))
        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)
        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)
        save(model, os.path.join(args.save, 'weights.pt'))

def search_train_loop(self, train_loader, val_loader, test_loader, model,
                      generator):
    self.epochs = self.warmup_epochs + self.search_epochs
    # Training supernet
    best_top1 = 0.0
    # warmup_epochs is the number of supernet-training epochs; if it is 0,
    # this loop is skipped and we go straight to generator training below.
    for epoch in range(self.warmup_epochs):
        logging.info("Learning Rate: {:.4f}".format(
            self.optimizer.param_groups[0]["lr"]))
        self.writer.add_scalar("learning_rate/weights",
                               self.optimizer.param_groups[0]["lr"], epoch)
        logging.info("Start to train for warmup epoch {}".format(epoch))
        self._training_step(model, train_loader, epoch,
                            info_for_logger="_train_step_")  # one training step
        if self.CONFIG.lr_scheduler == "step":
            self.scheduler.step()
        top1_avg = self._validate(model, val_loader, epoch)
        if best_top1 < top1_avg:
            logging.info("Best top1 acc by now. Save model")
            best_top1 = top1_avg
            save(model, self.optimizer, self.CONFIG.path_to_save_model)

    # Training generator
    best_loss = 10000.0
    best_top1 = 0
    tau = 5
    # The epoch counter runs from warmup_epochs up to search_epochs.
    for epoch in range(self.warmup_epochs, self.search_epochs):
        logging.info("Start to train for search epoch {}".format(epoch))
        logging.info("Tau: {}".format(tau))
        self._generator_training_step(generator, model, val_loader, epoch,
                                      tau, info_for_logger="_gen_train_step")
        top1_avg, _ = self.generator_validate(
            generator, model, val_loader, epoch,
            info_for_logger="_gen_val_step_",
            target_hardware_constraint=(self.CONFIG.low_flops +
                                        self.CONFIG.high_flops) / 2)
        evaluate_metric, total_loss, kendall_tau = evaluate_generator(
            generator, self.prior_pool, self.lookup_table, self.CONFIG,
            self.device)
        logging.info("Total loss : {}".format(total_loss))
        if best_loss > total_loss:
            logging.info("Best loss by now: {} Tau: {}. Save model".format(
                total_loss, kendall_tau))
            best_loss = total_loss
            save_generator_evaluate_metric(evaluate_metric,
                                           self.CONFIG.path_to_generator_eval)
            save(generator, self.g_optimizer,
                 self.CONFIG.path_to_save_generator)
        if top1_avg > best_top1 and total_loss < 0.4:
            logging.info("Best top1-avg by now: {}. Save model".format(
                top1_avg))
            best_top1 = top1_avg
            save(generator, self.g_optimizer,
                 self.CONFIG.path_to_best_avg_generator)
        tau *= self.CONFIG.tau_decay
    logging.info("Best loss: {}".format(best_loss))
    save(generator, self.g_optimizer, self.CONFIG.path_to_fianl_generator)

def _save_conf_code(self):
    with open(join(self.logdir, 'config.py'), 'w') as f:
        f.write(extract_config_code())
    p = join(self.get_log_dir(), 'FLAGS')
    print("in _save_conf_code")
    save({'FLAGS': FLAGS}, p, print_msg=False)

def main(args):
    arg = argparse.ArgumentParser(
        description='Separate on- and off-screen audio from a video')
    arg.add_argument('vid_file', type=str, help='Video file to process')
    arg.add_argument(
        '--duration_mult', type=float, default=None,
        help='Multiply the default duration of the audio (i.e. %f) by this '
             'amount. Should be a power of 2.' % sep_params.VidDur)
    arg.add_argument(
        '--mask', type=str, default=None,
        help="set to 'l' or 'r' to visually mask the left/right half of the "
             "video before processing")
    arg.add_argument('--start', type=float, default=0.,
                     help='How many seconds into the video to start')
    arg.add_argument(
        '--model', type=str, default='full',
        help='Which variation of the source separation model to run.')
    arg.add_argument('--gpu', type=int, default=0,
                     help='Set to -1 for no GPU')
    arg.add_argument('--out', type=str, default=None,
                     help='Directory to save videos')
    arg.add_argument('--cam', dest='cam', default=False, action='store_true')

    # undocumented/deprecated options
    arg.add_argument('--clip_dur', type=float, default=None)
    arg.add_argument('--duration', type=float, default=None)
    arg.add_argument('--fullres', type=bool, default=True)
    arg.add_argument('--suffix', type=str, default='')
    arg.add_argument('--max_full_height', type=int, default=600)

    arg = arg.parse_args(args)
    arg.fullres = arg.fullres or arg.cam
    if arg.gpu < 0:
        arg.gpu = None
    print 'Start time:', arg.start
    print 'GPU =', arg.gpu
    gpus = [arg.gpu]
    gpus = mu.set_gpus(gpus)

    if arg.duration_mult is not None:
        pr = sep_params.full()
        step = 0.001 * pr.frame_step_ms
        length = 0.001 * pr.frame_length_ms
        arg.clip_dur = length + step * (0.5 + pr.spec_len) * arg.duration_mult
    fn = getattr(sep_params, arg.model)
    pr = fn(vid_dur=arg.clip_dur)
    if arg.clip_dur is None:
        arg.clip_dur = pr.vid_dur
    pr.input_rms = np.sqrt(0.1**2 + 0.1**2)
    print 'Spectrogram samples:', pr.spec_len
    pr.model_path = '../results/nets/sep/%s/net.tf-%d' % (pr.name,
                                                          pr.train_iters)

    if not os.path.exists(arg.vid_file):
        print 'Does not exist:', arg.vid_file
        sys.exit(1)
    if arg.duration is None:
        arg.duration = arg.clip_dur + 0.01
    print arg.duration, arg.clip_dur

    full_dur = arg.duration
    step_dur = arg.clip_dur / 2.
    filled = np.zeros(int(np.ceil(full_dur * pr.samp_sr)), 'bool')
    full_samples_fg = np.zeros(filled.shape, 'float32')
    full_samples_bg = np.zeros(filled.shape, 'float32')
    full_samples_src = np.zeros(filled.shape, 'float32')
    arg.start = ut.make_mod(arg.start, (1. / pr.fps))
    ts = np.arange(arg.start, arg.start + full_dur - arg.clip_dur, step_dur)
    full_ims = [None] * int(np.ceil(full_dur * pr.fps))

    # Process each video chunk
    for t in ut.time_est(ts):
        t = ut.make_mod(t, (1. / pr.fps))
        frame_start = int(t * pr.fps - arg.start * pr.fps)
        ret = run(arg.vid_file, t, arg.clip_dur, pr, gpus[0], mask=arg.mask,
                  arg=arg)
        if ret is None:
            continue
        ims = ret['ims']
        for frame, im in zip(xrange(frame_start, frame_start + len(ims)),
                             ims):
            full_ims[frame] = im
        samples_fg = ret['samples_pred_fg'][:, 0]
        samples_bg = ret['samples_pred_bg'][:, 0]
        samples_src = ret['samples_src'][:, 0]
        samples_src = samples_src[:samples_bg.shape[0]]
        sample_start = int(round((t - arg.start) * pr.samp_sr))
        n = samples_src.shape[0]
        inds = np.arange(sample_start, sample_start + n)
        ok = ~filled[inds]
        full_samples_fg[inds[ok]] = samples_fg[ok]
        full_samples_bg[inds[ok]] = samples_bg[ok]
        full_samples_src[inds[ok]] = samples_src[ok]
        filled[inds] = True
    full_samples_fg = np.clip(full_samples_fg, -1., 1.)
    full_samples_bg = np.clip(full_samples_bg, -1., 1.)
    full_samples_src = np.clip(full_samples_src, -1., 1.)

    full_ims = [x for x in full_ims if x is not None]
    table = [['start =', arg.start],
             'fg:', imtable.Video(full_ims, pr.fps,
                                  Sound(full_samples_fg, pr.samp_sr)),
             'bg:', imtable.Video(full_ims, pr.fps,
                                  Sound(full_samples_bg, pr.samp_sr)),
             'src:', imtable.Video(full_ims, pr.fps,
                                   Sound(full_samples_src, pr.samp_sr))]
    # Write videos
    if arg.out is not None:
        ut.mkdir(arg.out)
        vid_s = arg.vid_file.split('/')[-1].split('.mp4')[0]
        mask_s = '' if arg.mask is None else '_%s' % arg.mask
        cam_s = '' if not arg.cam else '_cam'
        suffix_s = '' if arg.suffix == '' else '_%s' % arg.suffix
        name = '%s%s%s_%s' % (suffix_s, mask_s, cam_s, vid_s)

        def snd(x):
            x = Sound(x, pr.samp_sr)
            x.samples = np.clip(x.samples, -1., 1.)
            return x

        print 'Writing to:', arg.out
        ut.save(pj(arg.out, 'ret%s.pk' % name), ret)
        ut.make_video(full_ims, pr.fps, pj(arg.out, 'fg%s.mp4' % name),
                      snd(full_samples_fg))
        ut.make_video(full_ims, pr.fps, pj(arg.out, 'bg%s.mp4' % name),
                      snd(full_samples_bg))
        ut.make_video(full_ims, pr.fps, pj(arg.out, 'src%s.mp4' % name),
                      snd(full_samples_src))
    else:
        print 'Not writing, since --out was not set'

    print 'Video results:'
    ig.show(table)
    return 'fg%s.mp4' % name, 'bg%s.mp4' % name

# Excerpt begins mid-script: `best_loss` is assumed to be carried over from an
# elided outer loop. The loop header and `test_loss` initialization below are
# reconstructed from the `i + 250` indexing and the division by 50.
test_loss = 0
for i in range(50):
    architecture_num = test_data["architecture_num"][i + 250]
    y = test_data["avg"][i + 250]
    adj_matrix = adj_matrix_table.iloc[architecture_num].values
    adj_matrix = adj_matrix.reshape(nodes_num, nodes_num)
    X = get_input_data(adj_matrix)
    edge_index = get_edge_index(adj_matrix)
    X = wrap_data(X)
    y = wrap_data([y])
    edge_index = wrap_data(edge_index, dtype=torch.long)
    outs = model(X, edge_index)
    loss = criterion(outs, y)
    test_loss += loss
    test_metric["architecture_num"].append(i + 250)
    test_metric["predict_avg"].append(outs.item())
    test_metric["avg"].append(y.item())
test_loss /= 50
if best_loss > test_loss.item():
    save(model, "gcn_weight.pth")
    print(test_loss.item())
    df_metric = pd.DataFrame(test_metric)
    df_metric.to_csv("./test.csv", index=False)
    best_loss = test_loss.item()

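# wrap_data is called above as wrap_data(x) and wrap_data(x, dtype=torch.long).
# A minimal sketch consistent with those calls (an assumed helper, not the
# original implementation):
import torch

def wrap_data(data, dtype=torch.float32):
    # Wrap a Python list or numpy array as a tensor of the requested dtype.
    return torch.tensor(data, dtype=dtype)
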
def save_graph_embeddings_mat(self, init_x, id_map, gs_map):
    assert init_x.shape[0] == len(gs_map)
    p = join(self.get_log_dir(), "graph_embeddings")
    save({"init_x": init_x, "id_map": id_map, "gs_map": gs_map}, p)