def upload_yaml():
    """Handle a YAML-upload request for the current project.

    Reads ``yaml`` (the document text) and ``title`` from the request's
    JSON body, persists them for the project's current sheet via
    ``save_yaml``, and returns the refreshed project dict.

    Returns:
        tuple: (dict with a ``project`` key, HTTP status 200)
    """
    project = get_project()
    calc_params = get_calc_params(project)
    sheet_name = calc_params.sheet_name
    # Parse the JSON body once instead of re-parsing it for each field.
    payload = request.get_json()
    yaml_data = payload["yaml"]
    yaml_title = payload["title"]
    save_yaml(project, yaml_data, calc_params.data_path, sheet_name,
              yaml_title)
    response = dict(project=get_project_dict(project))
    return response, 200
def load_run_experiment_and_save(filename):
    """Fit an SVC on the audio file, refit from the best candidate, and
    save the resulting evolution under data/experiments/.

    Args:
        filename: Path to the input ``.wav`` audio file.

    Returns:
        The evolution object produced by ``refit_from_best``.
    """
    clf = SVC(C=1, gamma=0.001, kernel='rbf', random_state=0)
    audio = load_audio(filename)
    evolution = refit_from_best(clf, audio)
    base = filename.split('/')[-1]
    # Swap only the trailing extension.  The previous
    # ``.replace('.wav', '.yaml')`` rewrote EVERY '.wav' occurrence, so a
    # name like 'take.wav.v2.wav' would be mangled mid-string.
    if base.endswith('.wav'):
        base = base[:-len('.wav')] + '.yaml'
    exp_filename = 'data/experiments/' + base
    save_yaml(exp_filename, evolution)
    return evolution
def sample_corners(self):
    """Interactively record the four workspace corner poses.

    For each corner the operator is prompted to move the robot and press
    ENTER; the controller's current pose is captured and appended to
    ``self.corners``.  The collected poses are then written to
    ``self.yaml`` via ``utils.save_yaml``.

    NOTE(review): uses ``raw_input`` — this code targets Python 2.
    """
    self.corners = []
    for corner_no in (1, 2, 3, 4):
        raw_input(
            "============ Move to no.{} corner and press ENTER.".format(
                corner_no))
        pose = self.controller.pose_state
        print(pose)
        self.corners.append(pose)
    # Persist the sampled corners to the YAML file.
    utils.save_yaml(self.yaml, self.corners)
    print("Sampling completed.")
def run(params, log_dir=None):
    """Execute the experiment described by ``params``.

    If ``log_dir`` is given it overrides ``params["main_params"]["log_dir"]``
    (the dict is mutated); otherwise the directory already stored in
    ``params`` is used.  The effective parameters are snapshotted to
    <log_dir>/init_params.yml before any runner executes.

    Args:
        params: Nested experiment configuration dict.
        log_dir: Optional override for the log directory.
    """
    main_params = params["main_params"]
    if log_dir is None:
        log_dir = main_params["log_dir"]
    else:
        main_params["log_dir"] = log_dir
    os.makedirs(log_dir, exist_ok=True)
    # Snapshot the resolved configuration next to the run's outputs.
    save_yaml(params, out_file=os.path.join(log_dir, "init_params.yml"))
    initialize(params)
    print("loading data")
    loaders = get_loaders(params)
    nets = get_models(params, loaders)
    for runner in get_runners(params, loaders, nets):
        runner()
def main(config, dset_config):
    """Create a fresh run directory, set up logging, then train and save.

    Side effects: creates <result_dir>/runs*/weights, rebinds
    ``config.result_dir`` to the new run directory, and writes log.txt,
    the config YAML and a hostname record under it.

    Args:
        config: Run configuration object (mutated: ``result_dir``).
        dset_config: Dataset-specific configuration.
    """
    run_dir = Path(increment_path(os.path.join(config.result_dir, "runs")))
    Path(os.path.join(run_dir, "weights")).mkdir(parents=True, exist_ok=True)
    config.result_dir = run_dir
    log = setup_logger.setFileHandler(
        filename=os.path.join(run_dir, "log.txt"))
    save_yaml(config)
    save_hostname(config)
    started_at = datetime.datetime.now()
    logger.info(f"\n Start: {started_at.strftime('%Y年%m月%d日 %H:%M:%S')}")
    # Build datasets, model and trainer, then run the training loop.
    train_set = get_dataset(config, dset_config, mode="train")
    valid_set = get_dataset(config, dset_config, mode="valid")
    net = get_model(config, dset_config)
    trainer = get_trainer(config, dset_config)
    trainer.train(dataset=train_set, valid_dataset=valid_set, model=net)
    trainer.save()
def optimize(trial: optuna.Trial, model_path, config_path):
    """Optuna objective: run one trial of ``run.py`` with sampled configs.

    Loads the run/model YAML configs, lets ``Optimizer`` inject values
    sampled from ``trial``, launches ``run.py`` as a subprocess on
    temporary config files, and scores the trial from the produced
    ttest-results.json.

    Args:
        trial: Optuna trial supplying hyperparameter samples.
        model_path: Path to the model YAML config.
        config_path: Path to the run YAML config.

    Returns:
        Negated t statistic of the harmonic mean (optuna minimizes).

    Raises:
        RuntimeError: If run.py exits non-zero or produced no gen-* dir.
    """
    optimizer = Optimizer(trial)
    run_config = utils.load_yaml(config_path)
    mdl_config = utils.load_yaml(model_path)
    run_config = optimizer.optimize_config(run_config)
    mdl_config = optimizer.optimize_model(mdl_config)
    shell = utils.ShellUtils()
    shell.mkdir("optimize-debug", silent=True)
    # Debug copies of the sampled configs for post-mortem inspection.
    utils.save_yaml(mdl_config, "optimize-debug/model.yml")
    utils.save_json(run_config, "optimize-debug/run.json")
    # NamedTemporaryFile(delete=False) instead of the deprecated,
    # race-prone tempfile.mktemp(); only the paths are needed, so close
    # the handles immediately.
    run_tmp = tempfile.NamedTemporaryFile(delete=False)
    mdl_tmp = tempfile.NamedTemporaryFile(delete=False)
    run_path, mdl_path = run_tmp.name, mdl_tmp.name
    run_tmp.close()
    mdl_tmp.close()
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    save_dir = (pathlib.Path(__file__).absolute().parent.joinpath(
        f"out/woz/{timestamp}"))
    run_config["save-dir"] = str(save_dir)
    run_config["model-path"] = mdl_path
    utils.save_json(run_config, run_path)
    utils.save_json(mdl_config, mdl_path)
    retcode, stdout, stderr = utils.Process(
        args=f"python run.py @load {run_path}".split(),
        cwd=pathlib.Path(__file__).absolute().parent,
        print_stdout=True,
        print_stderr=True).run()
    if retcode:
        raise RuntimeError(f"process 'run.py' failed; "
                           f"return code: {retcode}; stderr: {stderr}")
    shell.remove(run_path, silent=True)
    shell.remove(mdl_path, silent=True)
    # Sort so "latest" generation is well-defined — glob order is arbitrary.
    gen_dirs = sorted(save_dir.glob("gen-*"))
    if not gen_dirs:
        raise RuntimeError("no generation directory detected")
    if len(gen_dirs) > 1:
        warnings.warn(f"more than 1 generation "
                      f"directories detected: {gen_dirs}")
    gen_dir = gen_dirs[-1]
    ttest_results = utils.load_json(gen_dir.joinpath("ttest-results.json"))
    return -ttest_results["hmean"]["t"]
def train_and_evaluate(cfg, dloader_train, dloader_val, dloader_test, device,
                       writer, experiment_dir):
    """Train for cfg.num_epochs, validate per epoch, checkpoint on best
    val loss, then evaluate the best checkpoint on the test loader.

    NOTE(review): ``model`` is neither a parameter nor a local — this
    function depends on a ``model`` bound in an enclosing/module scope;
    confirm against the caller.

    Args:
        cfg: Config with opt, lr, momentum, scheduler, num_epochs,
            save_intermediate_weights and network attributes.
        dloader_train / dloader_val / dloader_test: Data loaders.
        device: Device used as map_location when reloading the best ckpt.
        writer: Summary writer; closed before this function returns.
        experiment_dir: Directory for checkpoints and config_linear.yaml.
    """
    # Build the optimizer from cfg.opt.  NOTE(review): if cfg.opt is
    # neither 'adam' nor 'sgd', ``optimizer`` is unbound and the
    # checkpoint call below raises NameError.
    if cfg.opt == 'adam':
        optimizer = optim.Adam(
            model.parameters(),
            lr=float(cfg.lr)
        )  #, momentum=float(cfg.momentum), weight_decay=5e-4, nesterov=True)
    elif cfg.opt == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=float(cfg.lr),
                              momentum=float(cfg.momentum),
                              weight_decay=5e-4,
                              nesterov=True)
    # Step-decay schedule at fixed milestones; disabled when cfg.scheduler
    # is falsy.
    if cfg.scheduler:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=[60, 120, 160, 200],
                                             gamma=0.2)
    else:
        scheduler = None
    criterion = nn.CrossEntropyLoss()
    # Module-level step counter, reset per run (shared with train/validate).
    global iter_cnt
    iter_cnt = 0
    best_loss = 1000  # sentinel: any real val loss below this becomes "best"
    for epoch in range(cfg.num_epochs):
        # print('\nTrain for Epoch: {}/{}'.format(epoch,cfg.num_epochs))
        logging.info('\nTrain for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        train_loss, train_acc = train(epoch, model, device, dloader_train,
                                      optimizer, scheduler, criterion,
                                      experiment_dir, writer)
        logging.info(
            'Train Epoch: {} Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
                epoch, train_loss, train_acc))
        # validate after every epoch
        # print('\nValidate for Epoch: {}/{}'.format(epoch,cfg.num_epochs))
        logging.info('\nValidate for Epoch: {}/{}'.format(
            epoch, cfg.num_epochs))
        val_loss, val_acc = validate(epoch, model, device, dloader_val,
                                     criterion, experiment_dir, writer)
        logging.info(
            'Val Epoch: {} Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
                epoch, val_loss, val_acc))
        # for name, weight in model.named_parameters():
        #     writer.add_histogram(name,weight, epoch)
        #     writer.add_histogram(f'{name}.grad',weight.grad, epoch)
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        # Save every Nth epoch, and always when validation loss improved.
        if epoch % cfg.save_intermediate_weights == 0 or is_best:
            utils.save_checkpoint({'Epoch': epoch,
                                   'state_dict': model.state_dict(),
                                   'optim_dict': optimizer.state_dict()},
                                  is_best, experiment_dir,
                                  checkpoint='{}_epoch{}_checkpoint.pth'.format(
                                      cfg.network.lower(), str(epoch)),
                                  best_model='{}_best.pth'.format(
                                      cfg.network.lower()))
    # print('\nEvaluate on test')
    logging.info('\nEvaluate test result on best ckpt')
    # Reload the best checkpoint saved during training before testing.
    state_dict = torch.load(os.path.join(
        experiment_dir, '{}_best.pth'.format(cfg.network.lower())),
        map_location=device)
    model.load_state_dict(state_dict['state_dict'], strict=False)
    test_loss, test_acc = test(model, device, dloader_test, criterion,
                               experiment_dir)
    logging.info('Test: Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
        test_loss, test_acc))
    writer.add_text('test performance on best ckpt',
                    'test_loss {}; test_acc {}'.format(test_loss, test_acc))
    writer.close()
    # save the configuration file within that experiment directory
    utils.save_yaml(cfg,
                    save_path=os.path.join(experiment_dir,
                                           'config_linear.yaml'))
    logging.info('-----------End of Experiment------------')
def main():
    """Train retinopathy models (optionally with 5-fold CV), tune the
    rounding thresholds for the regression task, predict the test set and
    write submission.csv.

    Reads hyperparameters from the YAML at ``args.config``; logs to an
    Azure ML run and notifies Slack on start/finish/failure (unless
    ``args.debug``).  NOTE(review): ``args`` is a module-level object
    parsed elsewhere — confirm against the script entry point.
    """
    config = utils.load_yaml(args.config)
    task = config['task']
    EPOCHS = config['epoch']
    N_FOLDS = 5
    BATCH_SIZE = config['batchsize']
    IMAGE_SIZE = config['image_size']
    model_name = config['model']
    optimizer_name = config['optimizer']
    loss = config['loss']
    lr = float(config['lr'])
    n_class = config['n_class']
    lr_scheduler = config.get('lr_scheduler')
    azure_run = None
    tb_writer = None
    num_workers = 64
    # Run name is the start timestamp, e.g. '20190701123000'.
    experiment_name = datetime.strftime(datetime.now(), '%Y%m%d%H%M%S')
    print(f'found {torch.cuda.device_count()} gpus !!')
    try:
        # Debug mode shrinks the run: 1 epoch, 2 folds, 1000 rows.
        if args.debug:
            print('running in debug mode')
            EPOCHS = 1
            N_FOLDS = 2
        if args.debug:
            result_dir = Path(utils.RESULT_DIR) / ('debug-' + experiment_name)
        else:
            result_dir = Path(utils.RESULT_DIR) / experiment_name
        # Azure ML experiment tracking: log all hyperparameters up front.
        ws = Workspace.from_config('.aml_config/config.json')
        exp = Experiment(workspace=ws, name='kaggle-aptos2019')
        azure_run = exp.start_logging()
        azure_run.log('experiment name', experiment_name)
        azure_run.log('epoch', EPOCHS)
        azure_run.log('batch size', BATCH_SIZE)
        azure_run.log('image size', IMAGE_SIZE)
        azure_run.log('model', model_name)
        azure_run.log('optimizer', optimizer_name)
        azure_run.log('loss_name', loss['name'])
        azure_run.log('lr', lr)
        azure_run.log('lr_scheduler', lr_scheduler)
        azure_run.log('task', task)
        if args.cv:
            azure_run.log('cv', N_FOLDS)
        else:
            azure_run.log('cv', 0)
        if args.multi:
            print('use multi gpu !!')
        os.mkdir(result_dir)
        print(f'created: {result_dir}')
        # Snapshot the YAML config into the result directory.
        utils.save_yaml(result_dir / Path(args.config).name, config)
        # if not args.debug:
        #     tb_writer = SummaryWriter(log_dir=result_dir)
        device = torch.device("cuda:0")
        # NOTE: ``config`` is rebound here — from the loaded YAML dict to
        # the kwargs dict passed to utils.run_model below.
        config = {
            'epochs': EPOCHS,
            'multi': args.multi,
            'batch_size': BATCH_SIZE,
            'image_size': IMAGE_SIZE,
            'model_name': model_name,
            'n_class': n_class,
            'optimizer_name': optimizer_name,
            'loss': loss,
            'lr': lr,
            'lr_scheduler': lr_scheduler,
            'task': task,
            'device': device,
            'num_workers': num_workers,
        }
        print(config)
        if not args.debug:
            slack.notify_start(experiment_name, config)
        train_df = pd.read_csv(utils.TRAIN_CSV_PATH)
        if args.debug:
            train_df = train_df[:1000]
        config['df'] = train_df
        # Stratify folds on the diagnosis label; fixed seed for
        # reproducible splits.
        skf = StratifiedKFold(n_splits=N_FOLDS, random_state=41, shuffle=True)
        indices = list(skf.split(train_df, train_df['diagnosis']))
        if not args.cv:
            print('do not use cross validation')
            indices = [indices[0]]
        # cross validation
        oof_preds = np.zeros((len(train_df), n_class))
        for i_fold, (train_index, valid_index) in tqdm(enumerate(indices)):
            model_path = result_dir / f'model_fold{i_fold}'
            config['train_index'] = train_index
            config['valid_index'] = valid_index
            config['model_path'] = str(model_path)
            if azure_run:
                # Fold 0 logs to the parent run; later folds get child runs.
                if i_fold == 0:
                    config['azure_run'] = azure_run
                    y_pred, y_true = utils.run_model(**config)
                else:
                    with azure_run.child_run() as child:
                        config['azure_run'] = child
                        y_pred, y_true = utils.run_model(**config)
            else:
                y_pred, y_true = utils.run_model(**config)
            if args.cv:
                # Accumulate out-of-fold predictions for CV scoring.
                oof_preds[valid_index] = y_pred
        if args.cv:
            valid_preds = oof_preds
            valid_true = train_df['diagnosis']
        else:
            valid_preds = y_pred
            valid_true = y_true
        if task == 'class':
            round_valid_preds = np.argmax(valid_preds, axis=1)
        elif task == 'reg':
            # Regression output: fit class-boundary thresholds that
            # maximize the metric, then round with them.
            print('optimizing threshold ...')
            optR = utils.OptimizedRounder()
            optR.fit(valid_preds, valid_true)
            coef = optR.coefficients()
            print(f'best coef: {coef}')
            if azure_run:
                azure_run.log('coef', coef)
            round_valid_preds = optR.predict(valid_preds, coef)
        val_kappa = cohen_kappa_score(round_valid_preds, valid_true,
                                      weights='quadratic')
        print(f'best val kappa: {val_kappa}')
        if azure_run:
            azure_run.log('best val kappa', val_kappa)
        # Inference on the test set, averaged over the fold models.
        test_csv = pd.read_csv(utils.TEST_CSV_PATH)
        #test_tfms = utils.build_transform(size=IMAGE_SIZE, mode='test')
        test_tfms = utils.build_transform(size=IMAGE_SIZE, mode='val')
        test_dataset = RetinopathyDataset(df=test_csv, mode='test',
                                          transform=test_tfms,
                                          auto_crop=True, add_blur=True)
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=num_workers)
        test_preds = np.zeros((len(test_csv), n_class))
        for i in range(len(indices)):
            model = utils.load_pytorch_model(model_name,
                                             result_dir / f'model_fold{i}',
                                             n_class)
            test_preds += utils.predict(model, test_loader, n_class=n_class,
                                        device=device, tta=1)
        test_preds /= len(indices)
        if task == 'class':
            round_test_preds = np.argmax(test_preds, axis=1)
        elif task == 'reg':
            # Reuse thresholds fitted on the validation predictions.
            round_test_preds = optR.predict(test_preds, coef)
        submission_csv = pd.read_csv(utils.SAMPLE_SUBMISSION_PATH)
        submission_csv['diagnosis'] = round_test_preds
        submission_csv.to_csv(result_dir / 'submission.csv', index=False)
        print('finish!!!')
        if not args.debug:
            slack.notify_finish(experiment_name, config, val_kappa)
    except KeyboardInterrupt as e:
        # Manual interrupt: notify but do not re-raise.
        if not args.debug:
            slack.notify_fail(experiment_name, config, e.__class__.__name__,
                              str(e))
    except Exception as e:
        if azure_run:
            azure_run.fail(e)
        if not args.debug:
            slack.notify_fail(experiment_name, config, e.__class__.__name__,
                              str(e))
        raise
    finally:
        # Always close tracking resources, even on failure.
        if azure_run:
            azure_run.complete()
            print('close azure_run')
        if tb_writer:
            tb_writer.export_scalars_to_json(
                os.path.join(result_dir, 'all_scalars.json'))
            tb_writer.close()
            print('close tb_writer')
"scale_limit": 0.15, "rotate_limit": 10, "p": 0.5 }, "RandomBrightnessContrast": { "p": 0.5 } } } flags = Flags().update(flags_dict) debug = flags.debug outdir = Path(flags.outdir) os.makedirs(str(outdir), exist_ok=True) flags_dict = dataclasses.asdict(flags) save_yaml(outdir / "flags.yaml", flags_dict) inputdir = Path("./") datadir = inputdir / "vinbigdata-chest-xray-abnormalities-detection" imgdir = inputdir / flags.imgdir_name train_df = pd.read_csv(datadir / "train.csv") train = train_df train_data_type = flags.train_data_type if flags.use_class14: thing_classes.append("No finding") split_mode = flags.split_mode if split_mode == "all_train": DatasetCatalog.register(
default= "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/code/fMRI/template.yml", help= "Path to the yaml containing the parameters of the script execution.") args = parser.parse_args() parameters = read_yaml(args.yaml_file) input_path = parameters['input'] output_path_ = parameters['output'] subject = get_subject_name(parameters['subject']) output_path = get_output_name(output_path_, parameters['language'], subject, parameters['model_name']) logs = Logger( get_output_name(output_path_, parameters['language'], subject, parameters['model_name'], 'logs.txt')) save_yaml(parameters, output_path + 'config.yml') logs.info("Fetching maskers...", end='\n') kwargs = { 'detrend': parameters['detrend'], 'standardize': parameters['standardize'], 'high_pass': parameters['high_pass'], 'low_pass': parameters['low_pass'], 'mask_strategy': parameters['mask_strategy'], #'dtype': parameters['dtype'], 'memory_level': parameters['memory_level'], 'smoothing_fwhm': parameters['smoothing_fwhm'], 'verbose': parameters['verbose'], 't_r': parameters['tr'] } masker = fetch_masker(parameters['masker_path'],
def train(args):
    """Train the 3D U-Net denoiser (TF1) on interleaved noisy sub-stacks.

    Noise2Noise-style training: each patch is split into even frames
    (input) and odd frames (target) along the stack axis.  Periodically
    saves preview TIFFs and exports the model as a TF SavedModel.

    Args:
        args: Namespace with dataset/model/result folders, patch sizes
            (img_h/img_w/img_s/img_c), lr, train_epochs and
            normalize_factor; serialized to para.yaml for the run.
    """
    # Run tag: dataset name + start time, e.g. 'mydata_20210101-1200'.
    TIME = args.datasets_folder + '_' + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M")
    DeepCAD_model_path = args.DeepCAD_model_folder + '//' + 'pb_unet3d_' + TIME + '//'
    if not os.path.exists(args.DeepCAD_model_folder):
        os.mkdir(args.DeepCAD_model_folder)
    if not os.path.exists(DeepCAD_model_path):
        os.mkdir(DeepCAD_model_path)
    # Persist the training arguments next to the model.
    yaml_name = DeepCAD_model_path + '//para.yaml'
    save_yaml(args, yaml_name)
    results_path = args.results_folder + '//' + 'unet3d_' + TIME + '//'
    if not os.path.exists(args.results_folder):
        os.mkdir(args.results_folder)
    if not os.path.exists(results_path):
        os.mkdir(results_path)
    name_list, noise_img, coordinate_list = train_preprocess_lessMemoryMulStacks(
        args)
    data_size = len(name_list)
    # --- TF1 graph construction ---
    sess = tf.Session()
    input_shape = [1, args.img_h, args.img_w, args.img_s, args.img_c]
    # NOTE: 'input' shadows the builtin; kept as-is (it names the
    # placeholder used in the exported signature).
    input = tf.placeholder(tf.float32, shape=input_shape, name='input')
    # output = tf.placeholder(tf.float32, shape=input_shape, name='output')
    output_GT = tf.placeholder(tf.float32, shape=input_shape, name='output_GT')
    # net = Network(training = args.is_training)
    output = autoencoder(input, height=args.img_h, width=args.img_w,
                         length=args.img_s)
    # Combined L1 + L2 reconstruction loss.
    L2_loss = tf.reduce_mean(tf.square(output - output_GT))
    L1_loss = tf.reduce_sum(tf.losses.absolute_difference(output, output_GT))
    loss = tf.add(L1_loss, L2_loss)
    optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
    train_step = optimizer.minimize(loss)
    start_time = time.time()
    with sess.as_default():
        sess.run(tf.global_variables_initializer())
        for i in range(args.train_epochs):
            # Reshuffle the patch order every epoch.
            name_list = shuffle_datasets_lessMemory(name_list)
            for index in range(data_size):
                single_coordinate = coordinate_list[name_list[index]]
                init_h = single_coordinate['init_h']
                end_h = single_coordinate['end_h']
                init_w = single_coordinate['init_w']
                end_w = single_coordinate['end_w']
                init_s = single_coordinate['init_s']
                end_s = single_coordinate['end_s']
                # Even frames as input, odd frames as target (Noise2Noise).
                noise_patch1 = noise_img[init_s:end_s:2, init_h:end_h,
                                         init_w:end_w]
                noise_patch2 = noise_img[init_s + 1:end_s:2, init_h:end_h,
                                         init_w:end_w]
                # (s,h,w) -> (1,h,w,s,1) to match the placeholder shape.
                train_input = np.expand_dims(
                    np.expand_dims(noise_patch1.transpose(1, 2, 0), 3), 0)
                train_GT = np.expand_dims(
                    np.expand_dims(noise_patch2.transpose(1, 2, 0), 3), 0)
                # print(train_input.shape)
                data_name = name_list[index]
                sess.run(train_step,
                         feed_dict={
                             input: train_input,
                             output_GT: train_GT
                         })
                # Every 100 steps: log losses and dump preview TIFFs.
                if index % 100 == 0:
                    output_img, L1_loss_va, L2_loss_va = sess.run(
                        [output, L1_loss, L2_loss],
                        feed_dict={
                            input: train_input,
                            output_GT: train_GT
                        })
                    print('--- Epoch ', i, ' --- Step ', index, '/',
                          data_size, ' --- L1_loss ', L1_loss_va,
                          ' --- L2_loss ', L2_loss_va, ' --- Time ',
                          (time.time() - start_time))
                    print('train_input ---> ', train_input.max(), '---> ',
                          train_input.min())
                    print('output_img ---> ', output_img.max(), '---> ',
                          output_img.min())
                    # Denormalize and clip to the uint16 range for saving.
                    train_input = train_input.squeeze().astype(
                        np.float32) * args.normalize_factor
                    train_GT = train_GT.squeeze().astype(
                        np.float32) * args.normalize_factor
                    output_img = output_img.squeeze().astype(
                        np.float32) * args.normalize_factor
                    train_input = np.clip(train_input, 0, 65535).astype('uint16')
                    train_GT = np.clip(train_GT, 0, 65535).astype('uint16')
                    output_img = np.clip(output_img, 0, 65535).astype('uint16')
                    result_name = results_path + str(i) + '_' + str(
                        index) + '_' + data_name + '_output.tif'
                    noise_img1_name = results_path + str(i) + '_' + str(
                        index) + '_' + data_name + '_noise1.tif'
                    noise_img2_name = results_path + str(i) + '_' + str(
                        index) + '_' + data_name + '_noise2.tif'
                    io.imsave(result_name, output_img.transpose(2, 0, 1))
                    io.imsave(noise_img1_name, train_input.transpose(2, 0, 1))
                    io.imsave(noise_img2_name, train_GT.transpose(2, 0, 1))
                '''
                variable_names = [v.name for v in tf.trainable_variables()]
                values = sess.run(variable_names)
                for k,v in zip(variable_names, values):
                    if len(v.shape)==5:
                        print("Variable: ", k, "Shape: ", v.shape,"value: ",v[0][0][0][0][0])
                    if len(v.shape)==1:
                        print("Variable: ", k, "Shape: ", v.shape,"value: ",v[0])
                '''
                '''
                aaaaa=0
                for op in tf.get_default_graph().get_operations():
                    aaaaa=aaaaa+1
                    if aaaaa<50:
                        # print('-----> ',op.name)
                        print('-----> ',op.values())
                '''
                # Every 1000 steps: export a SavedModel snapshot.
                if index % 1000 == 0:
                    DeepCAD_model_name = DeepCAD_model_path + '//' + str(
                        i) + '_' + str(index) + '//'
                    builder = tf.saved_model.builder.SavedModelBuilder(
                        DeepCAD_model_name)
                    input0 = {
                        'input0': tf.saved_model.utils.build_tensor_info(input)
                    }
                    output0 = {
                        'output0': tf.saved_model.utils.build_tensor_info(output)
                    }
                    method_name = tf.saved_model.signature_constants.PREDICT_METHOD_NAME
                    my_signature = tf.saved_model.signature_def_utils.build_signature_def(
                        input0, output0, method_name)
                    builder.add_meta_graph_and_variables(
                        sess, ["3D_N2N"],
                        signature_def_map={'my_signature': my_signature})
                    builder.add_meta_graph(
                        ["3D_N2N"],
                        signature_def_map={'my_signature': my_signature})
                    builder.save()
def train_and_evaluate(cfg):
    """Linear-evaluation / fine-tuning run: optionally initialize from an
    SSL-pretrained checkpoint (freezing all but the fc layer), train,
    validate per epoch, checkpoint, and evaluate on the test split.

    Args:
        cfg: Config with exp_type, save_dir, log, use_cuda, cuda_num,
            ssl_pretrained_exp_path, ssl_weight, lr, momentum, scheduler,
            num_epochs, save_intermediate_weights and network attributes
            (cfg.use_cuda is updated in place).
    """
    # Training settings
    experiment_dir = os.path.join('experiments', cfg.exp_type, cfg.save_dir)
    if not os.path.exists(experiment_dir):
        os.makedirs(experiment_dir)
    utils.set_logger(os.path.join(experiment_dir, cfg.log))
    logging.info('-----------Starting Experiment------------')
    use_cuda = cfg.use_cuda and torch.cuda.is_available()
    cfg.use_cuda = use_cuda
    device = torch.device(
        "cuda:{}".format(cfg.cuda_num) if use_cuda else "cpu")
    # initialize the tensorboard summary writer
    writer = SummaryWriter(experiment_dir + '/tboard')
    ## get the dataloaders
    dloader_train, dloader_val, dloader_test = dataloaders.get_dataloaders(
        cfg, val_split=.2)
    # Load the model
    model = models.get_model(cfg)
    if cfg.ssl_pretrained_exp_path:
        # BUG FIX: the original `ssl_exp_dir = experiment_dir = os.path.join(...)`
        # also rebound ``experiment_dir``, so all subsequent checkpoints and
        # the config YAML were written into the SSL pretrain directory
        # instead of this experiment's own directory.
        ssl_exp_dir = os.path.join('experiments',
                                   'self-supervised',
                                   cfg.ssl_pretrained_exp_path)
        state_dict = torch.load(os.path.join(ssl_exp_dir, cfg.ssl_weight),
                                map_location=device)
        # the stored dict has 3 informations - epoch, state_dict and optimizer
        state_dict = state_dict['state_dict']
        # Drop the SSL classification head; it does not match this task.
        del state_dict['fc.weight']
        del state_dict['fc.bias']
        model.load_state_dict(state_dict, strict=False)
        # Only finetune the fc layer; freeze the backbone.
        for name, param in model.named_parameters():
            if 'fc' not in name:
                param.requires_grad = False
    model = model.to(device)
    images, _, _, _ = next(iter(dloader_train))
    images = images.to(device)
    writer.add_graph(model, images)
    # follow the same setting as RotNet paper
    optimizer = optim.SGD(model.parameters(),
                          lr=float(cfg.lr),
                          momentum=float(cfg.momentum),
                          weight_decay=5e-4,
                          nesterov=True)
    if cfg.scheduler:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=[60, 120, 160, 200],
                                             gamma=0.2)
    else:
        scheduler = None
    criterion = nn.CrossEntropyLoss()
    best_loss = 1000  # sentinel: any real val loss below this becomes "best"
    # NOTE(review): range(num_epochs + 1) runs one extra epoch compared to
    # the sibling train_and_evaluate functions — confirm intent.
    for epoch in range(cfg.num_epochs + 1):
        logging.info('\nTrain for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        train_loss, train_acc = train(epoch, model, device, dloader_train,
                                      optimizer, scheduler, criterion,
                                      experiment_dir, writer)
        # validate after every epoch
        logging.info('\nValidate for Epoch: {}/{}'.format(
            epoch, cfg.num_epochs))
        val_loss, val_acc = validate(epoch, model, device, dloader_val,
                                     criterion, experiment_dir, writer)
        logging.info(
            'Val Epoch: {} Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
                epoch + 1, val_loss, val_acc))
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        if epoch % cfg.save_intermediate_weights == 0:
            utils.save_checkpoint(
                {'Epoch': epoch,
                 'state_dict': model.state_dict(),
                 'optim_dict': optimizer.state_dict()},
                is_best, experiment_dir,
                checkpoint='{}_{}rot_epoch{}_checkpoint.pth'.format(
                    cfg.network.lower(), str(cfg.num_rot), str(epoch)),
                best_model='{}_{}rot_epoch{}_best.pth'.format(
                    cfg.network.lower(), str(cfg.num_rot), str(epoch)))
    writer.close()
    logging.info('\nEvaluate on test')
    test_loss, test_acc = test(model, device, dloader_test, criterion,
                               experiment_dir)
    logging.info('Test: Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
        test_loss, test_acc))
    # save the configuration file within that experiment directory
    utils.save_yaml(cfg,
                    save_path=os.path.join(experiment_dir, 'config_sl.yaml'))
    logging.info('-----------End of Experiment------------')
def train_and_evaluate(cfg):
    """Supervised training run, optionally initialized from supervised or
    SSL-pretrained weights (with partial layer freezing), followed by
    evaluation of the best checkpoint on the test split.

    Args:
        cfg: Config with exp_type, save_dir, log, use_cuda, cuda_num,
            use_pretrained / pretrained_dir / pretrained_weights,
            use_ssl / ssl_pretrained_exp_path / ssl_weight, opt, lr,
            momentum, scheduler, num_epochs, save_intermediate_weights
            and network attributes (cfg.use_cuda is updated in place).
    """
    # Training settings
    experiment_dir = os.path.join('experiments', cfg.exp_type, cfg.save_dir)
    if not os.path.exists(experiment_dir):
        os.makedirs(experiment_dir)
    utils.set_logger(os.path.join(experiment_dir, cfg.log))
    logging.info('-----------Starting Experiment------------')
    use_cuda = cfg.use_cuda and torch.cuda.is_available()
    cfg.use_cuda = use_cuda
    device = torch.device(
        "cuda:{}".format(cfg.cuda_num) if use_cuda else "cpu")
    # initialize the tensorboard summary writer
    logs = os.path.join('experiments', cfg.exp_type, 'tboard_sup_demo')
    writer = SummaryWriter(logs + '/rotnet_without_pretrain')
    ## get the dataloaders
    dloader_train, dloader_val, dloader_test = dataloaders.get_dataloaders(cfg)
    # Load the model
    model = models.get_model(cfg)
    if cfg.use_pretrained:
        pretrained_path = os.path.join('experiments', 'supervised',
                                       cfg.pretrained_dir,
                                       cfg.pretrained_weights)
        state_dict = torch.load(pretrained_path, map_location=device)
        model.load_state_dict(state_dict, strict=False)
        logging.info('loading pretrained_weights {}'.format(
            cfg.pretrained_weights))
    if cfg.use_ssl:
        ssl_exp_dir = os.path.join('experiments',
                                   'self-supervised',
                                   cfg.ssl_pretrained_exp_path)
        state_dict = torch.load(os.path.join(ssl_exp_dir, cfg.ssl_weight),
                                map_location=device)
        # the stored dict has 3 informations - epoch, state_dict and optimizer
        state_dict = state_dict['state_dict']
        print(state_dict.keys())
        # Drop the SSL head and the later conv blocks so they are
        # re-initialized and retrained on the supervised task.
        del state_dict['fc.weight']
        del state_dict['fc.bias']
        del state_dict['layer4.0.conv1.weight']
        del state_dict['layer4.0.conv2.weight']
        del state_dict['layer4.1.conv1.weight']
        del state_dict['layer4.1.conv2.weight']
        del state_dict['layer3.0.conv1.weight']
        del state_dict['layer3.0.conv2.weight']
        del state_dict['layer3.1.conv1.weight']
        del state_dict['layer3.1.conv2.weight']
        model.load_state_dict(state_dict, strict=False)
        # Only finetune fc and the later conv blocks; freeze the rest.
        # BUG FIX: the original condition
        #   if 'fc' or 'layer3.0.conv' or ... in name:
        # evaluated the non-empty literal 'fc' (always truthy), so EVERY
        # parameter kept requires_grad=True and nothing was frozen.
        trainable_keys = ('fc', 'layer3.0.conv', 'layer3.1.conv',
                          'layer4.0.conv', 'layer4.1.conv')
        for name, param in model.named_parameters():
            param.requires_grad = any(key in name for key in trainable_keys)
    model = model.to(device)
    images, _, _, _ = next(iter(dloader_train))
    images = images.to(device)
    writer.add_graph(model, images)
    # follow the same setting as RotNet paper
    if cfg.opt == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=float(cfg.lr),
                              momentum=float(cfg.momentum),
                              weight_decay=5e-4,
                              nesterov=True)
    elif cfg.opt == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=float(cfg.lr))
    if cfg.scheduler:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=[60, 120, 160, 200],
                                             gamma=0.2)
    else:
        scheduler = None
    criterion = nn.CrossEntropyLoss()
    # Module-level step counter, reset per run (shared with train/validate).
    global iter_cnt
    iter_cnt = 0
    best_loss = 1000  # sentinel: any real val loss below this becomes "best"
    for epoch in range(cfg.num_epochs):
        logging.info('\nTrain for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        train_loss, train_acc = train(epoch, model, device, dloader_train,
                                      optimizer, scheduler, criterion,
                                      experiment_dir, writer)
        # validate after every epoch
        logging.info('\nValidate for Epoch: {}/{}'.format(
            epoch, cfg.num_epochs))
        val_loss, val_acc = validate(epoch, model, device, dloader_val,
                                     criterion, experiment_dir, writer)
        logging.info(
            'Val Epoch: {} Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
                epoch, val_loss, val_acc))
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        # Save every Nth epoch, and always when validation loss improved.
        if epoch % cfg.save_intermediate_weights == 0 or is_best:
            utils.save_checkpoint(
                {'Epoch': epoch,
                 'state_dict': model.state_dict(),
                 'optim_dict': optimizer.state_dict()},
                is_best, experiment_dir,
                checkpoint='{}_epoch{}_checkpoint.pth'.format(
                    cfg.network.lower(), str(epoch)),
                best_model='{}_best.pth'.format(cfg.network.lower()))
    writer.close()
    logging.info('\nEvaluate test result on best ckpt')
    # Reload the best checkpoint saved during training before testing.
    state_dict = torch.load(os.path.join(
        experiment_dir, '{}_best.pth'.format(cfg.network.lower())),
        map_location=device)
    model.load_state_dict(state_dict, strict=False)
    test_loss, test_acc = test(model, device, dloader_test, criterion,
                               experiment_dir)
    logging.info('Test: Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
        test_loss, test_acc))
    # save the configuration file within that experiment directory
    utils.save_yaml(cfg,
                    save_path=os.path.join(experiment_dir, 'config_sl.yaml'))
    logging.info('-----------End of Experiment------------')
if __name__ == '__main__':
    # Command-line interface: a single YAML file drives the whole run.
    arg_parser = argparse.ArgumentParser(
        description="Fine-tune a RoBerta model for a specific NLP task.")
    arg_parser.add_argument(
        '--yaml_file',
        type=str,
        help='''Path to the yaml file containing additional information on how the dataset is structured.'''
    )
    args = arg_parser.parse_args()

    # Load the run parameters and snapshot them into the output folder.
    parameters = read_yaml(args.yaml_file)
    check_folder(parameters['output_dir'])
    save_yaml(parameters, os.path.join(parameters['output_dir'],
                                       'config.yml'))
    logging.basicConfig(filename=os.path.join(parameters['output_dir'],
                                              parameters['log_file']),
                        filemode='w+',
                        level=logging.INFO)
    logging.info("Parameters fetched.")

    logging.info("Setting seed for reproductibility...")
    set_seed(parameters['seed'])
    logging.info("\tDone.")

    logging.info("Set and retrieve the device on which to run...")
    device = get_device()
    task = parameters['task'].lower()
    logging.info("\tDone.")
def train_eval(config, exp_path):
    """Run cross-validated train/test evaluation for every marker in the dataset.

    For each marker: optionally grid-search model hyper-parameters, fit one
    model per CV fold, compute train/test metrics, print them to console and
    to ``<exp_path>/metrics.txt``, and render a 9-row diagnostic figure per
    marker (data cleaning, feature distributions, confusion matrices, ROC
    curves, metric tables). Side effects: writes ``dirty_data.txt``,
    ``feature_selection_and_transformation.txt``, ``metrics.txt``,
    ``best_params.yaml`` and ``conf_mat.png`` under ``exp_path``.

    Args:
        config: experiment configuration dict (model, kwargs, threshold, ...).
        exp_path: existing directory that receives all report artifacts.
    """
    dataset = MarkerExpressionDataset(config)

    # Report which samples were discarded by data cleaning, grouped per
    # marker and per class.
    if dataset.data_clean is not None:
        with open(os.path.join(exp_path, 'dirty_data.txt'), 'w') as f:
            f.write('---data clean method: %s---\n' % dataset.data_clean)
            for marker, item in dataset.outlier_samples.items():
                f.write('marker %s:\n' % marker)
                for class_id in dataset.classes:
                    f.write('class %s:\n' % class_id)
                    for sample_id in item.keys():
                        if item[sample_id]['class'] == class_id:
                            f.write('\t%s\n' % sample_id)

    # Report the feature-selection / feature-transformation setup per marker.
    if dataset.feature_selection is not None or dataset.feature_transformation is not None:
        with open(
                os.path.join(exp_path, 'feature_selection_and_transformation.txt'), 'w') as f:
            if dataset.feature_selection is not None:
                f.write('---feature selection method: %s---\n' % dataset.feature_selection['method'])
                if 'kwargs' in dataset.feature_selection:
                    f.write('---feature selection kwargs: %s---\n' % str(dataset.feature_selection['kwargs']))
            if dataset.feature_transformation is not None:
                f.write('---feature transformation method: %s---\n' % dataset.feature_transformation['method'])
                if 'kwargs' in dataset.feature_transformation:
                    f.write('---feature transformation kwargs: %s---\n' % str(dataset.feature_transformation['kwargs']))
            for marker in dataset.markers:
                f.write('marker %s:\n' % marker)
                if dataset.fs_metric_params is not None:
                    f.write(
                        '---feature selection and transformation kwargs: %s---\n'
                        % str(dataset.fs_metric_params[marker]))
                if dataset.feature_selection is not None:
                    features = dataset.features
                    feature_index = 0
                    f.write('---selected features---\n')
                    # 'custom' selections are taken verbatim from the config;
                    # otherwise ask the fitted selector for its support mask.
                    if dataset.feature_selection['method'] == 'custom':
                        support_flags = dataset.feature_selection['selection'][
                            marker]
                    else:
                        support_flags = dataset.feature_selector[
                            marker].get_support()
                    for flag in support_flags:
                        f.write('%s:\t%s\n' % (features[feature_index], flag))
                        # Wrap around so flags can cover repeated feature blocks.
                        feature_index = (feature_index + 1) % len(features)
                if dataset.feature_transformation is not None:
                    components = dataset.feature_transformer[
                        marker].components_
                    f.write('---feature transformation components---:\n%s' % components.tolist())
                    # if 'feature_mean' in config:
                    #     feature_mean = config['feature_mean']
                    #     coefficients = np.abs(feature_mean*components.sum(axis=0)).\
                    #         reshape([len(dataset.features), -1]).sum(axis=0)
                    # else:
                    #     coefficients = np.abs(components.sum(axis=0)).reshape([len(dataset.features), -1]).sum(axis=0)
                    # coefficients = coefficients / coefficients.sum()
                    #
                    # f.write('---feature transformation coefficients---:\n%s' % coefficients.tolist())

    # 'roc_optimal' lets eval_results pick per-class thresholds from the ROC.
    threshold = config.get('threshold', 'roc_optimal')
    metrics_names = ['sensitivity', 'specificity', 'roc_auc_score']
    metrics_avg_names = ['roc_auc_score_avg', 'roc_auc_score_avg_std']
    # One column of 9 diagnostic panels per marker.
    fig, ax = plt.subplots(9, len(dataset.markers), squeeze=False,
                           figsize=(6 * len(dataset.markers), 40))
    metrics_file = open(os.path.join(exp_path, 'metrics.txt'), 'w')
    metrics_fig_filename = os.path.join(exp_path, 'conf_mat.png')
    best_params = dict()
    all_marker_train_metrics = []
    all_marker_test_metrics = []

    for i, marker in enumerate(dataset.markers):
        model = get_model(config)
        if 'model_kwargs_search' in config:
            # parameter search
            print('parameter search for marker %s...' % marker)
            all_x, all_y, cv_index = dataset.get_all_data(marker)
            best_model = GridSearchCV(model,
                                      param_grid=config['model_kwargs_search'],
                                      cv=cv_index,
                                      scoring='roc_auc_ovr')
            best_model.fit(all_x, all_y)
            best_params[marker] = best_model.best_params_
            print('search done')
        else:
            # NOTE(review): in this branch best_model aliases the fresh model
            # and is never fitted before being used as z_generator below —
            # confirm get_model returns a usable estimator here.
            best_model = model
            best_params[marker] = config['model_kwargs']

        # run train and test: refit a clone per CV fold and accumulate
        # per-fold labels and predicted probabilities.
        train_xs = []
        train_ys = []
        train_ys_score = []
        test_xs = []
        test_ys = []
        test_ys_score = []
        for fold_i, (train_x, train_y, test_x, test_y) in enumerate(dataset.get_split_data(marker)):
            model = base.clone(model)
            model.set_params(**best_params[marker])
            model.fit(train_x, train_y)
            # model.classes_ = dataset.classes
            train_xs += train_x
            train_ys += train_y
            test_xs += test_x
            test_ys += test_y
            train_y_score = model.predict_proba(train_x).tolist()
            train_ys_score += train_y_score
            test_y_score = model.predict_proba(test_x).tolist()
            test_ys_score += test_y_score
            # model_filename = os.path.join(exp_path, 'model', '%s_%s_fold_%d.pkl'
            #                               % (config['model'], marker, fold_i))
            # maybe_create_path(os.path.dirname(model_filename))
            # with open(model_filename, 'wb') as f:
            #     pickle.dump(model, f)

        # Test thresholds are locked to the ones chosen on the training folds.
        train_metrics = eval_results(train_ys, train_ys_score,
                                     labels=dataset.classes,
                                     average='macro',
                                     threshold=threshold,
                                     num_fold=dataset.num_fold)
        test_metrics = eval_results(test_ys, test_ys_score,
                                    labels=dataset.classes,
                                    average='macro',
                                    threshold=train_metrics['used_threshold'],
                                    num_fold=dataset.num_fold)
        all_marker_train_metrics.append(train_metrics)
        all_marker_test_metrics.append(test_metrics)

        # print metrics to console and file
        double_print('marker: %s' % marker, metrics_file)
        double_print('metrics on training set:', metrics_file)
        for j, class_j in enumerate(dataset.classes):
            log_str = '[class: %s. threshold: %1.1f] ' % (
                class_j, 100 * train_metrics['used_threshold'][j])
            for metrics_name in metrics_names:
                log_str += '%s: %1.1f. ' % (metrics_name, train_metrics[metrics_name][j])
            double_print(log_str, metrics_file)
        for metrics_name in metrics_avg_names:
            double_print(
                '%s: %1.1f' % (metrics_name, train_metrics[metrics_name]),
                metrics_file)
        double_print('metrics on test set:', metrics_file)
        for j, class_j in enumerate(dataset.classes):
            log_str = '[class: %s. threshold: %1.1f] ' % (
                class_j, 100 * test_metrics['used_threshold'][j])
            for metrics_name in metrics_names:
                log_str += '%s: %1.1f. ' % (metrics_name, test_metrics[metrics_name][j])
            double_print(log_str, metrics_file)
        for metrics_name in metrics_avg_names:
            double_print(
                '%s: %1.1f' % (metrics_name, test_metrics[metrics_name]),
                metrics_file)

        # generate figure
        # Row 0: data-cleaning distribution.
        current_ax = ax[0, i]
        dataset.plot_data_clean_distribution(current_ax, marker)
        current_ax.set_title('data cleaning on marker %s' % marker)

        # Row 1: feature distribution with the searched model's decision
        # contour (only drawable for 2-D features).
        current_ax = ax[1, i]
        contour_flag = len(train_xs[0]) == 2
        # dup_reduced = list(tuple(tuple([train_xs[j] + [train_ys[j]] for j in range(len(train_xs))])))
        # dup_reduced_train_xs = [item[:-1] for item in dup_reduced]
        # dup_reduced_train_ys = [item[-1] for item in dup_reduced]
        # dup_reduced_train_ys_str = [str(item) for item in dup_reduced_train_ys]
        # NOTE(review): train_x/test_x here are the loop variables from the
        # fold loop above, i.e. only the LAST fold's data — the commented-out
        # lines suggest an all-fold dedup was intended; confirm.
        dup_reduced_train_xs = train_x + test_x
        dup_reduced_train_ys = train_y + test_y
        dup_reduced_train_ys_str = [str(item) for item in dup_reduced_train_ys]
        classes_str = [str(item) for item in dataset.classes]
        plot_feature_distribution(
            dup_reduced_train_xs, ax=current_ax, t_sne=True,
            hue=dup_reduced_train_ys_str, hue_order=classes_str,
            style=dup_reduced_train_ys_str, style_order=classes_str,
            # x_lim='box', y_lim='box',
            x_lim='min_max_extend', y_lim='min_max_extend',
            contour=contour_flag, z_generator=best_model.predict)
        current_ax.set_title('%s trained on whole set' % marker)

        # Row 2: confusion matrix over all training folds.
        current_ax = ax[2, i]
        metrics.ConfusionMatrixDisplay(
            train_metrics['conf_mat'],
            display_labels=dataset.classes).plot(ax=current_ax)
        current_ax.set_title('%s on train set of all folds' % marker)

        # Row 3: per-class ROC curves (train) with the operating point marked.
        current_ax = ax[3, i]
        for j in range(len(dataset.classes)):
            roc_curve = train_metrics['roc_curve'][j]
            roc_auc_score = train_metrics['roc_auc_score'][j]
            class_id = dataset.classes[j]
            # Metrics are stored as percentages; rescale to [0, 1] for plotting.
            sen = train_metrics['sensitivity'][j] / 100
            spe = train_metrics['specificity'][j] / 100
            metrics.RocCurveDisplay(fpr=roc_curve[0], tpr=roc_curve[1],
                                    roc_auc=roc_auc_score,
                                    estimator_name='class %s' % class_id).plot(ax=current_ax)
            current_ax.scatter(1 - spe, sen)

        # Row 4: tabular summary of the training metrics + chosen params.
        current_ax = ax[4, i]
        table_val_list = [
            dataset.classes,
            [100 * item for item in train_metrics['used_threshold']]
        ]
        row_labels = ['cls', 'thr']
        for metrics_name in metrics_names:
            table_val_list.append(train_metrics[metrics_name])
            row_labels.append(metrics_name[:min(3, len(metrics_name))])
        additional_text = []
        for metrics_name in metrics_avg_names:
            additional_text.append('%s: %1.1f' % (metrics_name, train_metrics[metrics_name]))
        additional_text.append(best_params[marker])
        plot_table(table_val_list, row_labels, ax=current_ax,
                   additional_text=additional_text)

        # Row 5: test-feature distribution with the last fold's fitted model.
        current_ax = ax[5, i]
        contour_flag = len(train_xs[0]) == 2
        test_y_str = [str(item) for item in test_y]
        classes_str = [str(item) for item in dataset.classes]
        plot_feature_distribution(
            test_x, ax=current_ax, t_sne=True,
            hue=test_y_str, hue_order=classes_str,
            style=test_y_str, style_order=classes_str,
            # x_lim='box', y_lim='box',
            x_lim='min_max_extend', y_lim='min_max_extend',
            contour=contour_flag, z_generator=model.predict)
        current_ax.set_title('%s on test set of the last fold' % marker)

        # Row 6: confusion matrix over all test folds.
        current_ax = ax[6, i]
        metrics.ConfusionMatrixDisplay(
            test_metrics['conf_mat'],
            display_labels=dataset.classes).plot(ax=current_ax)
        current_ax.set_title('%s on test set of all folds' % marker)

        # Row 7: per-class ROC curves (test) with the operating point marked.
        current_ax = ax[7, i]
        for j in range(len(dataset.classes)):
            roc_curve = test_metrics['roc_curve'][j]
            roc_auc_score = test_metrics['roc_auc_score'][j]
            class_id = dataset.classes[j]
            sen = test_metrics['sensitivity'][j] / 100
            spe = test_metrics['specificity'][j] / 100
            metrics.RocCurveDisplay(fpr=roc_curve[0], tpr=roc_curve[1],
                                    roc_auc=roc_auc_score,
                                    estimator_name='class %s' % class_id).plot(ax=current_ax)
            current_ax.scatter(1 - spe, sen)

        # Row 8: tabular summary of the test metrics.
        current_ax = ax[8, i]
        table_val_list = [
            dataset.classes,
            [100 * item for item in test_metrics['used_threshold']]
        ]
        row_labels = ['cls', 'thr']
        for metrics_name in metrics_names:
            table_val_list.append(test_metrics[metrics_name])
            row_labels.append(metrics_name[:min(3, len(metrics_name))])
        additional_text = []
        for metrics_name in metrics_avg_names:
            additional_text.append('%s: %1.1f' % (metrics_name, test_metrics[metrics_name]))
        plot_table(table_val_list, row_labels, ax=current_ax,
                   additional_text=additional_text)

    # Cross-marker averages of the aggregate metrics.
    for metrics_name in metrics_avg_names:
        all_marker_values = [
            item[metrics_name] for item in all_marker_train_metrics
        ]
        double_print(
            'overall train %s: %1.1f' %
            (metrics_name, sum(all_marker_values) / len(all_marker_values)),
            metrics_file)
    for metrics_name in metrics_avg_names:
        all_marker_values = [
            item[metrics_name] for item in all_marker_test_metrics
        ]
        double_print(
            'overall test %s: %1.1f' %
            (metrics_name, sum(all_marker_values) / len(all_marker_values)),
            metrics_file)
    metrics_file.close()
    save_yaml(os.path.join(exp_path, 'best_params.yaml'), best_params)
    fig.savefig(metrics_fig_filename, bbox_inches='tight', pad_inches=1)
            # NOTE(review): everything from here down to the fig.savefig(...)
            # call duplicates the tail of train_eval() above verbatim — this
            # looks like a copy/paste or file-extraction artifact; confirm
            # against the original file before removing.
            metrics_file)
    for metrics_name in metrics_avg_names:
        all_marker_values = [
            item[metrics_name] for item in all_marker_test_metrics
        ]
        double_print(
            'overall test %s: %1.1f' %
            (metrics_name, sum(all_marker_values) / len(all_marker_values)),
            metrics_file)
    metrics_file.close()
    save_yaml(os.path.join(exp_path, 'best_params.yaml'), best_params)
    fig.savefig(metrics_fig_filename, bbox_inches='tight', pad_inches=1)


if __name__ == '__main__':
    # Script entry point: resolve the experiment directory from CLI args
    # (`args` is expected to be parsed at module level, outside this chunk).
    exp_path = os.path.join('exp', args.config, args.sub_setting)
    # Reuse a previously saved per-experiment config unless --overwrite_config
    # was given; otherwise load the base config and persist a copy alongside
    # the experiment outputs.
    if not args.overwrite_config and os.path.exists(
            os.path.join(exp_path, 'config.yaml')):
        config = load_ymal(os.path.join(exp_path, 'config.yaml'))
    else:
        config = load_ymal(os.path.join('config', args.config + '.yaml'))
        save_yaml(os.path.join(exp_path, 'config.yaml'), config)
    # Refuse to clobber previously saved models unless --retrain was given.
    # NOTE(review): the `or` makes this raise whenever the model directory
    # exists, even when it contains no *.pkl files — `and` may have been
    # intended; confirm.
    if not args.retrain:
        if os.path.exists(os.path.join(exp_path, 'model')) \
                or len(glob.glob(os.path.join(exp_path, 'model', '*.pkl'))) != 0:
            raise FileExistsError('there are already models saved in %s.' % exp_path)
    maybe_create_path(exp_path)
    train_eval(config, exp_path)
def train(config: dict = None):
    """Train a torchvision Mask R-CNN model driven by a config dictionary.

    Fixes the random seeds, builds train/val COCO datasets and dataloaders,
    constructs the model, optimizer and LR scheduler, then runs the epoch
    loop. Side effects: writes ``config.yml`` into the experiment directory,
    logs to tensorboard, saves the best checkpoint (highest bbox AP@[.50:.95]
    on the val set) to ``directories.best_weight_path`` and the final weights
    to ``directories.last_weight_path``.

    Args:
        config: parsed experiment configuration. Required despite the default;
            ``None`` raises immediately with a clear message instead of
            failing on the first subscript.
    """
    if config is None:
        raise ValueError("train() requires a config dict")

    # fix the seed for reproduce results
    SEED = config["SEED"]
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    random.seed(SEED)

    # parse config parameters
    DATA_ROOT = config["DATA_ROOT"]
    COCO_PATH = config["COCO_PATH"]
    DATA_ROOT_VAL = config["DATA_ROOT_VAL"]
    COCO_PATH_VAL = config["COCO_PATH_VAL"]
    EXPERIMENT_NAME = config["EXPERIMENT_NAME"]
    OPTIMIZER_NAME = config["OPTIMIZER_NAME"]
    OPTIMIZER_WEIGHT_DECAY = config["OPTIMIZER_WEIGHT_DECAY"]
    OPTIMIZER_MOMENTUM = config["OPTIMIZER_MOMENTUM"]
    OPTIMIZER_BETAS = config["OPTIMIZER_BETAS"]
    OPTIMIZER_EPS = config["OPTIMIZER_EPS"]
    OPTIMIZER_AMSGRAD = config["OPTIMIZER_AMSGRAD"]
    OPTIMIZER_ADABOUND_GAMMA = config["OPTIMIZER_ADABOUND_GAMMA"]
    OPTIMIZER_ADABOUND_FINAL_LR = config["OPTIMIZER_ADABOUND_FINAL_LR"]
    LEARNING_RATE = config["LEARNING_RATE"]
    LEARNING_RATE_STEP_SIZE = config["LEARNING_RATE_STEP_SIZE"]
    LEARNING_RATE_GAMMA = config["LEARNING_RATE_GAMMA"]
    TRAINABLE_BACKBONE_LAYERS = config["TRAINABLE_BACKBONE_LAYERS"]
    RPN_ANCHOR_SIZES = config["RPN_ANCHOR_SIZES"]
    RPN_ANCHOR_ASPECT_RATIOS = config["RPN_ANCHOR_ASPECT_RATIOS"]
    RPN_PRE_NMS_TOP_N_TRAIN = config["RPN_PRE_NMS_TOP_N_TRAIN"]
    RPN_PRE_NMS_TOP_N_TEST = config["RPN_PRE_NMS_TOP_N_TEST"]
    RPN_POST_NMS_TOP_N_TRAIN = config["RPN_POST_NMS_TOP_N_TRAIN"]
    RPN_POST_NMS_TOP_N_TEST = config["RPN_POST_NMS_TOP_N_TEST"]
    RPN_NMS_THRESH = config["RPN_NMS_THRESH"]
    RPN_FG_IOU_THRESH = config["RPN_FG_IOU_THRESH"]
    RPN_BG_IOU_THRESH = config["RPN_BG_IOU_THRESH"]
    BOX_DETECTIONS_PER_IMAGE = config["BOX_DETECTIONS_PER_IMAGE"]
    LOG_FREQ = config["LOG_FREQ"]
    COCO_AP_TYPE = config["COCO_AP_TYPE"]
    TRAIN_SPLIT_RATE = config["TRAIN_SPLIT_RATE"]
    BATCH_SIZE = config["BATCH_SIZE"]
    NUM_EPOCH = config["NUM_EPOCH"]
    DEVICE = config["DEVICE"]
    NUM_WORKERS = config["NUM_WORKERS"]

    # init directories
    directories = Directories(experiment_name=EXPERIMENT_NAME)

    # copy config file to experiment dir. Note this happens before
    # NUM_CLASSES / CATEGORY_MAPPING are added below — those extra keys travel
    # inside the saved checkpoints instead of config.yml.
    yaml_path = os.path.join(directories.experiment_dir, "config.yml")
    save_yaml(config, yaml_path)

    # init tensorboard summary writer
    writer = SummaryWriter(directories.tensorboard_dir)

    # set pytorch device, falling back to CPU when CUDA is unavailable
    device = torch.device(DEVICE)
    if "cuda" in DEVICE and not torch.cuda.is_available():
        print("CUDA not available, switching to CPU")
        device = torch.device("cpu")

    # use our dataset and defined transformations; a separate val dataset is
    # built from the same root when no dedicated val annotations are given
    # (it is subset-split below in that case).
    dataset = COCODataset(
        DATA_ROOT, COCO_PATH, get_transforms(config=config, mode="train")
    )
    if COCO_PATH_VAL:
        dataset_val = COCODataset(
            DATA_ROOT_VAL, COCO_PATH_VAL, get_transforms(config=config, mode="val")
        )
    else:
        dataset_val = COCODataset(
            DATA_ROOT, COCO_PATH, get_transforms(config=config, mode="val")
        )

    # +1 for background class
    num_classes = dataset.num_classes + 1
    config["NUM_CLASSES"] = num_classes

    # add category mappings to config, will be used at prediction
    category_mapping = get_category_mapping_from_coco_file(COCO_PATH)
    config["CATEGORY_MAPPING"] = category_mapping

    # split the dataset in train and val set if val path is not defined
    if not COCO_PATH_VAL:
        indices = torch.randperm(len(dataset)).tolist()
        num_train = int(len(indices) * TRAIN_SPLIT_RATE)
        train_indices = indices[:num_train]
        val_indices = indices[num_train:]
        dataset = torch.utils.data.Subset(dataset, train_indices)
        dataset_val = torch.utils.data.Subset(dataset_val, val_indices)

    # define training and val data loaders
    data_loader_train = torch.utils.data.DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        collate_fn=core.utils.collate_fn,
    )
    data_loader_val = torch.utils.data.DataLoader(
        dataset_val,
        batch_size=1,
        shuffle=False,
        num_workers=NUM_WORKERS,
        collate_fn=core.utils.collate_fn,
    )

    # get the model using our helper function
    model = get_torchvision_maskrcnn(
        num_classes=num_classes,
        trainable_backbone_layers=TRAINABLE_BACKBONE_LAYERS,
        anchor_sizes=RPN_ANCHOR_SIZES,
        anchor_aspect_ratios=RPN_ANCHOR_ASPECT_RATIOS,
        rpn_pre_nms_top_n_train=RPN_PRE_NMS_TOP_N_TRAIN,
        rpn_pre_nms_top_n_test=RPN_PRE_NMS_TOP_N_TEST,
        rpn_post_nms_top_n_train=RPN_POST_NMS_TOP_N_TRAIN,
        rpn_post_nms_top_n_test=RPN_POST_NMS_TOP_N_TEST,
        rpn_nms_thresh=RPN_NMS_THRESH,
        rpn_fg_iou_thresh=RPN_FG_IOU_THRESH,
        rpn_bg_iou_thresh=RPN_BG_IOU_THRESH,
        box_detections_per_img=BOX_DETECTIONS_PER_IMAGE,
        pretrained=True,
    )

    # move model to the right device
    model.to(device)

    # construct an optimizer over the trainable parameters only
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer_factory = OptimizerFactory(
        learning_rate=LEARNING_RATE,
        momentum=OPTIMIZER_MOMENTUM,
        weight_decay=OPTIMIZER_WEIGHT_DECAY,
        betas=OPTIMIZER_BETAS,
        eps=OPTIMIZER_EPS,
        amsgrad=OPTIMIZER_AMSGRAD,
        adabound_gamma=OPTIMIZER_ADABOUND_GAMMA,
        adabound_final_lr=OPTIMIZER_ADABOUND_FINAL_LR,
    )
    optimizer = optimizer_factory.get(params, OPTIMIZER_NAME)

    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=LEARNING_RATE_STEP_SIZE, gamma=LEARNING_RATE_GAMMA
    )

    # create coco index
    print("Creating COCO index...")
    coco_api_train = get_coco_api_from_dataset(data_loader_train.dataset)
    coco_api_val = get_coco_api_from_dataset(data_loader_val.dataset)

    # FIX: initialize the best score ONCE before the loop. Previously it was
    # reset to -1 at the top of every epoch, so the "best" checkpoint was
    # overwritten every epoch regardless of whether AP actually improved.
    best_bbox_05095_ap = -1

    # train it for NUM_EPOCH epochs
    for epoch in range(NUM_EPOCH):
        # train for one epoch, printing every PRINT_FREQ iterations
        train_one_epoch(
            model=model,
            optimizer=optimizer,
            data_loader=data_loader_train,
            coco_api=coco_api_train,
            device=device,
            epoch=epoch,
            log_freq=LOG_FREQ,
            coco_ap_type=COCO_AP_TYPE,
            writer=writer,
        )
        # update the learning rate
        lr_scheduler.step()
        # get iteration number
        num_images = len(data_loader_train.dataset)
        iter_num = epoch * num_images
        # evaluate on the val dataset
        loss_lists, coco_evaluator = evaluate(
            model=model,
            data_loader=data_loader_val,
            coco_api=coco_api_val,
            device=device,
            iter_num=iter_num,
            coco_ap_type=COCO_AP_TYPE,
            writer=writer,
        )
        # update best model if it has the best bbox 0.50:0.95 AP
        # (stats[0] is COCOeval's AP@IoU=0.50:0.95 for bbox)
        bbox_05095_ap = coco_evaluator.coco_eval["bbox"].stats[0]
        if bbox_05095_ap > best_bbox_05095_ap:
            model_dict = {"state_dict": model.state_dict(), "config": config}
            torch.save(model_dict, directories.best_weight_path)
            best_bbox_05095_ap = bbox_05095_ap

    # save final model
    model_dict = {"state_dict": model.state_dict(), "config": config}
    torch.save(model_dict, directories.last_weight_path)
def main() -> None:
    """Run the multi-trial DST training/evaluation pipeline end to end.

    Parses CLI args, loads and validates the train/dev/test dialog data,
    builds the processor, dataloaders and model, then trains/evaluates the
    model ``args.runs`` times (re-initializing parameters each trial) and
    aggregates the per-trial results. Side effects: writes args.yml,
    processor.pkl, per-run summaries/checkpoints and summary.json under
    ``args.save_dir``.
    """
    parser = create_parser()
    args = utils.parse_args(parser)
    # Optional dictConfig-based logging setup from a YAML file.
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    logger = logging.getLogger("multirun")
    save_dir = pathlib.Path(args.save_dir)
    # Refuse to reuse a non-empty save dir unless --overwrite was given.
    if (not args.overwrite and save_dir.exists()
            and utils.has_element(save_dir.glob("*"))):
        raise FileExistsError(f"save directory ({save_dir}) is not empty")
    save_dir.mkdir(exist_ok=True, parents=True)
    # Persist the effective CLI arguments for reproducibility.
    utils.save_yaml(vars(args), save_dir.joinpath("args.yml"))

    logger.info("preparing dataset...")
    data_dir = pathlib.Path(args.data_dir)
    # Load raw JSON per split, then convert each dialog into a DSTDialog.
    data = {
        split: utils.load_json(data_dir.joinpath(f"{split}.json"))
        for split in ("train", "dev", "test")
    }
    data = {
        split: [
            datasets.DSTDialog.from_dialog(datasets.Dialog.from_json(d))
            for d in dialogs
        ]
        for split, dialogs in data.items()
    }
    logger.info("verifying dataset...")
    for split, dialogs in data.items():
        for dialog in dialogs:
            dialog.validate()

    # Vocabularies are built over ALL splits, not just train.
    processor = dst_datasets.DSTDialogProcessor(
        sent_processor=datasets.SentProcessor(
            bos=True, eos=True, lowercase=True, max_len=30))
    processor.prepare_vocabs(
        list(itertools.chain(*(data["train"], data["dev"], data["test"]))))
    logger.info("saving processor object...")
    utils.save_pickle(processor, save_dir.joinpath("processor.pkl"))

    # NOTE(review): shuffle=False on the *training* dataloader is unusual —
    # confirm whether shuffling is handled elsewhere or this is intentional.
    train_dataset = dst_datasets.DSTDialogDataset(dialogs=data["train"],
                                                  processor=processor)
    train_dataloader = dst_datasets.create_dataloader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        pin_memory=True)
    dev_dataloader = dst_run.TestDataloader(dialogs=data["dev"],
                                            processor=processor,
                                            max_batch_size=args.batch_size)
    test_dataloader = dst_run.TestDataloader(dialogs=data["test"],
                                             processor=processor,
                                             max_batch_size=args.batch_size)

    logger.info("preparing model...")
    # Register model packages so the model class can be resolved by path.
    torchmodels.register_packages(models)
    torchmodels.register_packages(dst_models)
    model_cls = torchmodels.create_model_cls(dst, args.model_path)
    model: dst.AbstractDialogStateTracker = model_cls(processor.vocabs)
    if args.gpu is None:
        device = torch.device("cpu")
    else:
        device = torch.device(f"cuda:{args.gpu}")
    model = model.to(device)
    logger.info(str(model))
    logger.info(f"number of parameters: {utils.count_parameters(model):,d}")

    logger.info(f"running {args.runs} trials...")
    all_results = []
    for idx in range(args.runs):
        logger.info(f"running trial-{idx + 1}...")
        run_save_dir = save_dir.joinpath(f"run-{idx + 1:03d}")
        # The same model object is reused across trials; only its parameters
        # are re-initialized.
        logger.info("resetting parameters...")
        model.reset_parameters()
        logger.info("preparing trainer...")
        runner = dst_run.Runner(
            model=model,
            processor=processor,
            device=device,
            save_dir=run_save_dir,
            epochs=args.epochs,
            # LR scheduler class/kwargs come from CLI strings; the partial is
            # instantiated by the runner.
            scheduler=(None if not args.scheduled_lr else functools.partial(
                getattr(op.lr_scheduler, args.scheduler_cls),
                **json.loads(args.scheduler_kwargs))),
            loss=args.loss,
            l2norm=args.l2norm,
            gradient_clip=args.gradient_clip,
            train_validate=args.train_validate,
            early_stop=args.early_stop,
            early_stop_criterion=args.early_stop_criterion,
            early_stop_patience=args.early_stop_patience,
            asr_method=args.asr_method,
            asr_sigmoid_sum_order=args.asr_sigmoid_sum_order,
            asr_topk=args.asr_topk)
        logger.info("commencing training...")
        record = runner.train(
            train_dataloader=train_dataloader,
            dev_dataloader=dev_dataloader,
            test_fn=runner.test_asr if args.validate_asr else None)
        logger.info("final summary: ")
        logger.info(pprint.pformat(record.to_json()))
        utils.save_json(record.to_json(),
                        run_save_dir.joinpath("summary-final.json"))

        logger.info("commencing testing...")
        with torch.no_grad():
            eval_results = runner.test(test_dataloader)
        logger.info("test results: ")
        logger.info(pprint.pformat(eval_results))
        # When --test-asr is set, the ASR evaluation RESULTS replace the
        # plain test results in eval.json below.
        if args.test_asr:
            logger.info("commencing testing (asr)...")
            with torch.no_grad():
                eval_results = runner.test_asr(test_dataloader)
            logger.info("test(asr) results: ")
            logger.info(pprint.pformat(eval_results))
        eval_results["epoch"] = int(record.epoch)
        eval_results["criterion"] = record.value
        logger.info("test evaluation: ")
        logger.info(pprint.pformat(eval_results))
        if args.save_ckpt:
            logger.info("saving checkpoint...")
            # Move tensors to CPU so the checkpoint loads on CPU-only hosts.
            torch.save({k: v.cpu() for k, v in model.state_dict().items()},
                       run_save_dir.joinpath("ckpt.pth"))
        logger.info("done!")
        utils.save_json(eval_results, run_save_dir.joinpath("eval.json"))
        all_results.append(eval_results)

    logger.info("aggregating results...")
    summary = reduce_json(all_results)
    pprint.pprint({k: v["stats"]["mean"] for k, v in summary.items()})
    utils.save_json(summary, save_dir.joinpath("summary.json"))
    logger.info("done!")
print('the parameter of your training ----->') print(opt) ######################################################################################################################## if not os.path.exists(opt.output_dir): os.mkdir(opt.output_dir) current_time = opt.datasets_folder + '_' + datetime.datetime.now().strftime( "%Y%m%d-%H%M") output_path = opt.output_dir + '/' + current_time pth_path = 'pth//' + current_time if not os.path.exists(output_path): os.mkdir(output_path) if not os.path.exists(pth_path): os.mkdir(pth_path) yaml_name = pth_path + '//para.yaml' save_yaml(opt, yaml_name) os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.GPU) batch_size = opt.batch_size lr = opt.lr name_list, noise_img, coordinate_list = train_preprocess_lessMemoryMulStacks( opt) # print('name_list -----> ',name_list) ######################################################################################################################## L1_pixelwise = torch.nn.L1Loss() L2_pixelwise = torch.nn.MSELoss() ######################################################################################################################## denoise_generator = Network_3D_Unet(in_channels=1, out_channels=1, final_sigmoid=True)