def testing(test_path, test_label):
    """Evaluate the trained VGG16 model on the test set.

    Restores the latest checkpoint from the training log directory and runs
    `test_max_epoch` batches through the loss/accuracy ops, writing summaries
    to the test log directory.

    Args:
        test_path: test image paths (passed to data.get_batch_data).
        test_label: labels corresponding to test_path.

    Returns:
        Mean accuracy over all evaluated batches as a percentage, or
        None when no checkpoint could be found.
    """
    # Summaries go to the test dir; weights are restored from the train dir.
    logs_test_dir = './generated_file/test'
    logs_train_dir = './generated_file/train'
    # Batch-iteration state consumed/updated by data.get_batch_data.
    test_index = 0
    test_epoch_completed = 0
    print('start testing...')

    # One batch of 224x224 RGB images and their integer labels per step.
    X = tf.placeholder(tf.float32, shape=[batch_size, 224, 224, 3], name="X")
    Y = tf.placeholder(tf.int32, shape=[batch_size], name="Y")
    test_logits = models.VGG16(X)
    test_loss = train.losses(test_logits, Y)
    test_acc = train.evaluation(test_logits, Y)

    # Merge all summaries so loss/accuracy can be logged each step.
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        accuracy = []
        ckpt = tf.train.get_checkpoint_state(logs_train_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # Checkpoint filenames end in "...-<global_step>".
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Loading success, global_step is %s' % global_step)
        else:
            # Bug fix: the original fell through and ran the loop on an
            # unrestored (uninitialized) graph; bail out instead.
            print('No checkpoint file found')
            return None

        # Write the graph plus per-step summaries for TensorBoard.
        test_writer = tf.summary.FileWriter(logs_test_dir, sess.graph)
        for step in range(test_max_epoch):
            test_batch_X, test_batch_Y, test_index, test_epoch_completed = \
                data.get_batch_data(test_path, test_label, test_index,
                                    test_epoch_completed)
            feed_dict_test = {X: test_batch_X, Y: test_batch_Y}
            summary_str, te_loss, te_acc = sess.run(
                fetches=[summary_op, test_loss, test_acc],
                feed_dict=feed_dict_test)
            test_writer.add_summary(summary_str, step)
            accuracy.append(te_acc)
            if step % 50 == 0 or (step + 1) == test_max_epoch:
                print('step %d, test loss = %.2f, test accuracy = %.2f%%' %
                      (step, te_loss, te_acc * 100.0))

    # Mean accuracy across all evaluated batches, as a percentage.
    test_total_acc = np.mean(np.array(accuracy)) * 100
    return test_total_acc
def __init__(self, emb_size, verbose=False):
    """Embedding model: VGG16 backbone followed by a linear projection head.

    Args:
        emb_size: dimensionality of the output embedding.
        verbose: unused; kept for interface compatibility.
    """
    super().__init__()
    # Backbone producing 4096-d features.
    self.pretrained_model = models.VGG16()
    # Project backbone features down to the embedding size.
    self.linear = nn.Sequential(nn.Linear(4096, emb_size))
    self.emb_size = emb_size
def model_dispatcher(base_model):
    """Return a pretrained 3-class model for the given architecture name.

    Args:
        base_model: architecture identifier string.

    Returns:
        The instantiated model from `models`.

    Raises:
        ValueError: if `base_model` is not a recognized architecture.
    """
    dispatch = {
        'se_resnext101_32x4d': models.SE_ResNext101_32x4d,
        'vgg16': models.VGG16,
        'resnet34': models.ResNet34,
        'se_resnext101_32x4d_sSE': models.se_resnext101_32x4d_sSE,
    }
    if base_model not in dispatch:
        # The original fell through and implicitly returned None, deferring
        # the failure to the caller; fail fast with a clear message instead.
        raise ValueError('Unknown base_model: %r' % base_model)
    return dispatch[base_model](pretrained=True, n_class=3)
def test():
    """Evaluate the VGG16 model on the CIFAR-10 test split.

    Restores the latest checkpoint from `train_log_dir`, runs the test set
    through the graph batch by batch (TF1 queue-runner pipeline), and prints
    the average top-1 accuracy.  Relies on module-level `data_dir`,
    `BATCH_SIZE`, `N_CLASSES`, `IS_PRETRAIN`, and `train_log_dir`.
    """
    with tf.Graph().as_default():
        n_test = 10000  # size of the CIFAR-10 test set
        # Queue-based input pipeline; no shuffling for evaluation.
        images, labels = input_data.read_cifar10(data_dir=data_dir,
                                                 is_train=False,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=False)
        logits = models.VGG16(images, N_CLASSES, IS_PRETRAIN)
        correct = utils.num_correct_prediction(logits, labels)
        saver = tf.train.Saver(tf.global_variables())

        with tf.Session() as sess:
            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(train_log_dir)
            if ckpt and ckpt.model_checkpoint_path:
                # Checkpoint paths end in "...-<global_step>".
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Loading success, global_step is %s' % global_step)
            else:
                print('No checkpoint file found!')
                return

            # Start the queue runners that feed the input pipeline.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                print('Testing......')
                # Only full batches are evaluated; any remainder is dropped.
                num_step = int(math.floor(n_test / BATCH_SIZE))
                num_sample = num_step * BATCH_SIZE
                step = 0
                total_correct = 0
                while step < num_step and not coord.should_stop():
                    batch_correct = sess.run(correct)
                    total_correct += np.sum(batch_correct)
                    step += 1
                print('Total testing samples: %d' % num_sample)
                print('Total correct predictions: %d' % total_correct)
                print('Average accuracy: %.2f%%' %
                      (100 * total_correct / num_sample))
            except Exception as e:
                # Propagate the failure to the coordinator so all queue
                # threads shut down cleanly.
                coord.request_stop(e)
            finally:
                coord.request_stop()
                coord.join(threads)
def model_dispatcher(if_pretrain, base_model, nclass):
    """Instantiate the requested architecture.

    Args:
        if_pretrain: whether to load pretrained weights.
        base_model: architecture identifier string.
        nclass: number of output classes.

    Returns:
        The instantiated model from `models`.

    Raises:
        ValueError: if `base_model` is not a recognized architecture.
    """
    dispatch = {
        'se_resnext101_32x4d': models.SE_ResNext101_32x4d,
        'vgg16': models.VGG16,
        'resnet34': models.ResNet34,
        'se_resnext101_32x4d_sSE': models.se_resnext101_32x4d_sSE,
        'EfficientNet_B6': models.EfficientNet_B6,
    }
    if base_model not in dispatch:
        # The original fell through and implicitly returned None, deferring
        # the failure to the caller; fail fast with a clear message instead.
        raise ValueError('Unknown base_model: %r' % base_model)
    return dispatch[base_model](pretrained=if_pretrain, n_class=nclass)
def __init__(self, verbose=False):
    """Multi-path image model.

    A VGG16 backbone is combined with two shallow convolutional paths; their
    flattened outputs (3072-d total) are concatenated with the VGG fc
    features (4096-d) and re-encoded to a 4096-d representation.

    Args:
        verbose: unused; kept for interface compatibility.
    """
    super().__init__()
    self.vgg = models.VGG16()
    # Fuses the concatenated path features with VGG's fc features.
    self.encoder = nn.Sequential(
        nn.Linear(3072 + 4096, 4096),
        nn.ReLU(True),
    )

    # Both paths share the same conv; they differ only in their pooling.
    def _conv_path(pre_pool, post_k, post_s):
        return nn.Sequential(
            nn.MaxPool2d(pre_pool, pre_pool),
            nn.Conv2d(3, 96, 8, 4),
            nn.MaxPool2d(post_k, post_s),
            Flatten(),
        )

    self.path1 = _conv_path(4, 6, 2)
    self.path2 = _conv_path(8, 3, 1)
    self.dropout = nn.Dropout(p=0.2)
def get_model():
    """Instantiate and return the VGG16 model configured for `n_classes` outputs."""
    model = models.VGG16(n_classes)
    return model
# --- fragment: tail of a model-selection if/elif chain; the enclosing `if`
# --- and the height/width/loss/... variables come from code above this view.
                 channels=n_channels)
elif model == 'unet3':
    model = models.Unet3(height, width, loss=loss, optimizer=optimizer,
                         metrics=metrics, fc_size=fc_size,
                         channels=n_channels)
elif model == 'vgg':
    # Segmentation variant of VGG16 built from pretrained weights that are
    # left unfrozen for fine-tuning.
    model = models.VGG16(height, width, pretrained=True,
                         freeze_pretrained=False, loss=loss,
                         optimizer=optimizer, metrics=metrics)
else:
    print("Incorrect model name")


def myGenerator(train_generator, train_mask_generator,
                remove_mean_imagenet=True, rescale_mask=True, use_hsv=False):
    # Infinite generator pairing image batches with their mask batches.
    while True:
        train_gen = next(train_generator)
        train_mask = next(train_mask_generator)
        # NOTE(review): definition truncated here in this view.
def main():
    """Run baseline (max-softmax) and ODIN out-of-distribution detection.

    Loads a pre-trained network for the in-distribution dataset named in
    `args`, then sweeps ODIN temperature/noise-magnitude combinations over
    each out-of-distribution set, keeping the best-TNR settings per OOD set
    (selected on a validation split) and printing baseline and ODIN metrics.
    """
    # set the path to pre-trained model and output
    pre_trained_net = './pre_trained/' + args.net_type + '_' + args.dataset + '.pth'
    args.outf = args.outf + args.net_type + '_' + args.dataset + '/'
    if os.path.isdir(args.outf) == False:
        os.mkdir(args.outf)
    torch.cuda.manual_seed(0)
    torch.cuda.set_device(args.gpu)

    # check the in-distribution dataset
    if args.dataset == 'cifar100':
        args.num_classes = 100
    # Each in-distribution dataset is paired with three OOD test sets.
    if args.dataset == 'svhn':
        out_dist_list = ['cifar10', 'imagenet_resize', 'lsun_resize']
    else:
        out_dist_list = ['svhn', 'imagenet_resize', 'lsun_resize']

    # load networks (input normalization stats differ per architecture)
    if args.net_type == 'densenet':
        if args.dataset == 'svhn':
            model = models.DenseNet3(100, int(args.num_classes))
            model.load_state_dict(
                torch.load(pre_trained_net,
                           map_location="cuda:" + str(args.gpu)))
        else:
            # NOTE(review): non-SVHN densenet checkpoints appear to store the
            # whole model object rather than a state dict — confirm.
            model = torch.load(pre_trained_net,
                               map_location="cuda:" + str(args.gpu))
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((125.3 / 255, 123.0 / 255, 113.9 / 255),
                                 (63.0 / 255, 62.1 / 255.0, 66.7 / 255.0)),
        ])
    elif args.net_type == 'resnet':
        model = models.ResNet34(num_c=args.num_classes)
        model.load_state_dict(
            torch.load(pre_trained_net,
                       map_location="cuda:" + str(args.gpu)))
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])
    elif args.net_type == 'vgg16':
        model = models.VGG16(int(args.num_classes))
        model.load_state_dict(
            torch.load(pre_trained_net,
                       map_location="cuda:" + str(args.gpu)))
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225)),
        ])
    model.cuda()
    print('load model: ' + args.net_type)

    # load dataset
    print('load target data: ', args.dataset)
    train_loader, test_loader = data_loader.getTargetDataSet(
        args.dataset, args.batch_size, in_transform, args.dataroot)

    # measure the performance
    # ODIN hyperparameter grid: input-noise magnitudes and temperatures.
    M_list = [
        0, 0.0005, 0.001, 0.0014, 0.002, 0.0024, 0.005, 0.01, 0.05, 0.1, 0.2
    ]
    T_list = [1, 10, 100, 1000]
    base_line_list = []
    ODIN_best_tnr = [0, 0, 0]          # best validation TNR so far, per OOD set
    ODIN_best_results = [0, 0, 0]      # test metrics at the best TNR, per OOD set
    ODIN_best_temperature = [-1, -1, -1]
    ODIN_best_magnitude = [-1, -1, -1]
    for T in T_list:
        for m in M_list:
            magnitude = m
            temperature = T
            # Posterior scores for the in-distribution test set.
            lib_generation.get_posterior(model, args.net_type, test_loader,
                                         magnitude, temperature, args.outf,
                                         True)
            out_count = 0
            print('Temperature: ' + str(temperature) + ' / noise: ' +
                  str(magnitude))
            for out_dist in out_dist_list:
                out_test_loader = data_loader.getNonTargetDataSet(
                    out_dist, args.batch_size, in_transform, args.dataroot)
                print('Out-distribution: ' + out_dist)
                # Posterior scores for the out-of-distribution set.
                lib_generation.get_posterior(model, args.net_type,
                                             out_test_loader, magnitude,
                                             temperature, args.outf, False)
                if temperature == 1 and magnitude == 0:
                    # T=1, m=0 is exactly the max-softmax baseline.
                    test_results = callog.metric(args.outf, ['PoT'])
                    base_line_list.append(test_results)
                else:
                    # Select ODIN hyperparameters on the validation split
                    # ('PoV'); keep test metrics ('PoT') for the best TNR.
                    val_results = callog.metric(args.outf, ['PoV'])
                    if ODIN_best_tnr[out_count] < val_results['PoV']['TNR']:
                        ODIN_best_tnr[out_count] = val_results['PoV']['TNR']
                        ODIN_best_results[out_count] = callog.metric(
                            args.outf, ['PoT'])
                        ODIN_best_temperature[out_count] = temperature
                        ODIN_best_magnitude[out_count] = magnitude
                out_count += 1

    # print the results
    mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']
    print('Baseline method: in_distribution: ' + args.dataset + '==========')
    count_out = 0
    for results in base_line_list:
        print('out_distribution: ' + out_dist_list[count_out])
        for mtype in mtypes:
            print(' {mtype:6s}'.format(mtype=mtype), end='')
        print('\n{val:6.2f}'.format(val=100. * results['PoT']['TNR']), end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['AUROC']), end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['DTACC']), end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['AUIN']), end='')
        print(' {val:6.2f}\n'.format(val=100. * results['PoT']['AUOUT']),
              end='')
        print('')
        count_out += 1

    print('ODIN method: in_distribution: ' + args.dataset + '==========')
    count_out = 0
    for results in ODIN_best_results:
        print('out_distribution: ' + out_dist_list[count_out])
        for mtype in mtypes:
            print(' {mtype:6s}'.format(mtype=mtype), end='')
        print('\n{val:6.2f}'.format(val=100. * results['PoT']['TNR']), end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['AUROC']), end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['DTACC']), end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['AUIN']), end='')
        print(' {val:6.2f}\n'.format(val=100. * results['PoT']['AUOUT']),
              end='')
        print('temperature: ' + str(ODIN_best_temperature[count_out]))
        print('magnitude: ' + str(ODIN_best_magnitude[count_out]))
        print('')
        count_out += 1
# Collect the numeric IDs of every .h5 test volume in testDir (the first
# integer found in each filename is taken as its ID).
testinglistIDs = [int(re.findall(r'[0-9]+', file)[0])
                  for file in os.listdir(testDir) if '.h5' in file]
print('whole testing list IDs: ' + str(testinglistIDs))
#epochs = [i for i in range(1, 51)]
epochs = [33]  #CHANGE EPOCHS FOR 200 EPOCH RUNS
a = len(testinglistIDs)
print('total of {} testing images'.format(a))
batch_size = 2
sideLength = 48  # cubic patch side length fed to VGG16
test_generator = testDataGenerator(testinglistIDs, testDir,
                                   batch_size=batch_size, v_size=sideLength)
model = models.VGG16(sideLength)
for epoch in epochs:
    # Load this epoch's weight snapshot and predict on the whole test set.
    model.load_weights("/data/lung_seg/FPR/VGG16/aug2/2021-01-04_03:41:48/checkpoints/vgg_aug_{}.hd5f".format(str(epoch).zfill(2)))
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])
    prediction = model.predict(test_generator, verbose=1)
    savePath = '/media/data_crypt_2/VGG/VGG_AUG_eval-final'
    if not os.path.isdir(savePath):
        os.mkdir(savePath)
    # Dump the raw predictions for this epoch to CSV.
    f = csv.writer(open(os.path.join(savePath, 'predictions_epoch{}.csv'.format(epoch)), 'w+'))
    f.writerow(prediction)
    # Read ground-truth labels from the per-ID .txt files.
    y_truth = []
    for i in testinglistIDs:
        path = os.path.join(testDir, '{}.txt'.format(i))
        with open(path, 'r') as f:
            # NOTE(review): script truncated here in this view.
import models

# Maps CLI model-name keys to *instantiated* models.
# NOTE(review): entries are constructed eagerly at import time; only the
# 'vgg16_eval' entry is currently active — the rest are kept commented out
# for reference.
MODEL_DISPATCHER = {
    # # 'resnet34': models.ResNet34(pretrained=True, n_class=3),
    # 'resnet34_eval': models.ResNet34(pretrained=False, n_class=3),
    # 'vgg16': models.VGG16(pretrained=True, n_class=3),
    'vgg16_eval': models.VGG16(pretrained=False, n_class=3),
    # # 'vgg16_binary_eval': models.VGG16_binary(pretrained=False, n_class=2),
    # 'se_resnext101_32x4d':models.SE_ResNext101_32x4d(pretrained=True, n_class=3),
    # 'se_resnext101_32x4d_eval':models.SE_ResNext101_32x4d(pretrained=False, n_class=3),
    # # 'se_resnext101_32x4d_sSE':models.SE_ResNext101_32x4d_sSE(pretrained=True, n_class=3),
    # 'se_resnext101_32x4d_sSE_eval':models.SE_ResNext101_32x4d_sSE(pretrained=False, n_class=3)
}
def main():
    """Entry point: parse CLI arguments, build datasets and the FCN model,
    then train and/or evaluate zero-shot semantic segmentation.

    Pipeline: (1) datasets/loaders, (2) FCN model (initialized from a
    checkpoint or from pretrained VGG16), (3) FCN optimizer + trainer,
    (4) optional seenmask-classifier training, (5) validation-only modes.
    """
    parser = argparse.ArgumentParser()
    # default value args
    parser.add_argument('-n', '--name', type=str, default=None,
                        help='name of checkpoint folder')
    parser.add_argument('-g', '--gpu', type=int, default=0,
                        help='gpu number; -1 for cpu')
    parser.add_argument('-c', '--config', type=int, default=1,
                        choices=configurations.keys())
    parser.add_argument(
        "-dir", "--data_dir", type=str,
        default='/opt/visualai/rkdoshi/ZeroshotSemanticSegmentation',
        help='path for storing dataset, logs, and models')
    parser.add_argument(
        "-tb", "--tb_dir", type=str,
        default='/opt/visualai/rkdoshi/ZeroshotSemanticSegmentation/tb',
        help='path to tensorboard directory')
    # override cfg args
    parser.add_argument('-m', '--mode', type=str,
                        choices=['train', 'test_fcn', 'test_all'],
                        help='choose among five training/testing mode choices')
    parser.add_argument("-d", "--dataset", type=str,
                        choices=['pascal', 'context'], help='dataset name')
    parser.add_argument(
        '-tu', '--train_unseen', type=str,
        help='delimited list input for zero-shot train split unseen classes')
    parser.add_argument(
        '-vu', '--val_unseen', type=str,
        help='delimited list input for zero-shot val split unseen classes')
    parser.add_argument('-e', '--embed_dim', type=int,
                        choices=[2, 5, 10, 20, 21, 50, 100, 200, 300],
                        help='dimensionality of joint embeddings space')
    parser.add_argument('-ve', '--fcn_epochs', type=int,
                        help='maximum number of training epochs for FCN')
    parser.add_argument('-lr', '--fcn_learning_rate', type=float,
                        help='FCN\'s learning rate')
    parser.add_argument('-loss', '--fcn_loss', type=str,
                        choices=['cos', 'mse', 'cross_entropy'],
                        help='FCN training loss function if using embeddings')
    parser.add_argument('-o', '--fcn_optim', type=str,
                        choices=['sgd', 'adam'],
                        help='optimizer for updating FCN model')
    parser.add_argument(
        '-se', '--seenmask_epochs', type=int,
        help='max number of training epochs for the seenmask classifier')
    parser.add_argument('-slr', '--seenmask_learning_rate', type=float,
                        help='seenmask layer learning rate')
    # update optional cfg arg
    parser.add_argument(
        '-oh', '--one_hot_embed',
        help='make embeddings one-hot embeddings for updating model',
        action='store_true')
    parser.add_argument(
        '-fu', '--forced_unseen',
        help='only predict along unseen classes for unseen pixel during val',
        action='store_true')
    parser.add_argument('-r', '--resume', type=str,
                        help='fcn model checkpoint path')

    # parse args and update cfg
    args = parser.parse_args()
    name, gpu, cfg, data_dir, tb_dir = args.name, args.gpu, configurations[
        args.config], args.data_dir, args.tb_dir  # extract default value args
    cfg = update_cfg_with_args(cfg, args)
    validate_cfg(cfg)

    # initialize logging and tensorboard writer
    log_dir = get_log_dir(name, args.config, cfg, data_dir)
    run_name = log_dir.split('/')[-1]
    tb_path = osp.join(tb_dir, run_name)
    tb_writer = SummaryWriter(tb_path)
    output_cfg(cfg, log_dir, tb_writer)

    # initialize CUDA
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
    cuda = torch.cuda.is_available()
    # NOTE(review): `cuda` is a bool here, so `cuda == -1` never fires;
    # `gpu == -1` looks like what was intended — confirm before changing.
    if cuda == -1:
        cuda = False
    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)

    # 1. dataset
    kwargs = {
        'val_unseen': cfg['val_unseen'],
        'transform': True,
        'embed_dim': cfg['embed_dim'],
        'one_hot_embed': cfg['one_hot_embed'],
        'data_dir': data_dir
    }
    if cfg['dataset'] == "pascal":
        pascal_dataset.download(data_dir)
        train_dataset = pascal_dataset.PascalVOC(split='train', **kwargs)
        train_seen_dataset = pascal_dataset.PascalVOC(
            split='train_seen', train_unseen=cfg['train_unseen'], **kwargs)
        val_dataset = pascal_dataset.PascalVOC(split='val', **kwargs)
    elif cfg['dataset'] == "context":
        context_dataset.download(data_dir)
        train_dataset = context_dataset.PascalContext(split='train', **kwargs)
        train_seen_dataset = context_dataset.PascalContext(
            split='train_seen', train_unseen=cfg['train_unseen'], **kwargs)
        val_dataset = context_dataset.PascalContext(split='val', **kwargs)

    kwargs = {'num_workers': 8, 'pin_memory': True} if cuda else {}
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1,
                                               shuffle=True, **kwargs)
    train_seen_loader = torch.utils.data.DataLoader(
        train_seen_dataset, batch_size=1, shuffle=True, **kwargs)
    # TODO: add val_unseen to everything
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1,
                                             shuffle=False, **kwargs)
    label_names = train_dataset.class_names

    # output tb/log counts on train_seen, train_unseen, val
    n_train_seen = str(len(train_seen_loader))
    n_train_unseen = str(len(train_loader) - len(train_seen_loader))
    n_val = str(len(val_loader))
    tb_writer.add_text('num/train_seen', n_train_seen)
    tb_writer.add_text('num/train_unseen', n_train_unseen)
    tb_writer.add_text('num/val', n_val)
    if not osp.exists(osp.join(log_dir, 'counts.csv')):
        with open(osp.join(log_dir, 'counts.csv'), 'w') as f:
            f.write(','.join(['train_seen', 'train_unseen', 'val']) + '\n')
            f.write(','.join([n_train_seen, n_train_unseen, n_val]) + '\n')

    # 2. model
    if cfg['embed_dim']:
        model = models.FCN32s(n_class=cfg['embed_dim'])
    else:
        model = models.FCN32s(n_class=21)
    start_epoch = 0
    start_iteration = 0

    # load fcn with saved weights
    checkpoint = None
    if cfg['load_fcn_path']:
        load_path = osp.join(data_dir, 'logs', cfg['load_fcn_path'], 'best')
        checkpoint = torch.load(load_path)
        model.load_state_dict(
            checkpoint['model_state_dict'],
            strict=False)  # strict is False for backwards compatibility
        start_epoch = checkpoint['epoch']
        start_iteration = checkpoint['iteration']
    # initialize fcn with vgg weights
    else:
        vgg16 = models.VGG16(pretrained=True, data_dir=data_dir)
        model.copy_params_from_vgg16(vgg16)
    if cuda:
        model = model.cuda()

    # 3. fcn optimizer and trainer
    if cfg['fcn_optim'] == "sgd":
        # FCN convention: biases get 2x learning rate and no weight decay.
        params = [{
            'params': get_parameters(model, bias=False)
        }, {
            'params': get_parameters(model, bias=True),
            'lr': cfg['fcn_lr'] * 2,
            'weight_decay': 0
        }]  # conv2d bias
        optim = torch.optim.SGD(params, lr=cfg['fcn_lr'], momentum=.99,
                                weight_decay=0.0005)
    elif cfg['fcn_optim'] == "adam":
        params = [{
            'params': get_parameters(model, bias=False)
        }, {
            'params': get_parameters(model, bias=True),
            'lr': cfg['fcn_lr'] * 2
        }]  # conv2d bias
        optim = torch.optim.Adam(params, lr=cfg['fcn_lr'])
    if cfg['load_fcn_path']:
        optim.load_state_dict(checkpoint['optim_state_dict'])

    # train fcn
    all_unseen = cfg['train_unseen'] + cfg['val_unseen']
    fcn_trainer = trainer_fcn.Trainer(
        cuda=cuda,
        model=model,
        optimizer=optim,
        train_loader=train_seen_loader,
        val_loader=val_loader,
        log_dir=log_dir,
        dataset=cfg['dataset'],
        max_epoch=cfg['fcn_epochs'],
        pixel_embeddings=cfg['embed_dim'],
        loss_func=cfg['fcn_loss'],
        tb_writer=tb_writer,
        unseen=all_unseen,
        val_unseen=cfg['val_unseen'],
        label_names=label_names,
        forced_unseen=cfg['forced_unseen'],
    )
    fcn_trainer.epoch, fcn_trainer.iteration = start_epoch, start_iteration
    if cfg['mode'] == 'train':
        if cfg['fcn_epochs'] > 0:
            fcn_trainer.train()

        # 4. train seenmask
        if cfg['seenmask_epochs'] > 0:
            # fix fcn's VGG weights; learn final linear layer mapping fc7 to
            # seenmask
            for param in model.parameters():
                param.requires_grad = False
            for p in model.seenmask_score.parameters():
                p.requires_grad = True
            for p in model.seenmask_upscore.parameters():
                p.requires_grad = True

            # optimizer
            params = [{'params': get_parameters(model, seenmask=True)}]
            optim = torch.optim.Adam(params, lr=cfg['seenmask_lr'])
            if not checkpoint:
                load_path = osp.join(data_dir, 'logs', run_name, 'best')
                checkpoint = torch.load(load_path)

            seenmask_trainer = trainer_seenmask.Trainer(
                cuda=cuda,
                model=model,
                optimizer=optim,
                train_loader=train_loader,
                val_loader=val_loader,
                log_dir=log_dir,
                dataset=cfg['dataset'],
                max_epoch=cfg['seenmask_epochs'],
                tb_writer=tb_writer,
                checkpoint=checkpoint,
                unseen=cfg['train_unseen'],
            )
            seenmask_trainer.train()

    # 5. test
    elif cfg['mode'] == 'test_fcn':
        fcn_trainer.validate(both_fcn_and_seenmask=False)
    elif cfg['mode'] == 'test_all':
        fcn_trainer.validate(both_fcn_and_seenmask=True)
# --- fragment (Python 2): tail of a validation routine; `jacc`, `do_filter`,
# --- `attribute`, `val_list` come from code above this view.
print "Resized val jacc coef : {:.4f}".format(jacc)
if do_filter:
    print 'Use Classifier model to filter the masks'
    size_1_2 = 256  # square side length used for classifier input
    base_folder = './datasets/isic2018_crop_{}_{}_{}'.format(
        attribute, size_1_2, size_1_2)
    image_folder = os.path.join(base_folder, 'image')
    val = ISIC.load_images(val_list, size_1_2, size_1_2, image_folder)
    # filter by pre_cls model
    pre_cls_model_filename = './weights2018_task2/{}_{}_crop_pre_cls.h5'.format(
        'vgg', size_1_2)
    pre_cls_model = models.VGG16(size_1_2, size_1_2,
                                 pretrained=True,
                                 freeze_pretrained=False,
                                 loss='binary_crossentropy',
                                 optimizer=Adam(lr=1e-5),
                                 metrics=['accuracy'])
    pre_cls_model.load_weights(pre_cls_model_filename)
    # Subtract the training-set mean saved during step-1 preprocessing.
    step1_mean = pkl.load(
        open('./datasets/task2_step1_train_mean.pkl', 'rb'))
    prob_pre_val = pre_cls_model.predict(val - step1_mean)
    print prob_pre_val.shape
    # filter by cls model
    cls_model_filename = './weights2018_task2/{}_{}_{}_crop_cls.h5'.format(
        attribute, 'vgg', size_1_2)
    cls_model = models.VGG16(size_1_2, size_1_2,
                             pretrained=True,
                             freeze_pretrained=False,
    # NOTE(review): truncated mid-call in this view.
def train():
    """Train a UNet segmentation model (VGG16-initialized) on the RS dataset.

    Builds train/val dataloaders, defines a 2-D cross-entropy loss,
    configures SGD with FCN-style per-parameter groups and an
    LR-on-plateau scheduler, restores a checkpoint, then runs the
    training loop via models.Trainer.
    """
    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #Setup model
    model = models.UNet(n_channels=3,n_classes=5)
    # Initialize the network's parameters from a pretrained VGG16.
    vgg16 = models.VGG16(pretrained=True)
    model.copy_params_from_vgg16(vgg16)

    # Setup Dataloader: training and validation data.
    # (The string below keeps the previously-used VOC/SBD pipeline verbatim.)
    """data.picFulPath('/home/mlxuan/project/DeepLearning/data/benchmark/benchmark_RELEASE/dataset/train.txt',
                       '/home/mlxuan/project/DeepLearning/data/benchmark/benchmark_RELEASE/dataset/img/',
                       '/home/mlxuan/project/DeepLearning/data/benchmark/benchmark_RELEASE/dataset/cls/')
    train_dataset = data.SBDClassSeg('/home/mlxuan/project/DeepLearning/FCN/fcn_mlx/data/ImagAndLal.txt')
    trainloader = DataLoader(train_dataset, batch_size=4, shuffle=False, drop_last=True)

    data.picFulPath('/home/mlxuan/project/DeepLearning/data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt',
                    '/home/mlxuan/project/DeepLearning/data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/JPEGImages/',
                    '/home/mlxuan/project/DeepLearning/data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/SegmentationClass/',
                    destPath='/home/mlxuan/project/DeepLearning/FCN/fcn_mlx/data/ValImagAndLal.txt',
                    ImgFix='.jpg',lblFix='.png')
    val_dataset = data.VOCClassSeg('/home/mlxuan/project/DeepLearning/FCN/fcn_mlx/data/ValImagAndLal.txt',train=False)
    valloader = DataLoader(val_dataset,batch_size=1,shuffle=False)"""

    train_dataset = data.RSDataClassSeg('/home/mlxuan/project/DeepLearning/FCN/fcn_mlx/Data/trainFullPath.txt')
    trainloader = DataLoader(train_dataset, batch_size=4, shuffle=False, drop_last=True)
    val_dataset = data.RSDataClassSeg('/home/mlxuan/project/DeepLearning/FCN/fcn_mlx/Data/validFullPath.txt',train=False)
    valloader = DataLoader(val_dataset, batch_size=1, shuffle=False)

    # Setup optimizer, lr_scheduler and loss function.
    def cross_entropy2d(input, target, weight=None, size_average=True):
        """Pixel-wise cross entropy for segmentation.

        input: (n, c, h, w) raw scores; target: (n, h, w) class indices,
        where negative values are treated as ignore-labels.
        """
        n, c, h, w = input.size()
        # log_p: (n, c, h, w)
        if LooseVersion(torch.__version__) < LooseVersion('0.3'):
            # ==0.2.X: log_softmax has no `dim` argument on old torch
            log_p = F.log_softmax(input)
        else:
            # >=0.3
            log_p = F.log_softmax(input, dim=1)
        # log_p: (n*h*w, c) — per-pixel class log-probabilities; transpose
        # moves the channel axis last before masking out ignore-pixels.
        log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous()
        # Keep only pixels whose target label is >= 0.
        log_p = log_p[target.view(n, h, w, 1).repeat(1, 1, 1, c) >= 0]
        log_p = log_p.view(-1, c)
        # target: (n*h*w,)
        mask = target >= 0
        target = target[mask]
        loss = F.nll_loss(log_p, target, weight=weight)
        if size_average:
            # Average over the number of non-ignored pixels.
            loss /= mask.data.sum()
        return loss

    lossFun = cross_entropy2d

    def get_parameters(model, bias=False):
        """Yield conv weights or biases for per-group optimizer options."""
        import torch.nn as nn
        modules_skipped = (
            nn.ReLU,
            nn.MaxPool2d,
            nn.Dropout2d,
            nn.Sequential,
            models.FCN32s,
        )
        for m in model.modules():
            if isinstance(m, nn.Conv2d):
                if bias:
                    yield m.bias
                else:
                    yield m.weight
            elif isinstance(m, nn.ConvTranspose2d):
                # weight is frozen because it is just a bilinear upsampling
                if bias:
                    assert m.bias is None
            elif isinstance(m, modules_skipped):
                continue
            else:
                raise ValueError('Unexpected module: %s' % str(m))

    # FCN convention: biases get 2x learning rate and no weight decay.
    optim = torch.optim.SGD(
        [
            {'params': get_parameters(model, bias=False)},
            {'params': get_parameters(model, bias=True), 'lr': 1.0e-5* 2, 'weight_decay': 0},
        ],
        lr=1.0e-5,
        momentum=0.99,
        weight_decay=0.0005)

    # LR schedule: reduce the learning rate when the monitored metric stops
    # improving ('min' mode); patience = number of tolerated bad steps.
    scheduler = lr_scheduler.ReduceLROnPlateau(optim, mode='min', patience=0,min_lr=10e-10,eps=10e-8)

    # Restore model/optimizer state from a previous best checkpoint.
    utils.ModelLoad('/home/mlxuan/project/DeepLearning/FCN/fcn_mlx/output/Model.path/20181227_220035.852449model_best.pth.tar',model,optim)

    trainer = models.Trainer(
        cuda =True,
        model=model,
        optimizer=optim,
        loss_fcn=lossFun,
        train_loader=trainloader,
        val_loader=valloader,
        out='./output/',
        max_iter=40000,
        scheduler = scheduler,
        interval_validate=2000
    )
    trainer.train()  # run the training loop
# --- fragment: tail of a model_type if/elif chain; the enclosing `if` and
# --- height/width/custom_loss/... come from code above this view.
    monitor_metric = 'val_jacc_coef'
elif model_type == 'unet2':
    model = models.Unet2(height, width, custom_loss=custom_loss,
                         optimizer=optimizer, custom_metrics=custom_metric,
                         fc_size=fc_size, channels=channels)
    monitor_metric = 'val_jacc_coef'
elif model_type == 'vgg':
    # Path to ImageNet-pretrained VGG16 weights without the top FC layers.
    VGG16_WEIGHTS_NOTOP = project_path + 'pretrained_weights/vgg16_notop.h5'
    model = models.VGG16(height, width, pretrained=VGG16_WEIGHTS_NOTOP,
                         freeze_pretrained=False, custom_loss=custom_loss,
                         optimizer=optimizer, custom_metrics=custom_metric)
    monitor_metric = 'val_jacc_coef'

if do_stage == 1:
    # 4:1:1 train/val/test split carved out of the training folder.
    split_ratio = [4, 1, 1]
    tr_list, val_list, te_list = ISIC.split_isic_train(tr_folder, split_ratio)
    val_folder = tr_folder
    val_mask_folder = tr_mask_folder
    te_folder = tr_folder
    te_mask_folder = tr_mask_folder
    base_tr_folder = resized_image_folder + "/train_{}_{}".format(
        height, width)
    base_val_folder = resized_image_folder + "/validation_{}_{}".format(
    # NOTE(review): truncated mid-call in this view.
# --- fragment (Python 2): classifier-model setup; `train`, `val`,
# --- `train_mean`, `attribute`, `model_name`, `size` come from above.
# Mean-center both splits with the training-set mean.
train = train - train_mean
val = val - train_mean
print "Saving task2 step2 train mean for attribute", attribute
pkl.dump(train_mean, open('./datasets/task2_step2_train_mean.pkl', 'wb'))
optimizer = Adam(lr=1e-5)
model_filename = "weights2018_task2/{}_{}_{}_crop_cls.h5".format(
    attribute, model_name, size)
print 'Create model'
if model == 'vgg':
    loss = 'binary_crossentropy'
    model = models.VGG16(height, width, pretrained=True,
                         freeze_pretrained=False, loss=loss,
                         optimizer=optimizer, metrics=['accuracy'])
else:
    print "Incorrect model name"


def myGenerator(train_generator, show=False):
    # Infinite generator wrapping `train_generator`; optionally shows images.
    while True:
        train_gen = next(train_generator)
        if show:  # use True to show images
            for i in range(train_gen.shape[0]):
                img = train_gen[i]
                img = img[0:3]
                # NOTE(review): truncated here in this view.
def main(args):
    """Train a VGG16 classifier on the ISIC dataset, checkpointing the best
    validation accuracy.

    Args:
        args: namespace with `learning_rate`, `dataset`, `batch_size`,
            and `epochs`.
    """
    num_classes = 8
    size = (224, 224, 3)  # input image shape (H, W, C)
    learning_rate = args.learning_rate
    display_step = 3  # print training progress every N batches

    # Runtime initialization will not allocate all memory on GPU.
    physical_devices = tf.config.list_physical_devices('GPU')
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except (IndexError, ValueError, RuntimeError):
        # No GPU present (IndexError), invalid device (ValueError), or
        # devices already initialized (RuntimeError) — fall back to default
        # memory behavior.  (Was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.)
        pass

    model = models.VGG16(input_shape=size, num_classes=num_classes)
    model.build(input_shape=size)
    model.summary()

    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.optimizers.Adam(learning_rate)

    training_dataset = ISICClassification(args.dataset, 'training',
                                          args.batch_size, size[:-1])
    validation_dataset = ISICClassification(args.dataset, 'validation',
                                            args.batch_size, size[:-1],
                                            shuffle=False)

    # Running metrics, reset at each epoch boundary.
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='val_accuracy')

    best_accuracy = 0.
    for e in range(1, args.epochs + 1):
        train_loss.reset_states()
        train_accuracy.reset_states()
        val_accuracy.reset_states()

        for step, (images, labels) in enumerate(training_dataset, 1):
            # Forward pass under a tape so gradients can be computed.
            with tf.GradientTape() as tape:
                pred = model(images, is_training=True)
                loss = loss_fn(labels, pred)
            train_loss.update_state(loss)
            train_accuracy.update_state(labels, pred)
            gradients = tape.gradient(loss, model.trainable_variables)
            # Update W and b following gradients
            optimizer.apply_gradients(zip(gradients,
                                          model.trainable_variables))

            if step % display_step == 0:
                print(
                    "\rTraining {:d}/{:d} (batch {:d}/{:d}) - Loss: {:.4f} - Accuracy: {:.4f}"
                    .format(e, args.epochs, step, len(training_dataset),
                            train_loss.result(), train_accuracy.result()),
                    end="", flush=True)
        print('\n')

        # Do validation (no gradient updates).
        print("Validation {:d}/{:d}".format(e, args.epochs), end="",
              flush=True)
        for step, (images, labels) in enumerate(validation_dataset, 1):
            pred = model(images, is_training=False)
            val_accuracy.update_state(labels, pred)

        # Compute accuracy and save checkpoints
        accuracy = val_accuracy.result()
        print(" - Accuracy: {:.4f}".format(accuracy), flush=True)
        if accuracy > best_accuracy:
            print("Saving checkpoints")
            best_accuracy = accuracy
            model.save_weights("checkpoint.tf", save_format='tf')
    return
# --- fragment: resume path of a training script; `f` and `resume` come from
# --- code above this view.
args = pickle.load(f)  # restore args saved by a previous run
args.resume = resume
print(args)

# DataLoader worker/pinning options only when CUDA is in use.
para = {"num_workers":8, "pin_memory":True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    dataloaders.ImageCaptionDataset(args.data_train),
    batch_size=args.batch_size, shuffle=True, **para)
val_loader = torch.utils.data.DataLoader(
    dataloaders.ImageCaptionDataset(args.data_val, image_conf={'center_crop':True}),
    batch_size=args.batch_size, shuffle=False, **para)

audio_model = models.Davenet()
image_model = models.VGG16(pretrained=args.pretrained_image_model)

if not bool(args.exp_dir):
    print("exp_dir not specified, automatically creating one...")
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    # Experiment directory name encodes the data/model/optimizer settings.
    args.exp_dir = "exp/Data-%s/AudioModel-%s_ImageModel-%s_Optim-%s_LR-%s_Epochs-%s_%s" % (
        os.path.basename(args.data_train), args.audio_model, args.image_model,
        args.optim, args.lr, args.n_epochs, timestamp)

if not args.resume:
    # Fresh run: create the experiment dir and persist args for later resume.
    print("\nexp_dir: %s" % args.exp_dir)
    os.makedirs("%s/models" % args.exp_dir)
    with open("%s/args.pkl" % args.exp_dir, "wb") as f:
        pickle.dump(args, f)
# --- fragment: training-script setup; `parser` comes from code above.
args = parser.parse_args()
print(args)

# Setup loaders, models and loss
train_loader = torch.utils.data.DataLoader(
    dataloaders.ImageCaptionDataset(args.data_train,
                                    audio_conf={'target_length': args.input_length},
                                    image_conf={'center_crop': True}),
    batch_size=args.batch_size, shuffle=True, num_workers=8, pin_memory=True)
val_loader = torch.utils.data.DataLoader(
    dataloaders.ImageCaptionDataset(args.data_val,
                                    audio_conf={'target_length': args.input_length},
                                    image_conf={'center_crop': True}),
    batch_size=args.batch_size, shuffle=False, num_workers=8, pin_memory=True)

audio_model = models.ConvX3AudioNet(input_length=args.input_length)
image_model = models.VGG16()
# Optionally warm-start the audio model from a previous run's best weights.
if bool(args.train_path):
    audio_model.load_state_dict(
        torch.load("%s/models/best_audio_model.pth" % args.train_path),
        strict=False)
criterion = DotLoss()

# Set up the optimizer over all trainable parameters of both models.
audio_trainables = [p for p in audio_model.parameters() if p.requires_grad]
image_trainables = [p for p in image_model.parameters() if p.requires_grad]
trainables = audio_trainables + image_trainables
if args.optim == 'sgd':
    optimizer = torch.optim.SGD(trainables, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
elif args.optim == 'adam':
    # NOTE(review): truncated here in this view.
def classify(model_data_path, image_pathFolder):
    '''Classify every image in a folder using VGG16.

    Args:
        model_data_path: path to the converted VGG16 parameter file
            loaded via net.load().
        image_pathFolder: folder containing the images to classify.

    Prints the class probabilities for the processed batch via
    display_results().
    '''
    # os.path.join works whether or not image_pathFolder ends with a
    # separator; the original raw concatenation produced broken paths
    # when the trailing slash was missing.
    image_paths = [os.path.join(image_pathFolder, filename)
                   for filename in os.listdir(image_pathFolder)]

    # Get the data specifications (crop size, channels, ...) for the VggNet model
    spec = models.get_data_spec(model_class=models.VGG16)

    # Create a placeholder for the input image batch
    input_node = tf.placeholder(
        tf.float32,
        shape=(None, spec.crop_size, spec.crop_size, spec.channels))

    # Construct the network
    net = models.VGG16({'data': input_node})

    # Create an image producer (loads and processes images in parallel)
    image_producer = dataset.ImageProducer(image_paths=image_paths,
                                           data_spec=spec)

    # Single session config. The original code built two ConfigProto
    # objects and the second assignment silently discarded the
    # allow_growth setting; configure everything on one object instead.
    config = tf.ConfigProto(log_device_placement=True,   # log device assignment
                            allow_soft_placement=False)  # fail on impossible device pins
    config.gpu_options.allow_growth = True  # claim GPU memory on demand

    with tf.Session(config=config) as sesh:
        # Start the image processing workers
        coordinator = tf.train.Coordinator()
        threads = image_producer.start(session=sesh, coordinator=coordinator)

        # Load the converted parameters
        net.load(model_data_path, sesh)

        # Load the input image batch (and the indices of the source paths)
        indices, input_images = image_producer.get(sesh)

        # Perform a forward pass through the network to get the class probabilities
        print(
            'Classifying -----------------------------------------------------------------%s'
            % datetime.now())
        probs = sesh.run(net.get_output(),
                         feed_dict={input_node: input_images})
        print(
            'Classifying END -----------------------------------------------------------------%s'
            % datetime.now())
        display_results([image_paths[i] for i in indices], probs)

        # Stop the worker threads
        coordinator.request_stop()
        coordinator.join(threads, stop_grace_period_secs=2)
def run_training(image_dir, tv_proportion):
    """Fine-tune VGG16 on images under image_dir with periodic validation.

    Splits the data into train/val/test via data.get_test_and_val, loads
    ImageNet-pretrained weights from 'Data/VGG_imagenet.npy', trains for
    train_max_epoch steps (module-level constant — confirm), validating
    every 40 steps, and checkpoints the final model.

    Args:
        image_dir: directory containing the labelled training images.
        tv_proportion: split proportion passed to data.get_test_and_val.

    Returns:
        Mean validation accuracy over all validation runs, in percent.
    """
    # get_batch_data cursors (index + completed-epoch counter) for the
    # training and validation sets.
    train_index = 0
    train_epoch_completed = 0
    val_index = 0
    val_epoch_completed = 0
    # Output directories for TensorBoard summaries / checkpoints.
    logs_train_dir = './generated_file/train'
    logs_val_dir = './generated_file/val'
    # Split validation and test sets out of the training data.
    # NOTE(review): test_path/test_label are returned but unused here —
    # presumably consumed by a separate testing() routine.
    test_label, test_path, val_path, val_label, train_path, train_label = \
        data.get_test_and_val(image_dir, tv_proportion)
    # Placeholders for one batch of 224x224 RGB images and integer labels.
    X = tf.placeholder(tf.float32, shape=[batch_size, 224, 224, 3], name="X")
    Y = tf.placeholder(tf.int32, shape=[batch_size], name="Y")
    # Graph ops: forward pass, loss, accuracy.
    logit = models.VGG16(X)
    loss = losses(logit, Y)
    acc = evaluation(logit, Y)
    # Learning rate lives in a tf variable so it can be decayed in-session
    # (created here, or reused if it already exists).
    learning_rate = tf.get_variable(name='lr', initializer=0.001)
    train_op = training(loss, learning_rate)
    # Open the session and initialize all globals.
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # Writers record the graph plus scalar summaries.
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    val_writer = tf.summary.FileWriter(logs_val_dir, sess.graph)
    # Merge all registered summaries into one op; saver for checkpoints.
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()
    # Load the pretrained ImageNet weights: the npy file maps
    # layer name -> {param name -> ndarray}.
    data_dict = np.load('Data/VGG_imagenet.npy', encoding="latin1").item()
    print('start loading the pre-trained model...')
    for key in data_dict:
        # reuse=True: variables were already created by models.VGG16 above.
        with tf.variable_scope(key, reuse=True):
            for subkey in data_dict[key]:
                try:
                    var = tf.get_variable(subkey)
                    # Overwrite the layer's parameters with the pretrained ones.
                    sess.run(var.assign(data_dict[key][subkey]))
                    print("assign pretrain model " + subkey + " to " + key)
                except ValueError:
                    # Layer/parameter not present in this graph — skip it.
                    print("ignore " + key)
    print("Start training...")
    accuracy = []
    for step in range(train_max_epoch):
        # Fetch one training batch and feed it through the placeholders.
        train_batch_X, train_batch_Y, train_index, train_epoch_completed = data.get_batch_data(\
            train_path, train_label, train_index, train_epoch_completed)
        feed_dict_train = {X: train_batch_X, Y: train_batch_Y}
        # Decay the learning rate by 10x every 10k steps.
        if step != 0 and step % 10000 == 0:
            sess.run(learning_rate.assign(sess.run(learning_rate) * 0.1))
            print('lr:', learning_rate.eval(session=sess))
        # One optimization step; also fetch summaries, loss and accuracy.
        _, summary_str, tra_loss, tra_acc = sess.run(
            fetches=[train_op, summary_op, loss, acc],
            feed_dict=feed_dict_train)
        train_writer.add_summary(summary_str, step)
        # Print training stats every 10 steps (and on the last step).
        if step % 10 == 0 or (step + 1) == train_max_epoch:
            print('step %d, train loss = %.2f, train accuracy = %.2f%%' %
                  (step, tra_loss, tra_acc * 100.0))
        # Validate every 40 steps (and on the last step).
        if step % 40 == 0 or (step + 1) == train_max_epoch:
            val_batch_X, val_batch_Y, val_index, val_epoch_completed = data.get_batch_data( \
                val_path, val_label, val_index, val_epoch_completed)
            feed_dict_val = {X: val_batch_X, Y: val_batch_Y}
            summary_str, val_loss, val_acc = sess.run(fetches=[summary_op, loss, acc], \
                                                      feed_dict=feed_dict_val)
            accuracy.append(val_acc)
            print('step %d, val loss = %.2f, val accuracy = %.2f%%' %
                  (step, val_loss, val_acc * 100.0))
            val_writer.add_summary(summary_str, step)
        # Checkpoint only the final iteration.
        if (step + 1) == train_max_epoch:
            checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step + 1)
    # Mean validation accuracy, as a percentage.
    val_total_acc = np.array(accuracy)
    val_total_acc = np.mean(val_total_acc) * 100
    sess.close()
    return val_total_acc
def train():
    """Train VGG16 on CIFAR-10 with periodic test-set evaluation.

    Bug fixed: the original built logits/loss/accuracy directly on the
    training queue batch, which left the x/y_ placeholders dead — every
    sess.run of loss/accuracy silently dequeued a *fresh training* batch,
    so the feed_dict had no effect and the "test" evaluation never saw
    test data. The graph is now built on the placeholders and the numpy
    batches from the input queues are fed through feed_dict.

    Uses module-level configuration: data_dir, BATCH_SIZE, N_CLASSES,
    IS_PRETRAIN, IMG_W, IMG_H, learning_rate, pre_trained_weights,
    MAX_STEP, train_log_dir.
    """
    with tf.name_scope('input'):
        train_image_batch, train_label_batch = input_data.read_cifar10(
            data_dir=data_dir,
            is_train=True,
            batch_size=BATCH_SIZE,
            shuffle=True)
        test_image_batch, test_label_batch = input_data.read_cifar10(
            data_dir=data_dir,
            is_train=False,
            batch_size=BATCH_SIZE,
            shuffle=False)

    # Placeholders for one batch of images and (one-hot) labels; the same
    # graph serves both training and test batches via feed_dict.
    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASSES])

    # Build the model and training ops on the placeholders.
    logits = models.VGG16(x, N_CLASSES, IS_PRETRAIN)
    loss = utils.loss(logits, y_)
    accuracy = utils.accuracy(logits, y_)

    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = utils.optimize(loss, learning_rate, my_global_step)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # Load the parameter file, assign the parameters, skip the FC layers.
    utils.load_with_skip(pre_trained_weights, sess, ['fc6', 'fc7', 'fc8'])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    train_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            # Dequeue one training batch and feed it to the graph.
            train_images, train_labels = sess.run(
                [train_image_batch, train_label_batch])
            feed_train = {x: train_images, y_: train_labels}
            _, train_loss, train_acc = sess.run([train_op, loss, accuracy],
                                                feed_dict=feed_train)

            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('Step: %d, train_loss: %.4f, train_accuracy: %.4f%%' %
                      (step, train_loss, train_acc))
                # Summaries depend on the placeholders now, so they must be
                # fed too (the original bare sess.run(summary_op) relied on
                # the queue-built graph).
                summary_str = sess.run(summary_op, feed_dict=feed_train)
                train_summary_writer.add_summary(summary_str, step)

            if step % 200 == 0 or (step + 1) == MAX_STEP:
                # Evaluate on an actual test batch.
                test_images, test_labels = sess.run(
                    [test_image_batch, test_label_batch])
                test_loss, test_acc = sess.run([loss, accuracy],
                                               feed_dict={
                                                   x: test_images,
                                                   y_: test_labels
                                               })
                print(
                    '** Step: %d, test_loss: %.2f, test_accuracy: %.2f%% **' %
                    (step, test_loss, test_acc))

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
    coord.join(threads)
    sess.close()
def predict_challenge(challenge_folder, challenge_predicted_folder,
                      task1predicted_folder, plot=False):
    """Predict ISIC task-2 attribute masks for a folder of challenge images.

    Crops/resizes the images (cached on disk), runs (or loads cached)
    model predictions, optionally re-weights the masks by two auxiliary
    classifiers, thresholds at 0.5, and writes full-sized masks.

    NOTE(review): relies on many module-level globals — height, width,
    pre_proc, train_mean, model, model_filename, test_aug, do_filter,
    attribute, batch_size — verify they are configured before calling.

    Args:
        challenge_folder: folder with the original challenge images.
        challenge_predicted_folder: folder where predictions are cached
            and the final masks are written.
        task1predicted_folder: folder with task-1 crop predictions used
            to crop the inputs.
        plot: forwarded to ISIC.show_crop_images_full_sized.
    """
    challenge_list = ISIC.list_from_folder(challenge_folder)
    # Cache folder for cropped+resized copies; inds.pkl stores the crop
    # indices needed later to map masks back to original image sizes.
    challenge_crop_resized_folder = challenge_folder + "_crop_{}_{}".format(
        height, width)
    crop_inds_file = os.path.join(challenge_crop_resized_folder, 'inds.pkl')
    if not os.path.exists(challenge_crop_resized_folder) or not os.path.isfile(
            crop_inds_file):
        print "Cropping and resizing images"
        inds = ISIC.crop_resize_images(
            challenge_list,
            input_image_folder=challenge_folder,
            input_mask_folder=None,
            input_gtcrop_folder=task1predicted_folder,
            output_image_folder=challenge_crop_resized_folder,
            output_mask_folder=None,
            height=height,
            width=width)
        pkl.dump(inds, open(crop_inds_file, 'wb'))
    inds = pkl.load(open(crop_inds_file, 'rb'))
    challenge_images = ISIC.load_images(challenge_list, height, width,
                                        challenge_crop_resized_folder)
    if pre_proc:
        challenge_images = my_PreProc(challenge_images)
    # Center the images with the training-set mean.
    challenge_images = challenge_images - train_mean
    # NOTE(review): fragile — assumes model_filename looks like
    # './dir/sub/name.ext'; confirm against how model_filename is set.
    model_name = model_filename.split('.')[1].split('/')[2]
    # Cache-on-miss: load previously saved predictions, or predict anew.
    # NOTE(review): the bare except swallows *all* errors (not just a
    # missing cache file) and silently recomputes — consider IOError.
    try:
        if test_aug:
            mask_pred_challenge = pkl.load(
                open(
                    os.path.join(challenge_predicted_folder,
                                 model_name + '_testaug.pkl'), 'rb'))
        else:
            mask_pred_challenge = pkl.load(
                open(
                    os.path.join(challenge_predicted_folder,
                                 model_name + '.pkl'), 'rb'))
    except:
        print 'making prediction...'
        model.load_weights(model_filename)
        if test_aug:
            # Per-image prediction with test-time augmentation.
            print "Predicting using test data augmentation"
            mask_pred_challenge = np.array(
                [my_predict(model, x) for x in tqdm(challenge_images)])
            print mask_pred_challenge.shape
        else:
            mask_pred_challenge = model.predict(challenge_images,
                                                batch_size=batch_size)
            # Keep only the first output channel.
            mask_pred_challenge = mask_pred_challenge[:, 0, :, :]
            print mask_pred_challenge.shape
        if do_filter:
            # Re-weight the masks by two image-level classifiers trained at
            # 256x256 (a "pre" step-1 classifier and a per-attribute step-2
            # classifier).
            print "Using step2 and step1 classifiers"
            size_1_2 = 256
            challenge_crop_resized_folder = challenge_folder + "_crop_{}_{}".format(
                size_1_2, size_1_2)
            if not os.path.exists(challenge_crop_resized_folder):
                print "Cropping and resizing images"
                _ = ISIC.crop_resize_images(
                    challenge_list,
                    input_image_folder=challenge_folder,
                    input_mask_folder=None,
                    input_gtcrop_folder=task1predicted_folder,
                    output_image_folder=challenge_crop_resized_folder,
                    output_mask_folder=None,
                    height=size_1_2,
                    width=size_1_2)
            challenge_images = ISIC.load_images(challenge_list, size_1_2,
                                                size_1_2,
                                                challenge_crop_resized_folder)
            # filter by pre model
            pre_cls_model_filename = './weights2018_task2/{}_{}_crop_pre_cls.h5'.format(
                'vgg', size_1_2)
            pre_cls_model = models.VGG16(size_1_2,
                                         size_1_2,
                                         pretrained=True,
                                         freeze_pretrained=False,
                                         loss='binary_crossentropy',
                                         optimizer=Adam(lr=1e-5),
                                         metrics=['accuracy'])
            pre_cls_model.load_weights(pre_cls_model_filename)
            step1_mean = pkl.load(
                open('./datasets/task2_step1_train_mean.pkl', 'rb'))
            prob_pre = pre_cls_model.predict(challenge_images - step1_mean)
            print prob_pre.shape
            # filter by cls model
            cls_model_filename = './weights2018_task2/{}_{}_{}_crop_cls.h5'.format(
                attribute, 'vgg', size_1_2)
            cls_model = models.VGG16(size_1_2,
                                     size_1_2,
                                     pretrained=True,
                                     freeze_pretrained=False,
                                     loss='binary_crossentropy',
                                     optimizer=Adam(lr=1e-5),
                                     metrics=['accuracy'])
            cls_model.load_weights(cls_model_filename)
            step2_mean = pkl.load(
                open('./datasets/task2_step2_train_mean.pkl', 'rb'))
            prob_pred = cls_model.predict(challenge_images - step2_mean)
            print prob_pred.shape
            # prob product: broadcast both classifier probabilities over
            # the per-pixel masks.
            mask_pred_challenge = mask_pred_challenge * prob_pred[:, np.
                                                                  newaxis] * prob_pre[:, np
                                                                                      .
                                                                                      newaxis]
        # Cache the freshly computed predictions for subsequent runs.
        if test_aug:
            with open(
                    os.path.join(challenge_predicted_folder,
                                 model_name + '_testaug.pkl'), 'wb') as f:
                pkl.dump(mask_pred_challenge, f)
        else:
            with open(
                    os.path.join(challenge_predicted_folder,
                                 model_name + '.pkl'), 'wb') as f:
                pkl.dump(mask_pred_challenge, f)
    # Binarize at 0.5 and scale to 8-bit mask values.
    cutoff = 0.5
    mask_pred_challenge_256 = (np.where(mask_pred_challenge >= cutoff, 1, 0) *
                               255).astype(np.uint8)
    if test_aug:
        challenge_predicted_folder = os.path.join(challenge_predicted_folder,
                                                  model_name + '_testaug')
    else:
        challenge_predicted_folder = os.path.join(challenge_predicted_folder,
                                                  model_name)
    if not os.path.exists(challenge_predicted_folder):
        os.makedirs(challenge_predicted_folder)
    print "Start predicting masks of original shapes"
    # NOTE(review): imgs/mask_preds are never appended to — dead lists;
    # the loop is run purely for its file-writing side effects.
    imgs = []
    mask_preds = []
    for i in trange(len(challenge_list)):
        img, mask_pred = ISIC.show_crop_images_full_sized(
            challenge_list,
            img_mask_pred_array=mask_pred_challenge_256,
            image_folder=challenge_folder,
            inds=inds,
            mask_folder=None,
            index=i,
            output_folder=challenge_predicted_folder,
            attribute=attribute,
            plot=plot)
def main():
    """Horovod-distributed CIFAR-10 training entry point.

    Parses CLI args, builds data loaders with per-rank samplers, selects the
    model architecture, optionally resumes from a checkpoint, wraps the SGD
    optimizer with Horovod's DistributedOptimizer (with a pluggable gradient
    compression strategy) and runs the train/validate loop while tracking
    the best top-1/top-5 accuracy.

    Fixes vs. original: the "loaded checkpoint" message printed
    args.evaluate instead of args.resume; an unknown --arch fell through
    and crashed later with a NameError; criterion.cuda() was called even
    on CPU-only runs.
    """
    global args, best_prec1, best_prec5
    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Horovod initialize; only rank 0 writes TensorBoard logs.
    hvd.init()
    log = None
    if hvd.rank() == 0:
        log = SummaryWriter(log_dir=args.log_dir)
        print('The Training Model is %s' % args.arch)

    # Check the save_dir exists or not
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    if args.cuda:
        torch.cuda.set_device(hvd.local_rank())

    # CIFAR-10 channel statistics.
    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))

    # Horovod: limit # of CPU threads to be used per worker.
    torch.set_num_threads(1)
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    # When supported, use 'forkserver' to spawn dataloader workers instead of
    # 'fork' to prevent issues with Infiniband implementations that are not
    # fork-safe.
    if (kwargs.get('num_workers', 0) > 0 and hasattr(mp, '_supports_context')
            and mp._supports_context
            and 'forkserver' in mp.get_all_start_methods()):
        kwargs['multiprocessing_context'] = 'forkserver'

    # Per-local-rank data roots avoid download races on shared filesystems.
    train_dataset = datasets.CIFAR10('data-%d' % hvd.local_rank(),
                                     train=True,
                                     transform=transforms.Compose([
                                         transforms.RandomCrop(32, padding=4),
                                         transforms.RandomHorizontalFlip(),
                                         transforms.ToTensor(),
                                         normalize,
                                     ]),
                                     download=True)
    val_dataset = datasets.CIFAR10('data-%d' % hvd.local_rank(),
                                   train=False,
                                   transform=transforms.Compose([
                                       transforms.ToTensor(),
                                       normalize,
                                   ]))

    # Horovod: partition the data across workers.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=hvd.size(), rank=hvd.rank())
    val_sampler = torch.utils.data.distributed.DistributedSampler(
        val_dataset, num_replicas=hvd.size(), rank=hvd.rank())
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               sampler=train_sampler,
                                               **kwargs)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             sampler=val_sampler,
                                             **kwargs)

    # Architecture selection; fail fast on an unknown name (the original
    # fell through and later crashed with a NameError on `model`).
    if args.arch in resnet.__dict__:
        model = resnet.__dict__[args.arch]()
    elif args.arch == 'alexnet':
        model = models.AlexNet()
    elif args.arch == 'vgg16':
        model = models.VGG16()
    else:
        raise ValueError('unsupported architecture: %s' % args.arch)

    if hvd.rank() == 0:
        numel = sum(p.numel() for p in model.parameters())
        print('Total params: {:d}'.format(numel))

    # Horovod: scale the learning rate by the number of workers
    # (by local size when using Adasum with NCCL).
    lr_scaler = hvd.size()
    if args.cuda:
        model.cuda()
        if args.use_adasum and hvd.nccl_built():
            lr_scaler = hvd.local_size()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Bug fix: report the checkpoint actually loaded; the original
            # formatted args.evaluate here.
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer; only move it to the
    # GPU when CUDA is actually in use (the unconditional .cuda() crashed
    # CPU-only runs).
    criterion = nn.CrossEntropyLoss()
    if args.cuda:
        criterion = criterion.cuda()

    if args.half:
        model.half()
        criterion.half()

    base_optimizer = torch.optim.SGD(model.parameters(),
                                     args.lr * lr_scaler,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay)

    # Horovod: broadcast parameters & optimizer state from rank 0.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    hvd.broadcast_optimizer_state(base_optimizer, root_rank=0)

    # Gradient compression strategy. Alternatives kept for experimentation:
    # compression = Allgather(MGCCompressor(0.05), ResidualMemory(), hvd.size())
    # compression = Allgather(TernGradCompressor(), ResidualMemory(), hvd.size())
    # compression = Allgather(DgcCompressor(0.01), ResidualMemory(), hvd.size())
    # compression = Allgather(LowQSGDCompressor(), ResidualMemory(), hvd.size())
    compression = Allreduce(NoneCompressor(), NoneMemory())

    # Horovod: wrap optimizer with DistributedOptimizer.
    optimizer = hvd.DistributedOptimizer(
        base_optimizer, compression, named_parameters=model.named_parameters())

    if hvd.rank() == 0:
        log.add_scalar('train/accuracy', 0., 0)
        log.add_scalar('test/accuracy', 0., 0)

    for epoch in range(args.start_epoch + 1, args.epochs + 1):
        adjust_learning_rate(optimizer, epoch, size=lr_scaler)
        if hvd.rank() == 0:
            print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log=log)

        # evaluate on validation set
        prec1, prec5 = validate(val_loader, model, criterion, epoch, log=log)

        # remember best prec@1 / prec@5
        best_prec1 = max(prec1, best_prec1)
        best_prec5 = max(prec5, best_prec5)
        if hvd.rank() == 0:
            print('Best Pred@1:{:.2f}%, Prec@5:{:.2f}%\n'.format(
                best_prec1, best_prec5))

    if hvd.rank() == 0:
        log.close()
batch_size=args.batch_size, shuffle=True, num_workers=8, pin_memory=True) val_loader = torch.utils.data.DataLoader(dataloaders.ImageCaptionDataset( args.data_val, audio_conf={'target_length': args.input_length}, image_conf={'center_crop': True}), batch_size=args.batch_size, shuffle=False, num_workers=8, pin_memory=True) audio_model = models.DaveNet(embedding_dim=args.input_length) image_model = models.VGG16(embedding_dim=args.input_length, pretrained=args.pretrained_image_model) if bool(args.train_path): audio_model.load_state_dict(torch.load("%s/models/best_audio_model.pth" % args.train_path), strict=False) criterion = MatchMapLoss() # Set up the optimizer audio_trainables = [p for p in audio_model.parameters() if p.requires_grad] image_trainables = [p for p in image_model.parameters() if p.requires_grad] trainables = audio_trainables + image_trainables if args.optim == 'sgd': optimizer = torch.optim.SGD(trainables, args.lr,
def main():
    """Generate Mahalanobis-based confidence scores for OOD detection.

    Loads a pre-trained classifier (densenet / resnet / vgg16), estimates
    per-layer feature statistics on the in-distribution training set via
    lib_generation.sample_estimator, then sweeps a list of input
    perturbation magnitudes, computing Mahalanobis scores for the
    in-distribution test set and each out-of-distribution dataset, and
    saves the labelled score matrices as .npy files under args.outf.

    Uses module-level globals: args (net_type, dataset, outf, gpu,
    num_classes, batch_size, dataroot).
    """
    # set the path to pre-trained model and output
    pre_trained_net = './pre_trained/' + args.net_type + '_' + args.dataset + '.pth'
    args.outf = args.outf + args.net_type + '_' + args.dataset + '/'
    # NOTE(review): os.mkdir fails if the parent of args.outf does not
    # exist; os.makedirs would be more robust — confirm intended layout.
    if os.path.isdir(args.outf) == False:
        os.mkdir(args.outf)
    torch.cuda.manual_seed(0)
    torch.cuda.set_device(args.gpu)
    # check the in-distribution dataset
    if args.dataset == 'cifar100':
        args.num_classes = 100
    # Out-of-distribution datasets paired with each in-distribution set.
    if args.dataset == 'svhn':
        out_dist_list = ['cifar10', 'imagenet_resize', 'lsun_resize']
    else:
        out_dist_list = ['svhn', 'imagenet_resize', 'lsun_resize']

    # load networks (each branch also fixes the matching input transform)
    if args.net_type == 'densenet':
        if args.dataset == 'svhn':
            model = models.DenseNet3(100, int(args.num_classes))
            model.load_state_dict(torch.load(pre_trained_net, map_location = "cuda:" + str(args.gpu)))
        else:
            # Non-SVHN DenseNet checkpoints store the whole serialized
            # module, not a state dict.
            model = torch.load(pre_trained_net, map_location = "cuda:" + str(args.gpu))
        in_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((125.3/255, 123.0/255, 113.9/255), (63.0/255, 62.1/255.0, 66.7/255.0)),])
    elif args.net_type == 'resnet':
        model = models.ResNet34(num_c=args.num_classes)
        model.load_state_dict(torch.load(pre_trained_net, map_location = "cuda:" + str(args.gpu)))
        in_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),])
    elif args.net_type == 'vgg16':
        model = models.VGG16(int(args.num_classes))
        model.load_state_dict(torch.load(pre_trained_net, map_location = "cuda:" + str(args.gpu)))
        in_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),])
    # NOTE(review): an unrecognised net_type leaves `model` undefined and
    # fails here with a NameError.
    model.cuda()
    print('load model: ' + args.net_type)

    # load dataset
    print('load target data: ', args.dataset)
    train_loader, test_loader = data_loader.getTargetDataSet(args.dataset, args.batch_size, in_transform, args.dataroot)

    # set information about feature extraction: probe the model once with
    # a dummy 32x32 input to discover how many intermediate feature maps
    # it exposes and their channel widths.
    model.eval()
    temp_x = torch.rand(2,3,32,32).cuda()
    temp_x = Variable(temp_x)
    temp_list = model.feature_list(temp_x)[1]
    num_output = len(temp_list)
    # feature_list[i] = number of channels of the i-th feature map.
    feature_list = np.empty(num_output)
    count = 0
    for out in temp_list:
        feature_list[count] = out.size(1)
        count += 1

    print('get sample mean and covariance')
    # Per-layer feature statistics estimated on the training set
    # (see lib_generation.sample_estimator for the exact contract).
    sample_mean, precision = lib_generation.sample_estimator(model, args.num_classes, feature_list, train_loader)

    print('get Mahalanobis scores')
    # Input perturbation magnitudes to sweep.
    m_list = [0.0, 0.01, 0.005, 0.002, 0.0014, 0.001, 0.0005]
    for magnitude in m_list:
        print('Noise: ' + str(magnitude))
        # In-distribution scores: one block of columns per feature layer,
        # concatenated into an (num_samples, num_output) matrix.
        for i in range(num_output):
            M_in = lib_generation.get_Mahalanobis_score(model, test_loader, args.num_classes, args.outf, \
                                                        True, args.net_type, sample_mean, precision, i, magnitude)
            M_in = np.asarray(M_in, dtype=np.float32)
            if i == 0:
                Mahalanobis_in = M_in.reshape((M_in.shape[0], -1))
            else:
                Mahalanobis_in = np.concatenate((Mahalanobis_in, M_in.reshape((M_in.shape[0], -1))), axis=1)
        for out_dist in out_dist_list:
            out_test_loader = data_loader.getNonTargetDataSet(out_dist, args.batch_size, in_transform, args.dataroot)
            print('Out-distribution: ' + out_dist)
            # Same per-layer score matrix for the OOD dataset.
            for i in range(num_output):
                M_out = lib_generation.get_Mahalanobis_score(model, out_test_loader, args.num_classes, args.outf, \
                                                             False, args.net_type, sample_mean, precision, i, magnitude)
                M_out = np.asarray(M_out, dtype=np.float32)
                if i == 0:
                    Mahalanobis_out = M_out.reshape((M_out.shape[0], -1))
                else:
                    Mahalanobis_out = np.concatenate((Mahalanobis_out, M_out.reshape((M_out.shape[0], -1))), axis=1)

            Mahalanobis_in = np.asarray(Mahalanobis_in, dtype=np.float32)
            Mahalanobis_out = np.asarray(Mahalanobis_out, dtype=np.float32)
            # Merge in/out scores with binary labels and save one file per
            # (magnitude, OOD dataset) pair.
            Mahalanobis_data, Mahalanobis_labels = lib_generation.merge_and_generate_labels(Mahalanobis_out, Mahalanobis_in)
            file_name = os.path.join(args.outf, 'Mahalanobis_%s_%s_%s.npy' % (str(magnitude), args.dataset , out_dist))
            Mahalanobis_data = np.concatenate((Mahalanobis_data, Mahalanobis_labels), axis=1)
            np.save(file_name, Mahalanobis_data)