def get_model(model_name, pho_size=299, num_classes=110):
    """Instantiate the network registered under ``model_name``.

    Args:
        model_name: Case-sensitive key selecting the architecture, e.g.
            ``"vgg16"``, ``"densenet"`` or ``'Comdefend'``.
        pho_size: Forwarded to the DenseNet constructor as ``pho_size``.
        num_classes: Forwarded as ``num_classes`` to the constructors that
            take it.

    Returns:
        The newly constructed model.

    Raises:
        ValueError: If ``model_name`` is not a supported key. Previously
            this surfaced as an ``UnboundLocalError`` at the ``return``.
    """
    # Constructors are wrapped in lambdas so only the requested one runs.
    builders = {
        # NOTE(review): VGG is always built with pho_size=299, regardless of
        # the ``pho_size`` argument -- confirm whether that is intended.
        "vgg16": lambda: VGG(num_classes=num_classes, pho_size=299),
        "resnet101": lambda: resnet101(num_classes=num_classes),
        "densenet": lambda: DenseNet(
            growth_rate=12,
            block_config=[(100 - 4) // 6 for _ in range(3)],
            num_classes=num_classes,
            small_inputs=False,
            efficient=True,
            pho_size=pho_size,
        ),
        "InceptionResNetV2": lambda: InceptionResNetV2(num_classes=num_classes),
        "InceptionV4": lambda: InceptionV4(num_classes=num_classes),
        "Inception3": lambda: Inception3(num_classes=num_classes),
        "denoise": lambda: get_denoise(),
        "Mymodel": lambda: Mymodel(),
        "Comdefend": lambda: ComDefend(),
        "Rectifi": lambda: Rectifi(),
    }
    if model_name not in builders:
        raise ValueError(
            "Unknown model name %r; expected one of %s"
            % (model_name, sorted(builders))
        )
    return builders[model_name]()
def load_model(path):
    """Rebuild a DenseNet from the checkpoint file at ``path``.

    The checkpoint is read with ``torch.load`` and must provide the keys
    ``'model_args'`` (constructor keyword arguments), ``'state_dict'`` and
    ``'model_device'``. The returned model has its ``device`` attribute set
    and has been moved to that device.
    """
    checkpoint = torch.load(path)

    net = DenseNet(**checkpoint['model_args'])
    net.load_state_dict(checkpoint['state_dict'])

    # Remember the original device on the model, then move the model there.
    net.device = checkpoint['model_device']
    net.to(net.device)
    return net
def build_module(self):
    """Create the encoder, decoder and output layers and register them.

    A zero tensor of the configured input shape is pushed through every
    layer as soon as it is created, so each following layer can take its
    input size from the previous layer's output shape.

    Returns:
        The tensor produced by the last layer for the zero input.
    """
    cfg = self.densenetParameters
    layers = self.layer_dict

    def _add(key, layer, tensor):
        # Register the layer under ``key`` and run the probe tensor through it.
        layers[key] = layer
        return layers[key].forward(tensor)

    probe = torch.zeros(cfg.input_shape)

    probe = _add(
        'encoder',
        DenseNet(
            input_shape=cfg.input_shape,
            growth_rate=cfg.growth_rate,
            block_config=cfg.block_config,
            compression=cfg.compression,
            num_init_features=cfg.num_init_features,
            bottleneck_factor=cfg.bottleneck_factor,
            drop_rate=cfg.drop_rate,
            num_classes=cfg.num_classes,
            no_classification=True,
            use_se=cfg.use_se,
            increasing_dilation=True,
            small_inputs=False,
        ),
        probe,
    )

    probe = _add(
        'decoder',
        AutoDecoder(
            block_config=cfg.block_config,
            use_bias=cfg.use_bias,
            input_shape=probe.shape,
            growth_rate=cfg.growth_rate,
            compression=cfg.compression,
            num_of_layers=1,
        ),
        probe,
    )

    # First transposed convolution: stride 4, down to 3 channels.
    probe = _add(
        'conv_final_a',
        nn.ConvTranspose2d(probe.shape[1], out_channels=3, kernel_size=7,
                           stride=4, padding=3, output_padding=3, bias=True),
        probe,
    )
    probe = _add('bn_1', nn.BatchNorm2d(probe.shape[1]), probe)

    # Second transposed convolution: stride 1, keeps 3 channels.
    probe = _add(
        'conv_final_b',
        nn.ConvTranspose2d(probe.shape[1], out_channels=3, kernel_size=7,
                           stride=1, padding=3, bias=True),
        probe,
    )
    return probe
def test_hello():
    """Smoke test: run one 32-sample batch through a DenseNet and print sizes."""
    train_x, _train_y, _test_x, _test_y = get_data()

    # Only the first 32 training samples are used as the input batch.
    first_batch = convert(train_x[:32])
    x = Variable(torch.FloatTensor(first_batch))

    model: nn.Module = DenseNet(24, 0.5, n_class=2, blocks=[20, 20, 20],
                                fc_size=13632)
    pred = model(x)

    parameter_count = len(list(model.parameters()))
    print('Model Layer:', parameter_count)
    print(pred.size(), x.size())
def get_densenet(batchsize):
    """Return random ``[inputs, labels]`` and a wrapped DenseNet(161).

    The inputs are a float32 array of shape ``(batchsize, 3, 224, 224)``
    drawn uniformly; the labels are int32 values in ``[0, 1000)``. Both are
    converted to Chainer variables.
    """
    model = Wrapper(DenseNet(161))

    images = np.random.uniform(size=(batchsize, 3, 224, 224)).astype('f')
    images = chainer.as_variable(images)

    labels = np.random.randint(size=(batchsize, ), low=0, high=1000)
    labels = chainer.as_variable(labels.astype(np.int32))

    return [images, labels], model
def get_new_model(tmp_scale=True, num_classes=args.num_classes):
    """Build a fresh network for the architecture named by ``args.model``.

    Args:
        tmp_scale: Forwarded as ``tmp_scale`` to the selected constructor.
        num_classes: Forwarded as ``num_classes`` to every constructor
            except DenseNet. Defaults to ``args.num_classes`` as read when
            this function is defined.

    Returns:
        The newly constructed model.

    Raises:
        ValueError: If ``args.model`` is not a supported architecture.
            Previously the function silently returned ``None``.
    """
    arch = args.model
    if arch == 'resnet18':
        return ResNet18(tmp_scale=tmp_scale, num_classes=num_classes)
    if arch == 'resnet50':
        return ResNet50(tmp_scale=tmp_scale, num_classes=num_classes)
    if arch == 'resnet101':
        return ResNet101(tmp_scale=tmp_scale, num_classes=num_classes)
    if arch == 'inceptionv4':
        return inceptionv4(tmp_scale=tmp_scale, num_classes=num_classes)
    if arch == 'densenet':
        # NOTE(review): DenseNet is built without ``num_classes`` -- confirm
        # whether its constructor should receive it like the others.
        return DenseNet(tmp_scale=tmp_scale)
    raise ValueError('Neural Network type %s not supported' % (arch,))
def main():
    """Load the YAML config, build a DenseNet from it and run training."""
    keras_session()

    settings = Config('/home2/jypark/CIFAR1000/config.yaml')
    network_factory = DenseNet(config=settings)

    # The DenseNet object is called to obtain the model given to the trainer.
    runner = Trainer(config=settings, model=network_factory())
    runner.train()
def build(self, params):
    """Build and compile a 3-class DenseNet from a hyper-parameter vector.

    ``params`` is indexed positionally: depth, number of dense blocks,
    growth rate, dropout rate, initial filter count and the base-10
    exponent of the learning rate.

    Returns:
        The compiled model serialised with ``to_json()``.
    """
    (depth, nb_dense_block, growth_rate,
     dropout_rate, nb_filter, lr_exponent) = (params[i] for i in range(6))
    learning_rate = 10. ** lr_exponent

    network = DenseNet(
        nb_classes=3,
        img_dim=self.input_shape,
        depth=depth,
        nb_dense_block=nb_dense_block,
        growth_rate=growth_rate,
        dropout_rate=dropout_rate,
        nb_filter=nb_filter,
    )
    network.compile(loss='categorical_crossentropy',
                    optimizer=Adam(lr=learning_rate))
    return network.to_json()
def get_model(depth=100, growth_rate=12, efficient=True, valid_size=5000,
              n_epochs=300, batch_size=64, seed=None):
    """Build a DenseNet and restore the newest saved weights, in eval mode.

    Args:
        depth (int) - ignored: the value is always replaced by
            ``config.NET_DEPTH`` (default 100)
        growth_rate (int) - number of features added per DenseNet layer
            (default 12)
        efficient (bool) - use the memory efficient implementation?
            (default True)
        valid_size (int) - unused by this function (default 5000)
        n_epochs (int) - unused by this function (default 300)
        batch_size (int) - unused by this function (default 64)
        seed (int) - unused by this function (default None)

    Returns:
        The model after ``eval()``. Weights are loaded from
        ``cnn_model/<index>model.pth`` (mapped to CPU) only when the index
        reported by ``dir_utils.index_max("cnn_model")`` is positive.

    Raises:
        ValueError: If ``config.NET_DEPTH - 4`` is not divisible by 3.
    """
    # The configured depth always overrides the ``depth`` argument.
    depth = config.NET_DEPTH
    if (depth - 4) % 3:
        raise ValueError('Invalid depth')

    # Three dense blocks with the same number of layers each.
    block_config = [(depth - 4) // 6 for _ in range(3)]
    model = DenseNet(
        growth_rate=growth_rate,
        block_config=block_config,
        num_init_features=growth_rate * 2,
        num_classes=config.N_CLASS,
        small_inputs=True,
        efficient=efficient,
    )

    epoch_start = dir_utils.index_max("cnn_model")
    model_path = "cnn_model/" + str(epoch_start) + "model.pth"
    print('load cnn model:', model_path)
    if epoch_start > 0:
        model.load_state_dict(
            torch.load(model_path, map_location=torch.device('cpu')))

    return model.eval()
def create_bottleneck(self):
    """Build the feature extractor chosen by ``self.FLAGS.architecture``.

    The architecture name is compared case-insensitively. Supported values
    are ``'densenet'`` and ``'inception_resnet_v2'``.

    Returns:
        The feature tensor: ``DenseNet(...).model`` for DenseNet, or the
        first value returned by ``inception_resnet_v2``.

    Raises:
        ValueError: If the architecture is not supported. Previously this
            surfaced as an ``UnboundLocalError`` at the ``return``.
    """
    architecture = self.FLAGS.architecture.lower()

    if architecture == 'densenet':
        densenet_info = create_densenet_info(
            nb_blocks_layers=[6, 12, 48, 32],
            filters=24,
            output_dim=self.bottleneck_dim)
        densenet = DenseNet(self.input_tensor, self.training_flag,
                            self.FLAGS.dropout_rate, densenet_info)
        return densenet.model

    if architecture == 'inception_resnet_v2':
        feature_tensor, _ = inception_resnet_v2(
            self.input_tensor, self.training_flag, 0.8, self.bottleneck_dim)
        return feature_tensor

    raise ValueError(
        "Unsupported architecture '%s'; expected 'densenet' or "
        "'inception_resnet_v2'" % (self.FLAGS.architecture,))
def get_model(learning_rate=1e-3):
    """Create a 200-class DenseNet with an Adam optimizer and CE loss.

    The first convolution is frozen. Parameters are split into five
    optimizer groups: classifier weights (with weight decay 1e-4), the
    dense/transition convolution weights, the classifier bias, batch-norm
    weights and batch-norm biases.

    Returns:
        ``(model, loss, optimizer)`` with model and loss moved to the GPU.
    """
    model = DenseNet(growth_rate=12, block_config=(8, 12, 10),
                     num_init_features=48, bn_size=4, drop_rate=0.25,
                     final_drop_rate=0.25, num_classes=200)

    # The first layer is not trained.
    model.features.conv0.weight.requires_grad = False

    def _select(predicate):
        # Collect the parameters whose name satisfies ``predicate``.
        return [param for name, param in model.named_parameters()
                if predicate(name)]

    # Weights of the last fully connected layer.
    fc_weights = _select(lambda name: 'classifier.weight' in name)
    fc_biases = [model.classifier.bias]
    # Every convolution except the first one.
    quantizable = _select(
        lambda name: 'conv' in name
        and ('dense' in name or 'transition' in name))
    # Batch-norm scale and shift parameters.
    norm_weights = _select(lambda name: 'norm' in name and 'weight' in name)
    norm_biases = _select(lambda name: 'norm' in name and 'bias' in name)

    param_groups = [
        {'params': fc_weights, 'weight_decay': 1e-4},
        {'params': quantizable},
        {'params': fc_biases},
        {'params': norm_weights},
        {'params': norm_biases},
    ]
    optimizer = optim.Adam(param_groups, lr=learning_rate)

    loss = nn.CrossEntropyLoss().cuda()
    model = model.cuda()
    return model, loss, optimizer
def __init__(self, img_size, output_size):
    """Assemble the classifier: input batch-norm, DenseNet base, sigmoid head.

    Args:
        img_size: Indexable; elements 0 and 1 are combined with 3 as the
            last dimension to form the input shape.
        output_size: Number of units of the final sigmoid ``Dense`` layer.
    """
    self.losses = []

    input_shape = (img_size[0], img_size[1], 3)
    backbone = DenseNet(weights=None, input_shape=input_shape,
                        include_top=False)

    self.classifier = Sequential()
    stack = (
        BatchNormalization(input_shape=input_shape),   # normalise the input
        backbone,                                      # feature extractor
        Dense(output_size, activation='sigmoid'),      # classification head
    )
    for layer in stack:
        self.classifier.add(layer)
def __init__(self):
    """Set the judger module's metadata and create its DenseNet backbone."""
    super(ModuleJudgerDense, self).__init__()

    # Module identity and I/O description.
    self.module_name = "judger"
    self.module_shape = (2, 1)
    object_slot = ("object", "objects", "none")
    self.data_type_in = (object_slot, object_slot, "image")
    self.data_type_out = "answer"

    # Training flags.
    self.trainable = True
    self.weight_frozen = False
    self.weight_frozen_tmp = False

    self.color_refs = {}

    # ``independent_branch`` is fixed to True here, so DenseNetSP is used.
    self.independent_branch = True
    self.densenet = DenseNetSP() if self.independent_branch else DenseNet()
def ConsturctNetwork(NetworkType, resume):
    """
    Creates a Neural Network of the specified type.

    Parameters
    ----------
    NetworkType : :obj:`str`
        the type of Neural Network
    resume : :obj:`str`
        the path of trained model; when truthy, the state dict stored at
        this path is loaded into the new network

    Returns
    -------
    The constructed (and optionally restored) model.

    Raises
    ------
    ValueError
        if ``NetworkType`` is not supported
    """
    # Constructors are wrapped in lambdas so only the requested one runs.
    builders = {
        'CifarResNeXt': lambda: CifarResNeXt(num_classes=17, depth=29,
                                             cardinality=8),
        'ShallowResNeXt': lambda: ShallowResNeXt(num_classes=17, depth=11,
                                                 cardinality=16),
        'se_resnet50_shallow_sia': lambda: se_resnet50_shallow_sia(17, None),
        # NOTE(review): this key builds se_resnet50_shallow_sia, same as the
        # entry above -- looks like a copy-paste; confirm the intended model.
        'se_resnext50_32x4d': lambda: se_resnet50_shallow_sia(17, None),
        'SimpleNet': lambda: SimpleNet(17),
        'SimpleNetGN': lambda: SimpleNetGN(17),
        'DenseNet': lambda: DenseNet(num_classes=17),
        'DenseNetSia': lambda: DenseNetSia(num_classes=17),
        'nasnetamobile': lambda: nasnetamobile(num_classes=17,
                                               pretrained=None),
        'Shake_Shake': lambda: Shake_Shake(num_classes=17),
    }
    if NetworkType not in builders:
        raise ValueError('Neural Network type %s not supported' %
                         (NetworkType))
    model = builders[NetworkType]()

    if resume:
        # Load from the path passed in, not from the global ``args.resume``.
        model.load_state_dict(torch.load(resume))
    return model
def evaluate_hw2(path_=None):
    """Run the saved DenseNet on a folder of ``.jpg`` files and report F1.

    Args:
        path_: Directory containing the ``*.jpg`` files. When falsy, the
            ``test`` sub-directory of ``data_root`` is used.

    Returns:
        A ``zip`` pairing each file's base name with its predicted label
        (the rounded network output as an int).

    Notes:
        IDs and ground-truth labels are sliced from fixed positions at the
        end of each path (``[-12:-6]`` and ``[-5:-4]``), so the files must
        follow that naming layout. Weights are read from ``latest.pkl``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    cnn = DenseNet(growthRate=12, depth=48, reduction=0.5, bottleneck=True,
                   nClasses=1)
    cnn = cnn.to(device)

    params = {'batch_size': 50, 'shuffle': False, 'num_workers': 4}

    image_dir = path_ if path_ else os.path.join(data_root, 'test')
    data = glob.glob(os.path.join(image_dir, '*.jpg'))
    list_IDs = [path[-12:-6] for path in data]
    labels = [int(path[-5:-4]) for path in data]

    channel_means = (0.5226, 0.4494, 0.4206)
    channel_stds = (0.2411, 0.2299, 0.2262)
    testTransform = transforms.Compose([
        transforms.Resize((64, 64), interpolation=Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize(channel_means, channel_stds)
    ])

    # Generators
    testing_set = MaskDataset(list_IDs, labels, testTransform, train=False)
    testing_generator = DataLoader(testing_set, **params)

    # The checkpoint is deserialised onto the CPU, then copied into the model.
    cnn.load_state_dict(
        torch.load('latest.pkl', map_location=lambda storage, loc: storage))
    cnn.eval()

    # Test the model. No gradients are needed for inference.
    predictions = []
    y_true = []
    with torch.no_grad():
        for images, batch_labels in testing_generator:
            images = images.to(device)
            outputs = cnn(images).view(-1).cpu()
            predicted = outputs.round().numpy().astype(int)
            predictions.extend(predicted)
            y_true.extend(batch_labels)

    # Report the number of evaluated images, not the number of batches.
    print('F1 score of the model on the {} test images: {:.4f}'.format(
        len(y_true), f1_score(y_true, predictions)))

    file_names = [os.path.split(path)[-1] for path in data]
    return zip(np.array(file_names), predictions)
def test_batches_with_labels(config_file, **kwargs):
    """Evaluate a DenseNet on labelled test data and print mean cost/accuracy.

    Args:
        config_file: Passed to both ``DenseNet`` and ``utils.DataManager``.
        **kwargs: ``testing_data`` is read from here; its first element's
            leading dimension is used as the number of test samples.
    """
    test_data = kwargs.get('testing_data')

    inputs = T.tensor4('input', theano.config.floatX)
    targets = T.ivector('output')

    net = DenseNet(config_file)
    net.load_params()

    # Shared buffers that hold the current batch for the compiled function.
    shape = (net.testing_batch_size, net.input_shape[1], net.input_shape[2],
             net.input_shape[3])
    placeholder_x = theano.shared(np.zeros(shape, 'float32'),
                                  'input_placeholder')
    placeholder_y = theano.shared(
        np.zeros((net.testing_batch_size, ), 'int32'), 'label_placeholder')

    p_y_given_x_test = net.inference(inputs)
    cost_expr = net.build_cost(p_y_given_x_test, targets,
                               params=net.regularizable)
    error_rate = metrics.mean_classification_error(p_y_given_x_test, targets)
    accuracy_expr = (1. - error_rate) * 100.

    test_network = net.compile(
        [], [cost_expr, accuracy_expr],
        givens={inputs: placeholder_x, targets: placeholder_y},
        name='test_densenet',
        allow_input_downcast=True)

    num_samples = test_data[0].shape[0]
    num_test_batches = num_samples // net.testing_batch_size

    dm = utils.DataManager(config_file, (placeholder_x, placeholder_y))
    dm.testing_set = test_data
    dm.num_test_data = test_data[0].shape[0]
    dm.batch_size = net.testing_batch_size
    batches = dm.get_batches(training=False)

    print('Testing %d batches...' % num_test_batches)
    total_cost = 0.
    total_accuracy = 0.
    for batch_x, batch_y in batches:
        dm.update_input((batch_x, batch_y))
        batch_cost, batch_accuracy = test_network()
        total_cost += batch_cost
        total_accuracy += batch_accuracy

    total_cost /= num_test_batches
    total_accuracy /= num_test_batches
    print(
        'Testing finished. Testing cost is %.2f. Testing accuracy is %.2f %%.'
        % (total_cost, total_accuracy))
def make_model(dataloader_args, model_args, training_args):
    """Create data loaders, train a DenseNet and return the trained model.

    Args:
        dataloader_args: Keyword arguments for ``get_dataloaders``.
        model_args: Keyword arguments for the ``DenseNet`` constructor.
        training_args: Extra keyword arguments for ``train_model``.

    Returns:
        The model that was passed to ``train_model``. The accuracy history
        returned by training is plotted before returning.
    """
    # get_dataloaders returns three values; the third is not used here.
    train_loader, test_loader, _unused = get_dataloaders(**dataloader_args)

    net = DenseNet(**model_args)
    training_history = train_model(net, train_loader, test_loader,
                                   **training_args)
    plot_history(training_history, metric='accuracy')
    return net
def test_image(config_file, image):
    """Classify one RGB image with a DenseNet and print the predicted class.

    Args:
        config_file: Passed to the ``DenseNet`` constructor.
        image: Array-like of shape ``(height, width, 3)``.
    """
    image = np.array(image)
    is_rgb = len(image.shape) == 3 and image.shape[-1] == 3
    assert is_rgb, 'Testing image must be an RGB color image.'

    x = T.tensor4('input', theano.config.floatX)
    net = DenseNet(config_file)
    net.load_params()
    p_y_given_x_test = net.inference(x)

    # Multi-dimensional output -> arg-max over axis 1; otherwise threshold.
    if p_y_given_x_test.ndim > 1:
        prediction = T.argmax(p_y_given_x_test, 1)
    else:
        prediction = p_y_given_x_test >= 0.5

    test_network = net.compile([x], prediction, name='test_densenet',
                               allow_input_downcast=True)

    # Add a batch axis and move channels first: (1, 3, height, width).
    batch = np.expand_dims(image, 0).transpose((0, 3, 1, 2))
    prediction = test_network(batch)
    print('The image belongs to class %d' % prediction)
def test_densenet(nb_classes=3, img_dim=(150, 94, 5), depth=10,
                  nb_dense_block=3, growth_rate=12, dropout_rate=0.00,
                  nb_filter=16, lr=1e-3):
    """Build a DenseNet, compile it with Adam and return its JSON form.

    All arguments except ``lr`` are forwarded to ``DenseNet`` under the same
    names; ``lr`` is the Adam learning rate.
    """
    architecture = {
        'nb_classes': nb_classes,
        'img_dim': img_dim,
        'depth': depth,
        'nb_dense_block': nb_dense_block,
        'growth_rate': growth_rate,
        'dropout_rate': dropout_rate,
        'nb_filter': nb_filter,
    }
    network = DenseNet(**architecture)
    network.compile(loss='categorical_crossentropy', optimizer=Adam(lr=lr))
    return network.to_json()
def get_model():
    """Create a 200-class DenseNet with initialised weights, SGD and CE loss.

    Conv and classifier weights get ``kaiming_uniform``; classifier bias and
    batch-norm biases are set to 0, batch-norm weights to 1. Weight decay
    (1e-4) applies to the conv/classifier weights only.

    Returns:
        ``(model, loss, optimizer)`` with model and loss moved to the GPU.
    """
    model = DenseNet(
        growth_rate=12, block_config=(8, 12, 10),
        num_init_features=48, bn_size=4, drop_rate=0.25,
        final_drop_rate=0.25, num_classes=200
    )

    def _named(predicate):
        # Parameters whose name satisfies ``predicate``.
        return [param for name, param in model.named_parameters()
                if predicate(name)]

    # Parameter groups.
    weights = _named(lambda n: 'conv' in n or 'classifier.weight' in n)
    biases = [model.classifier.bias]
    bn_weights = _named(lambda n: 'norm' in n and 'weight' in n)
    bn_biases = _named(lambda n: 'norm' in n and 'bias' in n)

    # Parameter initialisation.
    for param in weights:
        kaiming_uniform(param)
    for group, value in ((biases, 0.0), (bn_weights, 1.0), (bn_biases, 0.0)):
        for param in group:
            constant(param, value)

    param_groups = [
        {'params': weights, 'weight_decay': 1e-4},
        {'params': biases},
        {'params': bn_weights},
        {'params': bn_biases},
    ]
    optimizer = optim.SGD(param_groups, lr=1e-1, momentum=0.95, nesterov=True)

    loss = nn.CrossEntropyLoss().cuda()
    model = model.cuda()
    return model, loss, optimizer
def __init__(self):
    """Create the DenseNet backbone plus a 1x1 conv producing 4 channels."""
    super(NewDensenet, self).__init__()

    num_init_features = 48
    growth_rate = 12
    block_config = [6, 12, 12, 12]

    self.newdense = DenseNet(growth_rate=12, block_config=(6, 12, 12, 12),
                             num_init_features=48, bn_size=4, drop_rate=0)

    # Channel count after the last block: every block adds
    # growth_rate * layers, and the count is halved between blocks.
    feature_dim = num_init_features
    last_block = len(block_config) - 1
    for index, num_layers in enumerate(block_config):
        feature_dim += growth_rate * num_layers
        if index != last_block:
            feature_dim //= 2
    print('feature_dim ', feature_dim)

    self.softmax = nn.LogSoftmax(dim=1)
    self.conv_segment = nn.Conv2d(feature_dim, 4, 1, stride=1, padding=0)
def load_DN_model():
    """Return a compiled DenseNet with a sigmoid ``meta_preds`` head.

    The head has ``nb_classes`` units (a name read from the enclosing
    scope). The model is compiled with Adam and categorical cross-entropy.
    """
    backbone = DenseNet(depth=10, weights=None, growth_rate=12,
                        dropout_rate=0.4, nb_dense_block=3, classes=64)

    head = Dense(nb_classes, activation='sigmoid', name='meta_preds')
    model = Model(inputs=backbone.input, outputs=head(backbone.output))

    optimizer_settings = {
        'lr': 0.001,
        'beta_1': 0.9,
        'beta_2': 0.999,
        'epsilon': None,
        'decay': 0.0,
        'amsgrad': False,
    }
    model.compile(optimizer=Adam(**optimizer_settings),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def train_densenet_cifar(self, epochs=200, n_models=4, device="cuda:0",
                         start_num=0):
    """Train ``n_models`` DenseNets and save each state dict.

    Models are numbered from ``start_num`` and saved as
    ``/media/rene/data/models/densenet_<i>``. ``epochs`` and ``start_num``
    are converted with ``int`` first.
    """
    # Based off: https://github.com/kuangliu/pytorch-cifar
    first_index = int(start_num)
    data_root = Path('/media/rene/data')
    output_dir = data_root / 'models'
    output_dir.mkdir(parents=True, exist_ok=True)

    n_epochs = int(epochs)
    num_workers = 4
    batch_size = 50

    for index in range(first_index, n_models + first_index):
        dataloaders, dataset_sizes = make_batch_gen_cifar(
            str(data_root), batch_size, num_workers, valid_name='valid')

        net = DenseNet(growthRate=24, depth=121, reduction=0.5,
                       bottleneck=True, nClasses=10)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=.1, momentum=0.9,
                              weight_decay=5e-4)
        # The learning rate is cut by 10x every third of the run.
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=int(n_epochs / 3),
                                        gamma=0.1)

        _best_acc, net = train_model(net, criterion, optimizer, scheduler,
                                     n_epochs, dataloaders, dataset_sizes,
                                     device)
        torch.save(net.state_dict(), str(output_dir / ('densenet_' + str(index))))
def build_densenet():
    """Build, compile and summarise a single-output sigmoid DenseNet.

    The model is compiled with SGD (lr 0.1, momentum 0.9) and binary
    cross-entropy, and tracks accuracy.
    """
    densenet_options = {
        'input_shape': (None, utils.STRIP_SIZE * 2, 1),
        'depth': 19,
        'nb_dense_block': 3,
        'growth_rate': 12,
        'nb_filter': -1,
        'nb_layers_per_block': -1,
        'bottleneck': False,
        'reduction': 0.0,
        'dropout_rate': 0.0,
        'weight_decay': 1e-4,
        'subsample_initial_block': False,
        'include_top': True,
        'weights': None,
        'input_tensor': None,
        'classes': 1,
        'activation': 'sigmoid',
    }
    network = DenseNet(**densenet_options)

    sgd = keras.optimizers.SGD(lr=0.1, momentum=0.9)
    network.compile(
        sgd,
        loss='binary_crossentropy',  # TODO
        metrics=['accuracy'],
    )
    network.summary()
    return network
def main(args):
    """Train a DenseNet classifier on CIFAR-10/100 with Chainer.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``depth``, ``block``, ``dataset``, ``batchsize``,
            ``growth_rate``, ``drop_ratio``, ``init_model``, ``lr``,
            ``gpus``, ``weight_decay``, ``epoch`` and ``dir``.

    Raises:
        NotImplementedError: For ``args.dataset == 'SVHN'``.
        ValueError: For any other unsupported dataset name (previously an
            ``UnboundLocalError`` further down).
    """
    assert((args.depth - args.block - 1) % args.block == 0)
    # Integer division: the layer count must be an int on Python 3 as well.
    n_layer = (args.depth - args.block - 1) // args.block

    if args.dataset == 'cifar10':
        mean = numpy.asarray((125.3, 123.0, 113.9))  # from fb.resnet.torch
        # Open question from the author: was std computed on 0-padded images?
        std = numpy.asarray((63.0, 62.1, 66.7))
        train, test = dataset.EXget_cifar10(scale=255, mean=mean, std=std)
        n_class = 10
    elif args.dataset == 'cifar100':
        mean = numpy.asarray((129.3, 124.1, 112.4))  # from fb.resnet.torch
        std = numpy.asarray((68.2, 65.4, 70.4))
        train, test = dataset.EXget_cifar100(scale=255, mean=mean, std=std)
        n_class = 100
    elif args.dataset == 'SVHN':
        raise NotImplementedError()
    else:
        raise ValueError('Unsupported dataset: %r' % (args.dataset,))

    train = PreprocessedDataset(train, random=True)
    test = PreprocessedDataset(test)
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.MultiprocessIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    model = chainer.links.Classifier(
        DenseNet(n_layer, args.growth_rate, n_class, args.drop_ratio, 16,
                 args.block))
    if args.init_model:
        serializers.load_npz(args.init_model, model)

    import EXoptimizers
    # The learning rate is divided by the number of GPUs in use.
    optimizer = EXoptimizers.originalNesterovAG(lr=args.lr / len(args.gpus),
                                                momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    # The first GPU is the main device; the others are named gpu<id>.
    devices = {'main': args.gpus[0]}
    if len(args.gpus) > 1:
        for gid in args.gpus[1:]:
            devices['gpu%d' % gid] = gid
    updater = training.ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.dir)

    val_interval = (1, 'epoch')
    log_interval = (1, 'epoch')

    def lr_shift():  # DenseNet specific!
        # Drop the learning rate by 10x at epochs 151 and 226.
        if updater.epoch == 151 or updater.epoch == 226:
            optimizer.lr *= 0.1
        return optimizer.lr

    trainer.extend(Evaluator(
        test_iter, model, device=args.gpus[0]), trigger=val_interval)
    trainer.extend(extensions.observe_value(
        'lr', lambda _: lr_shift()), trigger=(1, 'epoch'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot_object(
        model, 'epoch_{.updater.epoch}.model'), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        optimizer, 'epoch_{.updater.epoch}.state'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)

    start_time = time.time()
    trainer.extend(extensions.observe_value(
        'time', lambda _: time.time() - start_time), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'time', 'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr',
    ]), trigger=log_interval)
    trainer.extend(extensions.observe_value(
        'graph', lambda _: create_fig(args.dir)), trigger=(2, 'epoch'))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
std = np.std(X[:, :, :, i]) X_train[:, :, :, i] = (X_train[:, :, :, i] - mean) / std X_test[:, :, :, i] = (X_test[:, :, :, i] - mean) / std # set default parameters batch_size = 64 nb_epoch = 300 depth = 40 nb_dense_block = 3 nb_filter = 16 growth_rate = 12 dropout_rate = 0.2 learning_rate = 0.1 weight_decay = 1e-4 model = DenseNet(10, img_dim, depth, nb_dense_block, growth_rate, nb_filter, dropout_rate, weight_decay) opt = SGD(lr=learning_rate, momentum=0.9, nesterov=True) model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=["accuracy"]) offset = 0 save_path = os.path.join('./log', 'densenet') callbacks = [] callbacks.append(ModelCheckpoint(save_path + '.h5')) steps = [nb_epoch / 2 - offset, 3 * nb_epoch / 4 - offset] schedule = Step(steps, [learning_rate, 0.1 * learning_rate, 0.01 * learning_rate], verbose=1) callbacks.append(schedule)
# Confidence mode -> nu value that is part of the checkpoint file name.
# Both spellings ("tmsp5" as advertised in --help, "tmsp5.0" as originally
# accepted) are supported.
_MODE_TO_NU = {
    'msp': 0.0,
    'odin': 0.0,
    'cb': 0.0,
    'tmsp': 1.0,
    'tmsp0.5': 0.5,
    'tmsp5': 5.0,
    'tmsp5.0': 5.0,
    'tmsp10': 10.0,
    'tmsp10.0': 10.0,
}

# Training dataset -> (number of classes, number of input channels).
_TRAIN_DATA_SPECS = {
    'cifar10': (10, 3),
    'svhn': (10, 3),
    'fmnist': (10, 1),
    'kmnist': (10, 1),
}

# DenseNet architecture name -> depth passed to the constructor.
_DENSENET_DEPTHS = {'densenet': 100, 'densenet_small': 10}

_TEST_DATA_NAMES = (
    'cifar10', 'cifar10_bw', 'svhn', 'fakedata', 'fakedata_bw',
    'fakedata_wm', 'fmnist', 'kmnist', 'mnist', 'lsun',
)


def _parse_bool(text):
    """Convert a command-line string to bool (``type=bool`` is always True)."""
    lowered = str(text).strip().lower()
    if lowered in ('1', 'true', 't', 'yes', 'y'):
        return True
    if lowered in ('0', 'false', 'f', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean, got %r' % (text,))


def _build_test_dataset(args, mean, std):
    """Create the dataset selected by ``args.test_data`` with its transform."""
    name = args.test_data
    if name == 'fakedata_wm':
        # Deliberately wrong mean normalization.
        mean = (50, 50, 50)

    steps = [trn.ToTensor(), trn.Normalize(mean, std)]
    if name == 'cifar10_bw':
        steps = [trn.Grayscale(), trn.Resize((28, 28))] + steps
    elif name == 'lsun':
        steps = [trn.Resize((32, 32))] + steps
    transform = trn.Compose(steps)

    if name in ('cifar10', 'cifar10_bw'):
        return datasets.CIFAR10('../data', download=True, train=False,
                                transform=transform)
    if name == 'svhn':
        return datasets.SVHN('../data', download=True, split='test',
                             transform=transform)
    if name in ('fakedata', 'fakedata_wm'):
        return datasets.FakeData(size=10000, image_size=(3, 32, 32),
                                 transform=transform)
    if name == 'fakedata_bw':
        return datasets.FakeData(size=10000, image_size=(1, 28, 28),
                                 transform=transform)
    if name == 'fmnist':
        return datasets.FashionMNIST('../data', download=True, train=False,
                                     transform=transform)
    if name == 'kmnist':
        return datasets.KMNIST('../data', download=True, train=False,
                               transform=transform)
    if name == 'mnist':
        return datasets.MNIST('../data', download=True, train=False,
                              transform=transform)
    if name == 'lsun':
        return datasets.LSUN(args.lsun_path, classes='test',
                             transform=transform)
    raise ValueError('Unsupported test dataset: %r' % (name,))


def main():
    """Score a trained classifier on a test dataset and save the result.

    Parses the command line, builds the test loader, loads the checkpoint
    ``models/<data>/<arch>nu<nu>.pt``, runs ``test`` and writes the
    ``score`` / ``is_hit`` columns to a CSV file under ``--save_path``.

    Unsupported values for ``--mode``, ``--data``, ``--test-data`` or
    ``--arch`` are rejected through ``parser.error`` before any work is
    done; several of these previously crashed later with an unbound local.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description='Obtain confidence scores from trained networks')
    parser.add_argument('--test-batch-size', type=int, default=200,
                        metavar='N',
                        help='input batch size for testing (default: 200)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--arch', type=str, default='wrn', metavar='ARC',
                        help='neural network arch')
    parser.add_argument('--data', type=str, default='cifar10', metavar='DT',
                        help='dataset the classifier was trained with')
    parser.add_argument('--test-data', type=str, default='fakedata',
                        metavar='DT',
                        help='dataset used to test the classifier')
    parser.add_argument('--lsun-path', type=str, help='path to LSUN dataset')
    parser.add_argument('--save_path', type=str, default='data', metavar='SP',
                        help='path to save the produced confidence')
    parser.add_argument('--print-time', type=_parse_bool, default=False,
                        metavar='PT', help='print elapsed time per batch')
    parser.add_argument(
        '--mode', type=str, default='msp', metavar='M',
        help='available modes: msp (maximum softmax probability), '
             'tmsp (t-softmax msp), tmsp0.5, tmsp5, tmsp10 '
             '(where number indicates nu value) odin. default: msp')
    parser.add_argument('--T', type=float, default=1000.0, metavar='T',
                        help='ODIN temperature scaling')
    parser.add_argument('--eps', type=float, default=0.001, metavar='EPS',
                        help='ODIN epsilon value')
    args = parser.parse_args()

    # Validate every choice up front, before downloads or model loading.
    if args.mode not in _MODE_TO_NU:
        parser.error('mode not recognized!')
    if args.data not in _TRAIN_DATA_SPECS:
        parser.error('data not recognized: {}'.format(args.data))
    if args.test_data not in _TEST_DATA_NAMES:
        parser.error('test data not recognized: {}'.format(args.test_data))
    if args.arch not in _DENSENET_DEPTHS and args.arch != 'convnet':
        parser.error('arch not recognized: {}'.format(args.arch))

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    if device.type == 'cuda':
        torch.cuda.manual_seed(args.seed)

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    mean, std = get_normal(args.data)
    test_loader = torch.utils.data.DataLoader(
        _build_test_dataset(args, mean, std),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    Nc, channels = _TRAIN_DATA_SPECS[args.data]
    nu = _MODE_TO_NU[args.mode]

    model_path = 'models' + '/' + args.data
    model_name = args.arch + 'nu{}'.format(nu)

    if args.arch in _DENSENET_DEPTHS:
        model = DenseNet(_DENSENET_DEPTHS[args.arch], Nc, nu=nu).to(device)
    else:
        model = ConvNet(Nc, channels=channels, nu=nu).to(device)

    model.load_state_dict(
        torch.load(model_path + '/' + model_name + '.pt',
                   map_location=device))

    score, is_hit = test(args, model, device, test_loader)

    os.makedirs(args.save_path, exist_ok=True)
    df = pd.DataFrame(data={'score': score, 'is_hit': is_hit})
    df.to_csv('{}/{}_train{}_test{}_{}.csv'.format(args.save_path, args.arch,
                                                   args.data, args.test_data,
                                                   args.mode))
def main(chosen_model, model_checkpoint, alternate_scaling=False):
    """Measure the mean per-sample forward-pass time of a saved model.

    Args:
        chosen_model: Architecture name, matched case-insensitively. It is
            also used verbatim in the weight and result file names.
        model_checkpoint: Checkpoint identifier used in the file names.
        alternate_scaling: When true, the ``rescaled`` data and model
            directories are used instead of the default ones.

    The samples of the first test batch are timed one by one, and the mean
    is written to
    ``./results/<chosen_model>-<model_checkpoint>-prediction-time.txt``.
    An invalid ``chosen_model`` prints a message and exits with status 0.
    """
    if alternate_scaling:
        data = "/data2/cdcm/rescaled/preprocessed/test"
        weight_path = (f"/data2/cdcm/models/rescaled/"
                       f"{chosen_model}-{model_checkpoint}.pt")
    else:
        data = "/data2/cdcm/preprocessed/test"
        weight_path = f"/data2/cdcm/models/{chosen_model}-{model_checkpoint}.pt"

    test_dataset = ImageAndPathDataset(root=data, loader=npy_loader,
                                       extensions=".npy")
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128)

    device = (torch.device('cuda')
              if torch.cuda.is_available() else torch.device('cpu'))

    # The order of these checks matters: substring matches come first.
    name = chosen_model.lower()
    if "resnet50" in name:
        model = ResNet(Bottleneck, [3, 4, 6, 3], 4)
    elif name == "resnet34":
        model = ResNet(BasicBlock, [3, 4, 6, 3], 4)
    elif "resnet18" in name:
        model = ResNet(BasicBlock, [2, 2, 2, 2], 4)
    elif name == "basic":
        model = BasicNet()
    elif "vgg_dropout" in name:
        model = _vgg("vgg16", "D", True, dropout=True)
    elif "vgg" in name:
        model = _vgg("vgg16", "D", True)
    elif name == "mnasnet":
        model = MNASNet(1.0)
    elif name == "densenet":
        model = DenseNet()
    else:
        print(f"Invalid option for model: {chosen_model}")
        raise SystemExit(0)

    # The state dict is loaded into the DataParallel wrapper, so the stored
    # keys are expected to carry the "module." prefix.
    model = nn.DataParallel(model)
    checkpoint = torch.load(weight_path, map_location=device)
    model = model.to(device=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()

    batch = next(iter(test_loader))
    times = []
    with torch.no_grad():
        for sample in batch[0]:
            # Prepare the single-sample batch outside the timed region.
            # NOTE(review): assumes the loader yields tensors the model can
            # consume without further conversion -- confirm.
            sample = torch.unsqueeze(sample, 0).to(device)

            start = time.time()
            # The timed region previously held only the unsqueeze, so no
            # prediction was ever measured.
            model(sample)
            if device.type == 'cuda':
                # Wait for the GPU kernels to finish before stopping the clock.
                torch.cuda.synchronize()
            end = time.time()

            times.append(end - start)

    prediction_time = mean(times)
    with open(
            f"./results/{chosen_model}-{model_checkpoint}-prediction-time.txt",
            "w") as f:
        f.write(str(prediction_time))
        f.write("\n")
def _build_densenet_symbol(args):
    """Return the DenseNet symbol for ``args.data_type`` and ``args.depth``.

    Sets ``args.num_classes`` as a side effect, before the depth is checked.

    Raises:
        ValueError: If the depth has no known unit configuration for the
            imagenet/vggface/msface data types, or the data type is unknown.
    """
    classes_by_data_type = {"imagenet": 1000, "vggface": 2613, "msface": 79051}
    units_by_depth = {
        121: [6, 12, 24, 16],
        169: [6, 12, 32, 32],
        201: [6, 12, 48, 32],
        161: [6, 12, 36, 24],
    }

    if args.data_type in classes_by_data_type:
        args.num_classes = classes_by_data_type[args.data_type]
        if args.depth not in units_by_depth:
            raise ValueError(
                "no experiments done on detph {}, you can do it youself".
                format(args.depth))
        return DenseNet(
            units=units_by_depth[args.depth],
            num_stage=4,
            # Depth 161 uses a fixed growth rate of 48.
            growth_rate=48 if args.depth == 161 else args.growth_rate,
            num_class=args.num_classes,
            data_type=args.data_type,
            reduction=args.reduction,
            drop_out=args.drop_out,
            bottle_neck=True,
            bn_mom=args.bn_mom,
            workspace=args.workspace)

    if args.data_type == 'cifar10':
        args.num_classes = 10
        # The cifar10 configuration is fixed and does not depend on args.depth.
        return DenseNet([16, 16, 16], 3, 12, 10, 'cifar10')

    raise ValueError("do not support {} yet".format(args.data_type))


def main():
    """Train a DenseNet symbol with MXNet using the module-level ``args``.

    Builds the symbol for the requested data type, creates the kvstore and
    the train/validation ``ImageRecordIter`` pipelines, applies a memonger
    memory plan to the symbol, and fits an ``mx.model.FeedForward`` model.
    A checkpoint is saved at the end of every epoch under ``./model``.

    Raises:
        ValueError: If the data type or depth is not supported.
    """
    symbol = _build_densenet_symbol(args)

    kv = mx.kvstore.create(args.kv_store)
    if args.gpus is None:
        devs = mx.cpu()
    else:
        devs = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    epoch_size = max(int(args.num_examples / args.batch_size / kv.num_workers), 1)
    begin_epoch = args.model_load_epoch if args.model_load_epoch else 0

    # exist_ok avoids a check-then-create race when several workers start
    # at the same time.
    os.makedirs("./model", exist_ok=True)
    model_prefix = "model/densenet-{}-{}-{}".format(args.data_type, args.depth, kv.rank)
    checkpoint = mx.callback.do_checkpoint(model_prefix)

    arg_params = None
    aux_params = None
    if args.retrain:
        _, arg_params, aux_params = mx.model.load_checkpoint(
            model_prefix, args.model_load_epoch)

    is_cifar = args.data_type == "cifar10"
    # cifar10 and aug_level 1 share the same settings: no scale, aspect or
    # colour augmentation.
    light_aug = is_cifar or args.aug_level == 1
    data_shape = (3, 32, 32) if is_cifar else (3, 224, 224)

    if is_cifar:
        train_rec = "train.rec"
    elif args.aug_level == 1:
        train_rec = "train_256_q90.rec"
    else:
        train_rec = "train_480_q90.rec"
    val_rec = "val.rec" if is_cifar else "val_256_q90.rec"

    train = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, train_rec),
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        data_shape=data_shape,
        batch_size=args.batch_size,
        pad=4 if is_cifar else 0,
        fill_value=127,  # only used when pad is valid
        rand_crop=True,
        max_random_scale=1.0,  # 480 with imagenet and vggface, 384 with msface, 32 with cifar10
        min_random_scale=1.0 if light_aug else 0.667,  # 256.0/480.0=0.533, 256.0/384.0=0.667
        max_aspect_ratio=0 if light_aug else 0.25,
        random_h=0 if light_aug else 36,  # 0.4*90
        random_s=0 if light_aug else 50,  # 0.4*127
        random_l=0 if light_aug else 50,  # 0.4*127
        max_rotate_angle=0 if args.aug_level <= 2 else 10,
        max_shear_ratio=0 if args.aug_level <= 2 else 0.1,
        rand_mirror=True,
        shuffle=True,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    val = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, val_rec),
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        batch_size=args.batch_size,
        data_shape=data_shape,
        rand_crop=False,
        rand_mirror=False,
        num_parts=kv.num_workers,
        part_index=kv.rank)

    # Memory planning always uses this fixed input shape, whatever the
    # data type.
    dshape = (64, 3, 32, 32)
    net_planned = memonger.search_plan(symbol, 1, data=dshape)
    # NOTE(review): bare get_cost here vs memonger.get_cost below -- confirm
    # both refer to the same function.
    c = get_cost(symbol, data=dshape)
    print('cost %d MB' % c)
    new_cost = memonger.get_cost(net_planned, data=dshape)
    print('New feature map cost=%d MB' % new_cost)

    if is_cifar:
        num_epoch = 300
        lr_steps = [220, 260, 280]
        eval_metric = ['acc']
    else:
        num_epoch = 125
        lr_steps = [30, 60, 90, 95, 115, 120]
        eval_metric = ['acc', mx.metric.create('top_k_accuracy', top_k=5)]

    model = mx.model.FeedForward(
        ctx=devs,
        symbol=net_planned,
        arg_params=arg_params,
        aux_params=aux_params,
        num_epoch=num_epoch,
        begin_epoch=begin_epoch,
        learning_rate=args.lr,
        momentum=args.mom,
        wd=args.wd,
        optimizer='sgd',
        initializer=mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2),
        lr_scheduler=multi_factor_scheduler(
            begin_epoch, epoch_size, step=lr_steps, factor=0.1),
    )
    model.fit(
        X=train,
        eval_data=val,
        eval_metric=eval_metric,
        kvstore=kv,
        batch_end_callback=mx.callback.Speedometer(
            args.batch_size, args.frequent),
        epoch_end_callback=checkpoint)
def main(args):
    """Train a DenseNet classifier on CIFAR-10 or CIFAR-100 with Chainer.

    Normalises the dataset with statistics computed from the training
    images, trains for 300 epochs with Nesterov momentum and weight decay,
    and multiplies the learning rate by 0.1 at epochs 150 and 225. Every
    epoch the model is evaluated and model/optimizer snapshots and a log
    entry are written to ``args.dir``.

    Args:
        args: Parsed options. Reads ``depth``, ``block``, ``dataset``,
            ``augment``, ``batchsize``, ``split_size``, ``growth_rate``,
            ``drop_ratio``, ``init_model``, ``gpu``, ``lr``,
            ``weight_decay`` and ``dir``.

    Raises:
        AssertionError: If ``depth`` is not compatible with ``block``.
        NotImplementedError: If ``args.dataset`` is ``'SVHN'``.
        ValueError: If ``args.dataset`` is not a recognised name.
    """
    assert((args.depth - args.block - 1) % args.block == 0)
    n_layer = int((args.depth - args.block - 1) / args.block)

    if args.dataset == 'cifar10':
        train, test = cifar.get_cifar10()
        n_class = 10
    elif args.dataset == 'cifar100':
        train, test = cifar.get_cifar100()
        n_class = 100
    elif args.dataset == 'SVHN':
        raise NotImplementedError()
    else:
        # Fail clearly here instead of hitting an unbound `train` below.
        raise ValueError('unknown dataset: {}'.format(args.dataset))

    # Normalisation statistics come from the training images only.
    # NOTE(review): reducing over axes (0, 2, 3) presumably leaves one value
    # per channel for NCHW data -- confirm the array layout.
    images = convert.concat_examples(train)[0]
    mean = images.mean(axis=(0, 2, 3))
    std = images.std(axis=(0, 2, 3))

    train = PreprocessedDataset(train, mean, std, random=args.augment)
    test = PreprocessedDataset(test, mean, std)

    # Floor division keeps the batch size an int; `/` yields a float on
    # Python 3.
    iter_batchsize = args.batchsize // args.split_size
    train_iter = chainer.iterators.SerialIterator(train, iter_batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, iter_batchsize, repeat=False, shuffle=False)

    model = chainer.links.Classifier(DenseNet(
        n_layer, args.growth_rate, n_class, args.drop_ratio, 16, args.block))
    if args.init_model:
        serializers.load_npz(args.init_model, model)

    chainer.cuda.get_device(args.gpu).use()
    model.to_gpu()

    optimizer = chainer.optimizers.NesterovAG(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    updater = StandardUpdater(
        train_iter, optimizer, (args.split_size, 'mean'), device=args.gpu)
    trainer = training.Trainer(updater, (300, 'epoch'), out=args.dir)

    val_interval = (1, 'epoch')
    log_interval = (1, 'epoch')

    def lr_shift():  # DenseNet specific!
        # Called once per epoch through observe_value below; applies the
        # step decay and returns the current learning rate for the log.
        if updater.epoch == 150 or updater.epoch == 225:
            optimizer.lr *= 0.1
        return optimizer.lr

    trainer.extend(Evaluator(
        test_iter, model, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.observe_value(
        'lr', lambda _: lr_shift()), trigger=(1, 'epoch'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot_object(
        model, 'epoch_{.updater.epoch}.model'), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        optimizer, 'epoch_{.updater.epoch}.state'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))

    start_time = time.time()
    trainer.extend(extensions.observe_value(
        'time', lambda _: time.time() - start_time), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'time', 'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr',
    ]), trigger=log_interval)
    trainer.extend(extensions.observe_value(
        'graph', lambda _: create_fig(args.dir)),
        trigger=(1, 'epoch'), priority=50)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()