def get_model():
    """Build the MobileNet variant named by the global ``args.model``.

    Supported names are ``'mobilenet'`` and ``'mobilenet_0.5flops'``; any
    other value raises ``NotImplementedError``.  The network is created with
    1000 output classes and is moved to the GPU when the global ``use_cuda``
    is truthy.
    """
    print('=> Building model..')

    # Extra constructor keywords for each supported model name.
    if args.model == 'mobilenet':
        extra_kwargs = {}
    elif args.model == 'mobilenet_0.5flops':
        extra_kwargs = {'profile': '0.5flops'}
    else:
        raise NotImplementedError

    # Imported lazily, only once a supported model has been requested.
    from models.mobilenet import MobileNet
    net = MobileNet(n_class=1000, **extra_kwargs)

    if use_cuda:
        return net.cuda()
    return net
def get_model(n_class):
    """Build the '0.5flops' MobileNet and load its stored weights.

    The model name is read from the global ``args.model``; only
    ``'mobilenet_0.5flops'`` is supported and anything else raises
    ``NotImplementedError``.

    Args:
        n_class: forwarded as the first positional argument of ``MobileNet``.

    Returns:
        The network with the checkpoint's ``'state_dict'`` entry loaded.
    """
    print('=> Building model {}...'.format(args.model))
    if args.model != 'mobilenet_0.5flops':
        raise NotImplementedError

    weights_file = './checkpoints/mobilenet_imagenet_0.5flops_70.5.pth.tar'
    net = MobileNet(n_class, profile='0.5flops')

    print('=> Loading checkpoints..')
    stored = torch.load(weights_file)
    # NOTE(review): the original comment here read "remove .module"; no key
    # prefix is stripped, so the checkpoint keys presumably already lack a
    # DataParallel "module." prefix -- confirm against the checkpoint file.
    net.load_state_dict(stored['state_dict'])
    return net
def get_net(network: str, num_classes) -> torch.nn.Module:
    """Instantiate the architecture called ``network``.

    Args:
        network: architecture name, e.g. ``'VGG16'`` or ``'MobileNetV2'``.
        num_classes: forwarded to the constructor as ``num_classes``.

    Returns:
        The freshly constructed network, or ``None`` when ``network`` is not
        one of the known names.
    """
    # Builders are wrapped in lambdas so that only the requested architecture
    # is looked up and constructed.
    builders = {
        'VGG16': lambda: VGG('VGG16', num_classes=num_classes),
        'ResNet34': lambda: ResNet34(num_classes=num_classes),
        'PreActResNet18': lambda: PreActResNet18(num_classes=num_classes),
        'GoogLeNet': lambda: GoogLeNet(num_classes=num_classes),
        'densenet_cifar': lambda: densenet_cifar(num_classes=num_classes),
        'ResNeXt29_2x64d': lambda: ResNeXt29_2x64d(num_classes=num_classes),
        'MobileNet': lambda: MobileNet(num_classes=num_classes),
        'MobileNetV2': lambda: MobileNetV2(num_classes=num_classes),
        'DPN92': lambda: DPN92(num_classes=num_classes),
        'ShuffleNetG2': lambda: ShuffleNetG2(num_classes=num_classes),
        'SENet18': lambda: SENet18(num_classes=num_classes),
        'ShuffleNetV2': lambda: ShuffleNetV2(1, num_classes=num_classes),
        'EfficientNetB0': lambda: EfficientNetB0(num_classes=num_classes),
    }
    build = builders.get(network)
    if build is None:
        return None
    return build()
def get_model_and_checkpoint(model, dataset, checkpoint_path, n_gpu=1):
    """Build an ImageNet model, load its weights and return both.

    Args:
        model: ``'mobilenet'`` or ``'mobilenetv2'``.
        dataset: must be ``'imagenet'``.
        checkpoint_path: file readable by ``torch.load``; either a bare state
            dict or a dict holding one under the ``'state_dict'`` key.
        n_gpu: when greater than 1 and CUDA is available, the network is
            wrapped in ``torch.nn.DataParallel`` over devices
            ``0 .. n_gpu - 1``.

    Returns:
        ``(net, state_dict_copy)`` where ``state_dict_copy`` is a deep copy of
        the network's state dict taken after loading.

    Raises:
        NotImplementedError: for any other model/dataset combination.
    """
    if model == 'mobilenet' and dataset == 'imagenet':
        from models.mobilenet import MobileNet
        net = MobileNet(n_class=1000)
    elif model == 'mobilenetv2' and dataset == 'imagenet':
        from models.mobilenet_v2 import MobileNetV2
        net = MobileNetV2(n_class=1000)
    else:
        raise NotImplementedError(
            'unsupported model/dataset combination: {!r}/{!r}'.format(
                model, dataset))

    use_cuda = torch.cuda.is_available()
    # On a host without CUDA, tensors that were saved from a GPU have to be
    # remapped to the CPU or deserialization fails.
    sd = torch.load(
        checkpoint_path,
        map_location=None if use_cuda else torch.device('cpu'))
    if 'state_dict' in sd:  # a checkpoint but not a state_dict
        sd = sd['state_dict']
    # Drop the "module." fragment that DataParallel adds to parameter names.
    sd = {k.replace('module.', ''): v for k, v in sd.items()}
    net.load_state_dict(sd)

    # Only touch the GPU when one is actually present.
    if use_cuda:
        net = net.cuda()
        if n_gpu > 1:
            net = torch.nn.DataParallel(net, range(n_gpu))

    return net, deepcopy(net.state_dict())
def get_model_and_checkpoint(model, dataset, checkpoint_path, n_gpu=1):
    """Create an ImageNet network, restore its weights and snapshot them.

    Args:
        model: ``'mobilenet'`` or ``'mobilenetv2'``.
        dataset: must be ``'imagenet'``.
        checkpoint_path: file readable by ``torch.load``; either a bare state
            dict or a dict holding one under the ``'state_dict'`` key.
        n_gpu: with CUDA available and ``n_gpu > 1`` the network is wrapped
            in ``torch.nn.DataParallel`` over devices ``0 .. n_gpu - 1``.

    Returns:
        ``(net, snapshot)`` where ``snapshot`` is a deep copy of the
        network's state dict.

    Raises:
        NotImplementedError: for any other model/dataset combination.
    """
    selection = (model, dataset)
    if selection == ('mobilenet', 'imagenet'):
        from models.mobilenet import MobileNet
        net = MobileNet(n_class=1000)
    elif selection == ('mobilenetv2', 'imagenet'):
        from models.mobilenet_v2 import MobileNetV2
        net = MobileNetV2(n_class=1000)
    else:
        raise NotImplementedError

    cuda_ok = torch.cuda.is_available()

    # Without CUDA the stored tensors are remapped onto the CPU while loading.
    load_kwargs = {} if cuda_ok else {'map_location': torch.device('cpu')}
    loaded = torch.load(checkpoint_path, **load_kwargs)

    # A full checkpoint keeps the weights under 'state_dict'; otherwise the
    # loaded object already is the state dict.
    weights = loaded['state_dict'] if 'state_dict' in loaded else loaded

    cleaned = {}
    for name, tensor in weights.items():
        cleaned[name.replace('module.', '')] = tensor
    net.load_state_dict(cleaned)

    if cuda_ok:
        net = net.cuda()
        if n_gpu > 1:
            net = torch.nn.DataParallel(net, range(n_gpu))

    # deepcopy duplicates every layer's entry into an independent object that
    # no longer shares anything with the live network.
    snapshot = deepcopy(net.state_dict())
    return net, snapshot
'./data', train=False, transform=transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.491399689874, 0.482158419622, 0.446530924224), (0.247032237587, 0.243485133253, 0.261587846975)) ])), batch_size=args.batch_size, shuffle=True, **kwargs) if args.model == 'SqueezeNet': net = SqueezeNet() elif args.model == 'MobileNet': net = MobileNet(num_classes=10) elif args.model == 'ShuffleNet': net = shufflenet(groups=2) elif args.model == 'MobileNetv2': net = MobileNetV2() elif args.model == 'SENet': net = SENet18() if args.pretrained is not None: print('Loading pretrained weights...') net.load_state_dict(torch.load(args.pretrained)) if args.cuda: net.cuda() # print(net)
import os
import torch
from models.mobilenet import MobileNet
from config import configer
from torch.cuda import is_available

# File holding the initial weights that are reused across runs.
preinit = "./init.pkl"

net = MobileNet()
if configer.cuda and is_available():
    net.cuda()

if not os.path.exists(preinit):
    # First run: persist the freshly initialised weights.
    torch.save(net.state_dict(), preinit)
else:
    # The file may have been written while the model lived on a GPU.
    # Deserialize onto the CPU so loading also works on hosts without CUDA;
    # load_state_dict then copies the values into net's existing parameters,
    # wherever those reside.
    net.load_state_dict(torch.load(preinit, map_location="cpu"))
def build_model(net='MobileNet', input_shape=(224, 224, 3),
                siamese_weights=None, share=True):
    """Build a two-input siamese Keras model on top of a chosen backbone.

    Both inputs are passed through a backbone, the two outputs are
    L2-normalised along the last axis and combined by the ``dist`` Lambda
    layer (``euclidean_distance``) into the model output.

    Args:
        net: backbone name (e.g. ``'MobileNet'``, ``'DenseNet69'``,
            ``'ShuffleNetV2'``).  An unknown name prints a message and calls
            ``sys.exit()``.
        input_shape: shape of each of the two inputs.
        siamese_weights: optional weights file passed to
            ``model.load_weights``.
        share: when truthy both inputs go through the same backbone instance.
            When falsy a second backbone is built for the second input; this
            is only available for the DenseNet backbones.

    Returns:
        The assembled ``keras.models.Model``.

    Raises:
        ValueError: if ``share`` is falsy for a backbone that has no separate
            second branch.
    """
    def _plain(factory):
        # Backbones that only take include_top / input_shape.
        return factory(include_top=False, input_shape=input_shape)

    def _densenet(blocks, **extra):
        return DenseNet(include_top=False, blocks=blocks,
                        input_shape=input_shape, **extra)

    def _dense_shuffle(factory, blocks, num_shuffle_units):
        return factory(include_top=False, blocks=blocks,
                       input_shape=input_shape,
                       num_shuffle_units=num_shuffle_units,
                       scale_factor=1.0, bottleneck_ratio=1,
                       dropout_rate=0.5)

    def _shuffle(factory):
        return factory(include_top=False, scale_factor=1.0, pooling='avg',
                       input_shape=input_shape, num_shuffle_units=[3, 7, 3],
                       bottleneck_ratio=1)

    # Second backbone; stays None unless the chosen network supports an
    # unshared branch and share is falsy.
    base_model_b = None

    if net == 'MobileNet':
        base_model = _plain(MobileNet)
    elif net == 'MobileNetV2':
        base_model = _plain(MobileNetV2)
    elif net == 'NASNetMobile':
        base_model = _plain(NASNetMobile)
    elif net == 'ResNet18':
        base_model = _plain(ResNet18)
    elif net == 'ResNet18V2':
        base_model = _plain(ResNet18V2)
    elif net == 'ResNet34':
        base_model = _plain(ResNet34)
    elif net == 'ResNet34V2':
        base_model = _plain(ResNet34V2)
    elif net == 'DenseNet21':
        base_model = _densenet([2, 2, 2, 2], name='a')
        if not share:
            base_model_b = _densenet([2, 2, 2, 2], name='b')
    elif net == 'DenseNet69':
        base_model = _densenet([6, 8, 10, 8])
        if not share:
            base_model_b = _densenet([6, 8, 10, 8], name='b')
    elif net == 'DenseNet109':
        base_model = _densenet([6, 12, 18, 16])
        if not share:
            base_model_b = _densenet([6, 12, 18, 16], name='b')
    elif net == 'DenseShuffleV1_57_373':
        base_model = _dense_shuffle(DenseShuffleV1, [6, 8, 12], [3, 7, 3])
    elif net == 'DenseShuffleV2_57_373':
        base_model = _dense_shuffle(DenseShuffleV2, [6, 8, 12], [3, 7, 3])
    elif net == 'DenseShuffleV2_49_353':
        base_model = _dense_shuffle(DenseShuffleV2, [6, 8, 8], [3, 5, 3])
    elif net == 'DenseShuffleV2_17_232':
        base_model = _dense_shuffle(DenseShuffleV2, [2, 2, 2], [2, 3, 2])
    elif net == 'ShuffleNetV2':
        base_model = _shuffle(ShuffleNetV2)
    elif net == 'ShuffleNet':
        base_model = _shuffle(ShuffleNet)
    elif net == 'MobileNetV3Small':
        base_model = _plain(MobileNetV3Small)
    elif net == 'SqueezeNet':
        base_model = _plain(SqueezeNet)
    else:
        print('the network name you have entered is not supported yet')
        sys.exit()

    # Fail with an explicit message instead of tripping over an unbound
    # second backbone further down.
    if not share and base_model_b is None:
        raise ValueError(
            'share=False is only supported for the DenseNet backbones, '
            'got net={!r}'.format(net))

    input_a = keras.layers.Input(shape=input_shape, name='input_a')
    input_b = keras.layers.Input(shape=input_shape, name='input_b')

    processed_a = base_model(input_a)
    if share:
        processed_b = base_model(input_b)
    else:
        processed_b = base_model_b(input_b)

    # A sigmoid activation on both branches used to sit here and is disabled.
    # The same normalisation layer is applied to both branches.
    normalize = keras.layers.Lambda(lambda x: K.l2_normalize(x, axis=-1),
                                    name='normalize')
    processed_a = normalize(processed_a)
    processed_b = normalize(processed_b)

    distance = keras.layers.Lambda(
        euclidean_distance,
        output_shape=eucl_dist_output_shape,
        name='dist')([processed_a, processed_b])
    model = keras.models.Model([input_a, input_b], distance)

    if siamese_weights is not None:
        print('load siamses weights ....')
        model.load_weights(siamese_weights)
    return model
def main():
    """Train and evaluate a CIFAR-10 classifier chosen on the command line.

    Parses the command-line options, seeds PyTorch with ``--seed``, builds
    the CIFAR-10 train/test loaders, instantiates the network selected by
    ``--model-no`` and then, for every epoch, runs ``train`` followed by
    ``test``.
    """
    parser = argparse.ArgumentParser(
        description="Parameters for Training CIFAR-10")
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--num-workers', type=int, default=1, metavar='N',
                        help='number of workers for cuda')
    parser.add_argument('--model-no', type=int, default=1, metavar='N',
                        help='model to train: 1=Net, 2=ResNet18, '
                             '3=MobileNet, 4=MobileNetV2, 5=VGG16; any other '
                             'value falls back to Net (default: 1)')
    parser.add_argument(
        '--log-interval', type=int, default=10, metavar='N',
        help='how many batches to wait before logging training status')
    args = parser.parse_args()

    # Apply the requested seed; it was parsed but previously never used.
    torch.manual_seed(args.seed)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # Extra DataLoader options are only passed when running on a GPU.
    cuda_args = {
        'num_workers': args.num_workers,
        'pin_memory': True
    } if use_cuda else {}

    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    cifar_training_data = datasets.CIFAR10("../data/CIFAR10", train=True,
                                           transform=data_transform,
                                           download=True)
    cifar_testing_data = datasets.CIFAR10("../data/CIFAR10", train=False,
                                          transform=data_transform)
    train_loader = torch.utils.data.DataLoader(cifar_training_data,
                                               batch_size=args.batch_size,
                                               shuffle=True, **cuda_args)
    test_loader = torch.utils.data.DataLoader(
        cifar_testing_data, batch_size=args.test_batch_size, shuffle=True,
        **cuda_args)

    # Model 1 and every unrecognised number both map to the basic Net.
    model_no = args.model_no
    if model_no == 2:
        model = ResNet18()
    elif model_no == 3:
        model = MobileNet()
    elif model_no == 4:
        model = MobileNetV2()
    elif model_no == 5:
        model = VGG('VGG16')
    else:
        model = Net()
    model = model.to(device)

    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum)
    for epoch in range(1, args.epochs + 1):
        train(epoch, model, train_loader, optimizer, device,
              args.log_interval)
        test(model, test_loader, device)
def main(argv):
    """Fine-tune MobileNet on the target dataset and report test metrics.

    Seeds NumPy and TensorFlow, prepares the experiment directory and log
    file, loads the train/validation/test splits, then trains from
    ``FLAGS.initial_step`` until ``FLAGS.final_step``, logging training
    metrics every ``FLAGS.info_freq`` steps and validation metrics every
    ``FLAGS.valid_freq`` steps.  A pass over the test split follows.

    Args:
        argv: command-line arguments; not used here.
    """
    # Fixed seeds make the results reproducible.
    np.random.seed(43)
    tf.random.set_seed(42)

    # Working directory of this experiment.
    experiment_dir = os.path.join(FLAGS.output_dir, FLAGS.experiment_name)
    from pathlib import Path
    Path(experiment_dir).mkdir(parents=True, exist_ok=True)

    # Send the training log to a file inside the experiment directory.
    logging.get_absl_handler().use_absl_log_file('logs', experiment_dir)

    # ---- Target problem data ----
    train_dataset, valid_dataset, test_dataset = target_dataset.load(
        FLAGS.data_dir, FLAGS.batch_size)

    # ---- Transfer learning model ----
    model = MobileNet(fine_tune=FLAGS.fine_tune)
    model.build(input_shape=(FLAGS.batch_size, 224, 224, 3))
    model.summary()

    # Depth of the one-hot label encoding used by every step function.
    num_classes = 6

    loss_func = tf.losses.CategoricalCrossentropy()
    optimizer = tf.optimizers.Adam(FLAGS.learning_rate)

    def record(loss_metric, accuracy_metric, loss, logits, labels):
        # Fold one batch into a loss/accuracy metric pair.
        loss_metric.update_state(loss)
        predicted = tf.argmax(logits, axis=-1, output_type=tf.int32)
        accuracy_metric.update_state(predicted, labels)

    def evaluate_batch(loss_metric, accuracy_metric, images, labels):
        # Forward pass without any weight update.
        logits = model(images)
        loss = loss_func(tf.one_hot(labels, num_classes), logits)
        record(loss_metric, accuracy_metric, loss, logits, labels)

    train_accuracy = tf.metrics.Accuracy(name='train_accuracy')
    train_loss = tf.metrics.Mean()

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            logits = model(images)
            loss = loss_func(tf.one_hot(labels, num_classes), logits)
        variables = model.trainable_variables
        optimizer.apply_gradients(zip(tape.gradient(loss, variables),
                                      variables))
        record(train_loss, train_accuracy, loss, logits, labels)

    # Metrics accumulated over the validation split.
    valid_loss = tf.metrics.Mean()
    valid_accuracy = tf.metrics.Accuracy(name='valid_accuracy')

    @tf.function
    def valid_step(images, labels):
        evaluate_batch(valid_loss, valid_accuracy, images, labels)

    # Metrics accumulated over the test split.
    test_loss = tf.metrics.Mean()
    test_accuracy = tf.metrics.Accuracy(name='test_accuracy')

    @tf.function
    def test_step(images, labels):
        evaluate_batch(test_loss, test_accuracy, images, labels)

    # ---- Training, with periodic validation ----
    for step, example in train_dataset.enumerate(FLAGS.initial_step):
        if step == FLAGS.final_step:
            break

        train_step(example['image'], example['label'])

        if step % FLAGS.info_freq == 0:
            logging.info(
                'step {} - loss: {:4.2f} - accuracy: {:5.2%}'.format(
                    step, train_loss.result(), train_accuracy.result()))
            train_loss.reset_states()
            train_accuracy.reset_states()

        if step % FLAGS.valid_freq == 0:
            for valid_example in valid_dataset:
                valid_step(valid_example['image'], valid_example['label'])
            logging.info(
                '------------------------------------------- Validation: loss = {:5.2f}, accuracy {:5.2%}'
                .format(valid_loss.result(), valid_accuracy.result()))
            valid_loss.reset_states()
            valid_accuracy.reset_states()

    # ---- Test pass ----
    for test_example in test_dataset:
        test_step(test_example['image'], test_example['label'])
    logging.info(
        'Test: loss = {:5.2f}, accuracy {:5.2%}'.format(
            test_loss.result(), test_accuracy.result()))
def train(log_dir):
    """Train a network and return the best validation accuracy observed.

    Runs ``FLAGS.max_steps`` training steps.  Every ``FLAGS.log_steps`` steps
    the training summaries are written, the model is evaluated on the
    validation set and a checkpoint is saved under ``FLAGS.out_dir``.  If the
    last training step was not an evaluation step, one more evaluation is run
    at the end.

    Args:
        log_dir: directory that receives the ``train`` and ``val`` summary
            sub-directories.

    Returns:
        The highest validation accuracy seen, or 0 if no evaluation ran.
    """
    with tf.Graph().as_default():
        # Data loading
        train_dataset = cifar100.get_data(FLAGS.data_dir,
                                          is_training=True,
                                          batch_size=FLAGS.batch_size)
        val_dataset = cifar100.get_data(FLAGS.data_dir,
                                        is_training=False,
                                        batch_size=FLAGS.batch_size)

        # The string handle selects which dataset feeds the shared iterator.
        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        images, labels = iterator.get_next()

        train_iterator = train_dataset.make_one_shot_iterator()
        val_iterator = val_dataset.make_initializable_iterator()

        # Build model
        is_training = tf.placeholder(tf.bool, name='is_training')
        model = MobileNet(images, 100, is_training, labels)

        saver = tf.train.Saver()
        top_accuracy = 0

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            train_handle, val_handle = sess.run(
                [train_iterator.string_handle(),
                 val_iterator.string_handle()])

            train_writer = tf.summary.FileWriter(
                os.path.join(log_dir, 'train'), sess.graph)
            val_writer = tf.summary.FileWriter(os.path.join(log_dir, 'val'))

            train_feed = {is_training: True, handle: train_handle}
            val_feed = {is_training: False, handle: val_handle}

            def evaluate(step):
                """Run a validation pass, log it, checkpoint; return accuracy."""
                sess.run([val_iterator.initializer, model.accuracy_reset])
                while True:
                    try:
                        sess.run(model.accuracy_update, feed_dict=val_feed)
                    except tf.errors.OutOfRangeError:
                        # Finished pass over validation set
                        break
                accuracy = sess.run(model.accuracy)

                # Log validation summaries
                metrics_summary = sess.run(model.metrics_summary_op)
                val_writer.add_summary(metrics_summary, global_step=step)
                saver.save(sess, os.path.join(FLAGS.out_dir, 'model'),
                           global_step=step)
                return accuracy

            try:
                # Training
                sess.run(model.accuracy_reset)
                for step in range(FLAGS.max_steps):
                    if step % FLAGS.log_steps != 0:
                        sess.run([model.train_op, model.accuracy_update],
                                 feed_dict=train_feed)
                        continue

                    _, _, summary = sess.run(
                        [model.train_op, model.accuracy_update,
                         model.train_summary_op],
                        feed_dict=train_feed)

                    # Log train summaries
                    train_writer.add_summary(summary, global_step=step)
                    metrics_summary = sess.run(model.metrics_summary_op)
                    train_writer.add_summary(metrics_summary,
                                             global_step=step)

                    # Evaluate model and update top accuracy
                    top_accuracy = max(top_accuracy, evaluate(step))
                    # Start the next window of training accuracy from zero.
                    sess.run(model.accuracy_reset)

                # Final evaluation, unless the last step already evaluated.
                # With no training steps at all there is nothing to evaluate.
                last_step = FLAGS.max_steps - 1
                if FLAGS.max_steps > 0 and last_step % FLAGS.log_steps != 0:
                    top_accuracy = max(top_accuracy, evaluate(last_step))
            finally:
                # Flush pending events and release the summary files.
                train_writer.close()
                val_writer.close()

        return top_accuracy