def loss(images, labels, boxes, num_objects):
    """Build the SSD training graph and return (classification, localization) losses.

    Runs MobileNetV2 to get multi-scale feature maps, attaches box/class
    predictor heads per feature map, and matches anchors to ground truth.
    """
    with slim.arg_scope([slim.model_variable, slim.variable], device='/cpu:0'):
        train_model = model.MobileNetV2(is_training=True, input_size=FLAGS.image_size)
        feature_maps = train_model._build_model(images)

        # First feature map uses 3 aspect ratios, the remaining five use 6.
        aspect_ratios = [(1.0, 2.0, 1.0 / 2.0)]
        aspect_ratios += [(1.0, 2.0, 1.0 / 2.0, 3.0, 1.0 / 3.0, 1.0)] * 5

        box_outputs, cls_outputs = [], []
        for layer_idx, (ratios, feature) in enumerate(zip(aspect_ratios, feature_maps)):
            box_outputs.append(train_model.BoxPredictor(feature, len(ratios), layer_idx))
            cls_outputs.append(train_model.ClassPredictor(feature, len(ratios), layer_idx))

        anchors = aml.make_anchor(cls_outputs, 0.2, 0.95, aspect_ratios)
        cls_loss, loc_loss = aml.anchor_matching_cls_loc_loss(
            anchors, cls_outputs, box_outputs, labels, boxes, num_objects,
            positive_threshold=FLAGS.positive_threshold,
            negative_threshold=FLAGS.negative_threshold,
            num_classes=FLAGS.num_classes,
            max_boxes=FLAGS.max_boxes)
    return cls_loss, loc_loss
def load_model(self):
    """Load a classification checkpoint and prepare the model for inference.

    Reads the checkpoint at ``self.model_checkpoint_file_path`` onto CPU,
    rebuilds the architecture described by the stored args, restores the
    weights, copies training metadata onto ``self``, and puts the model in
    eval mode.

    Raises:
        NotImplementedError: if the checkpoint's ``model_type`` or
            ``model_arc`` is not one of the supported values.
    """
    self.checkpoint = torch.load(self.model_checkpoint_file_path,
                                 map_location=lambda storage, loc: storage)
    self.model_args = self.checkpoint['args']

    # Number of output classes depends on which dataset the model was trained on.
    self.num_classes = None
    if self.model_args.model_type == 'food179':
        self.num_classes = 179
    elif self.model_args.model_type == 'nsfw':
        self.num_classes = 5
    else:
        # BUG FIX: the original `raise ('Not Implemented!')` raises a plain
        # string, which is a TypeError in Python 3. Raise a real exception.
        raise NotImplementedError(
            'Unsupported model_type: %s' % self.model_args.model_type)

    # Dispatch table replaces the long if/elif chain over resnet variants.
    resnet_builders = {
        'resnet18': model.resnet18,
        'resnet34': model.resnet34,
        'resnet50': model.resnet50,
        'resnet101': model.resnet101,
        'resnet152': model.resnet152,
    }
    arc = self.model_args.model_arc
    if arc in resnet_builders:
        self.model = resnet_builders[arc](num_classes=self.num_classes,
                                          zero_init_residual=True)
    elif arc == 'mobilenet':
        self.model = model.MobileNetV2(n_class=self.num_classes, input_size=256)
    else:
        raise NotImplementedError('Unsupported model_arc: %s' % arc)

    # Wrap before loading: the state dict was presumably saved from a
    # DataParallel-wrapped model ('module.' key prefix) — TODO confirm.
    self.model = nn.DataParallel(self.model)
    self.model.load_state_dict(self.checkpoint['model_state_dict'])

    # Training metadata carried alongside the weights.
    self.model_epoch = self.checkpoint['epoch']
    self.model_test_acc = self.checkpoint['test_acc']
    self.model_best_acc = self.checkpoint['best_acc']
    self.model_test_acc_top5 = self.checkpoint['test_acc_top5']
    self.model_class_to_idx = self.checkpoint['class_to_idx']
    self.model_idx_to_class = {v: k for k, v in self.model_class_to_idx.items()}
    self.model_train_history_dict = self.checkpoint['train_history_dict']
    self.mean = self.checkpoint['NORM_MEAN']
    self.std = self.checkpoint['NORM_STD']
    self.model.eval()
    return
def init_tf(logs_train_dir='./model_use/model.ckpt-15000'):
    """Build the single-image inference graph and restore weights.

    Populates the module-level globals ``sess`` (restored session),
    ``pred`` (softmax probabilities) and ``x`` (image placeholder).
    """
    global sess, pred, x

    # Single HxWx3 image in; standardize, then add a batch dimension.
    x = tf.placeholder(tf.float32, shape=[IMG_W, IMG_W, 3])
    standardized = tf.image.per_image_standardization(x)
    batched = tf.reshape(standardized, [-1, IMG_W, IMG_W, 3])

    # Forward pass through MobileNetV2 in inference mode.
    logit = model.MobileNetV2(batched, num_classes=N_CLASSES,
                              is_training=False).output
    print("logit", np.shape(logit))
    pred = tf.nn.softmax(logit)

    # Restore the checkpoint into a fresh session.
    saver = tf.train.Saver()
    sess = tf.Session(config=config)
    saver.restore(sess, logs_train_dir)
    print('load model done...')
def inference(images):
    """Build the SSD inference graph; return (class, location) predictions."""
    with slim.arg_scope([slim.model_variable, slim.variable], device='/cpu:0'):
        net = model.MobileNetV2(is_training=False, input_size=FLAGS.image_size)
        features = net._build_model(images)

        # 3 aspect ratios on the first feature map, 6 on each of the other five.
        ratios_per_layer = [(1.0, 2.0, 1.0 / 2.0)]
        ratios_per_layer += [(1.0, 2.0, 1.0 / 2.0, 3.0, 1.0 / 3.0, 1.0)] * 5

        box_preds, cls_preds = [], []
        for idx, (ratios, feature) in enumerate(zip(ratios_per_layer, features)):
            box_preds.append(net.BoxPredictor(feature, len(ratios), idx))
            cls_preds.append(net.ClassPredictor(feature, len(ratios), idx))

        anchors = aml.make_anchor(cls_preds, 0.2, 0.95, ratios_per_layer)
        cls_pred, loc_pred = aml.decode_logits(anchors, cls_preds, box_preds)
    return cls_pred, loc_pred
def main():
    """Entry point: build the model, optimizer and scheduler, optionally
    resume from a checkpoint, then hand off to the training loop.

    Raises:
        NotImplementedError: for an unsupported ``args.model_type`` or
            ``args.model_arc``.
    """
    global NORM_MEAN, NORM_STD, coconut_model, train_history_dict

    for arg in vars(args):
        print(str(arg) + ': ' + str(getattr(args, arg)))
    print('=' * 100)

    # Pick class count and input normalization for the chosen dataset.
    num_classes = None
    if args.model_type == 'food179':
        num_classes = 179
        NORM_MEAN = FOOD179_MEAN
        NORM_STD = FOOD179_STD
    elif args.model_type == 'nsfw':
        num_classes = 5
        NORM_MEAN = NSFW_MEAN
        NORM_STD = NSFW_STD
    else:
        # BUG FIX: the original `raise ('Not Implemented!')` raises a plain
        # string — a TypeError in Python 3. Raise a real exception instead.
        raise NotImplementedError('Unsupported model_type: %s' % args.model_type)

    # Dispatch table replaces the long if/elif chain over resnet variants.
    resnet_builders = {
        'resnet18': model.resnet18,
        'resnet34': model.resnet34,
        'resnet50': model.resnet50,
        'resnet101': model.resnet101,
        'resnet152': model.resnet152,
    }
    if args.model_arc in resnet_builders:
        coconut_model = resnet_builders[args.model_arc](
            num_classes=num_classes, zero_init_residual=True)
    elif args.model_arc == 'mobilenet':
        coconut_model = model.MobileNetV2(n_class=num_classes, input_size=256)
    else:
        raise NotImplementedError('Unsupported model_arc: %s' % args.model_arc)

    coconut_model = nn.DataParallel(coconut_model)

    if args.cuda:
        coconut_model = coconut_model.cuda()
        # BUG FIX: the cuDNN autotuner flag is torch.backends.cudnn.benchmark;
        # assigning `torch.backends.benchmark` just created a new module
        # attribute and had no effect.
        torch.backends.cudnn.benchmark = True
        print("CUDA Enabled")
        gpu_count = torch.cuda.device_count()
        print('Total of %d GPU available' % (gpu_count))
        # Scale batch sizes linearly with GPU count (DataParallel splits batches).
        args.train_batch_size = args.train_batch_size * gpu_count
        args.test_batch_size = args.test_batch_size * gpu_count
        print('args.train_batch_size: %d' % (args.train_batch_size))
        print('args.test_batch_size: %d' % (args.test_batch_size))

    model_parameters = filter(lambda p: p.requires_grad, coconut_model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print('Total of %d parameters' % (params))

    # Build Training
    start_epoch = 0
    best_acc = 0
    optimizer = None
    scheduler = None
    milestones = [50, 150, 250]
    if args.train_optimizer == 'sgd':
        optimizer = optim.SGD(coconut_model.parameters(), lr=args.lr,
                              momentum=0.9, nesterov=True,
                              weight_decay=args.l2_reg)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=milestones,
                                                   gamma=0.1)
    elif args.train_optimizer == 'adam':
        optimizer = optim.Adam(coconut_model.parameters(), lr=args.lr)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=milestones,
                                                   gamma=0.1)
    elif args.train_optimizer == 'adabound':
        optimizer = adabound.AdaBound(coconut_model.parameters(), lr=1e-3,
                                      final_lr=0.1)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=150,
                                                    gamma=0.1, last_epoch=-1)

    global_steps = 0
    if not args.start_from_begining:
        # Resume model/optimizer/bookkeeping state from an existing checkpoint.
        filename = args.model_checkpoint_path
        if args.load_gpu_model_on_cpu:
            checkpoint = torch.load(filename,
                                    map_location=lambda storage, loc: storage)
        else:
            checkpoint = torch.load(filename)
        coconut_model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['model_optimizer'])
        best_acc = checkpoint['best_acc']
        train_history_dict = checkpoint['train_history_dict']
        scheduler.optimizer = optimizer  # Not sure if this actually works
        start_epoch = checkpoint['epoch']
        global_steps = checkpoint['global_steps']
        print(filename + ' loaded!')

    data_loaders = load_datasets()
    train_ops(start_epoch=start_epoch,
              model=coconut_model,
              optimizer=optimizer,
              scheduler=scheduler,
              data_loaders=data_loaders,
              best_acc=best_acc,
              global_steps=global_steps)
def main():
    """Train MobileNetV2 on the queued input pipeline, logging TensorBoard
    summaries and periodically saving checkpoints."""
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # label without one-hot
    batch_train, batch_labels = get_batch(train, train_label, IMG_W, IMG_H,
                                          BATCH_SIZE, CAPACITY)

    # network
    logits = model.MobileNetV2(batch_train, num_classes=N_CLASSES,
                               is_training=True).output
    print(logits.get_shape())

    # loss: sparse cross-entropy because labels are class indices, not one-hot.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=batch_labels)
    loss = tf.reduce_mean(cross_entropy, name='loss')
    tf.summary.scalar('train_loss', loss)

    # optimizer with exponentially decayed learning rate.
    lr = tf.train.exponential_decay(learning_rate=init_lr,
                                    global_step=global_step,
                                    decay_steps=decay_steps,
                                    decay_rate=0.1)
    tf.summary.scalar('learning_rate', lr)
    # Run batch-norm moving-average updates together with each train step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
            loss, global_step=global_step)

    # accuracy
    correct = tf.nn.in_top_k(logits, batch_labels, 1)
    correct = tf.cast(correct, tf.float16)
    accuracy = tf.reduce_mean(correct)
    tf.summary.scalar('train_acc', accuracy)

    summary_op = tf.summary.merge_all()
    sess = tf.Session(config=config)
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)

    # The saver must also include batch-norm moving statistics, which are not
    # part of tf.trainable_variables().
    var_list = tf.trainable_variables()
    g_list = tf.global_variables()
    bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
    bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
    var_list += bn_moving_vars
    saver = tf.train.Saver(var_list=var_list, max_to_keep=10)

    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for step in range(MAX_STEP):
            if coord.should_stop():
                break
            # BUG FIX: fetch summary_op in the same sess.run as the train op.
            # The original ran sess.run(summary_op) separately, which executed
            # the graph a second time — silently consuming an extra queued
            # batch and logging a summary that did not match the printed
            # loss/accuracy.
            _, learning_rate, tra_loss, tra_acc, summary_str = sess.run(
                [optimizer, lr, loss, accuracy, summary_op])

            if step % 100 == 0:
                print('Epoch %3d/%d, Step %6d/%d, lr %f, train loss = %.2f, '
                      'train accuracy = %.2f%%'
                      % (step / one_epoch_step, MAX_STEP / one_epoch_step,
                         step, MAX_STEP, learning_rate, tra_loss,
                         tra_acc * 100.0))
                train_writer.add_summary(summary_str, step)

                # Debug: report statistics of the last few trainable variables.
                variable_names = [v.name for v in tf.trainable_variables()[-6:-1]]
                values = sess.run(variable_names)
                for k, v in zip(variable_names, values):
                    mean = np.mean(v)
                    std = np.std(v)
                    print("Variable: ", k)
                    print("Shape: ", v.shape)
                    print('mean:', mean)
                    print('var:', std)

            if step % 1000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
    coord.join(threads)
    sess.close()