def infer():
    # Run RRPN text-detection inference over every image in cfg.image_path and
    # draw the predicted rotated boxes (above cfg.draw_threshold) on each one.
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    class_nums = cfg.class_num  # NOTE(review): unused here; kept for parity with train/eval
    model = model_builder.RRPN(add_conv_body_func=resnet.ResNet(),
                               add_roi_box_head_func=resnet.ResNetC5(),
                               use_pyreader=False,
                               mode='infer')
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    # Build the inference graph inside its own program and name scope.
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            model.build_model()
            pred_boxes = model.eval_bbox_out()
    infer_prog = infer_prog.clone(True)  # clone(for_test=True): strip training-only ops
    exe.run(startup_prog)
    fluid.load(infer_prog, cfg.pretrained_model, exe)
    infer_reader = reader.infer(cfg.image_path)
    data_loader = model.data_loader
    data_loader.set_sample_list_generator(infer_reader, places=place)
    fetch_list = [pred_boxes]
    # Sorted directory listing pairs each batch with its filename.
    # NOTE(review): assumes reader.infer yields images in the same sorted
    # order — confirm against reader implementation.
    imgs = os.listdir(cfg.image_path)
    imgs.sort()
    for i, data in enumerate(data_loader()):
        result = exe.run(infer_prog,
                         fetch_list=[v.name for v in fetch_list],
                         feed=data,
                         return_numpy=False)
        nmsed_out = result[0]
        # im_info row: [height, width, scale] per the indexing below (index 2
        # is used as the scale factor).
        im_info = np.array(data[0]['im_info'])[0]
        im_scale = im_info[2]
        outs = np.array(nmsed_out)
        draw_bounding_box_on_image(cfg.image_path, imgs[i], outs, im_scale,
                                   cfg.draw_threshold)
def construct_model_new(self, block_nums, solution):
    """Build a pruned ResNet whose per-stage block counts come from `solution`.

    `solution` is a flat vector with one entry per block of the original
    network; the entries belonging to stage i are summed to obtain the number
    of blocks kept in that stage.

    Args:
        block_nums: per-stage block counts of the original network
            (len(solution) == sum(block_nums)).
        solution: flat selection vector (typically 0/1 per block).

    Returns:
        (model_new, block_nums_new): the rebuilt Bottleneck ResNet and the
        per-stage block counts it was built with.
    """
    block_nums_new = []
    begin = 0
    # Walk the flat solution vector stage by stage; the original recomputed
    # sum(block_nums[:i]) for every stage (quadratic in the stage count) and
    # carried a commented-out pdb breakpoint.
    for stage_len in block_nums:
        end = begin + stage_len
        block_nums_new.append(int(sum(solution[begin:end])))
        begin = end
    model_new = resnet.ResNet(resnet.Bottleneck,
                              block_nums_new,
                              num_classes=1000)
    return model_new, block_nums_new
def main():
    """Classify every listed image with a pretrained ResNet, caching results.

    Reads tab-separated "path\\tid" lines from FLAGS.images, runs the network
    on each not-yet-cached image, and periodically pickles the id->classes
    cache to FLAGS.result so interrupted runs can resume.
    """
    # Build the classification graph.
    images = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
    net = resnet.ResNet(images, is_training=False)
    net.build_model()
    logits = net.logit

    # Restore trained weights.
    saver = tf.train.Saver(tf.global_variables())
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = FLAGS.visiable_gpus
    config.log_device_placement = False
    sess = tf.Session(config=config)
    saver.restore(sess, FLAGS.pretrain_ckpt)

    # Inference: only images whose id is not already cached are processed.
    types = 'center'
    with open(FLAGS.images) as f:
        img_paths = [l.strip().split('\t') for l in f]
    cache = {}
    if os.path.exists(FLAGS.result):
        with open(FLAGS.result, 'rb') as f:
            cache = pkl.load(f)
    unfinished = [(p, i) for p, i in img_paths if i not in cache]
    progress = tqdm(unfinished)
    n_errors = 0
    for img_path, img_id in progress:
        raw_img = cv.imread(img_path)
        if raw_img is None or raw_img.data is None:
            # BUG FIX: the original fell through after counting the error and
            # fed the unreadable image to preprocess(), which would crash.
            n_errors += 1
            progress.set_description("n_errors: {}".format(n_errors))
            continue
        imgs = preprocess(raw_img, types)
        output = sess.run(logits, {images: imgs})
        output = np.squeeze(output[0])
        if types == '10crop':
            # Average the logits of the ten crops.
            output = np.mean(output, axis=0)
        # Keep class ids whose score clears the threshold, best first.
        idx = output.argsort()[::-1]
        output = idx[output[idx] > FLAGS.prob_thres].tolist()
        cache[img_id] = output
        progress.set_description("n_errors: {}".format(n_errors))
        # Periodically flush the cache so progress survives crashes.
        if len(cache) % FLAGS.flush_every == 0:
            with open(FLAGS.result, 'wb') as f:
                pkl.dump(cache, f, protocol=4)
    # Final flush.
    with open(FLAGS.result, 'wb') as f:
        pkl.dump(cache, f, protocol=4)
def __init__(self, config_file, model_name):
    """Load configuration, dataset statistics, the trained classifier, and
    prepare the feature (similarity) model and search catalog.

    Args:
        config_file: path to the configuration file.
        model_name: model/section name inside the configuration file.
    """
    self.configuration = conf.ConfigurationFile(config_file, model_name)
    # loading data statistics produced at training time
    mean_file = os.path.join(self.configuration.get_data_dir(), "mean.dat")
    shape_file = os.path.join(self.configuration.get_data_dir(), "shape.dat")
    # BUG FIX: this load was commented out, but self.input_shape is required
    # immediately below (mean-image reshape) and for the Keras input tensor,
    # so the original raised AttributeError.
    self.input_shape = np.fromfile(shape_file, dtype=np.int32)
    self.mean_image = np.fromfile(mean_file, dtype=np.float32)
    self.mean_image = np.reshape(self.mean_image, self.input_shape)

    # loading classifier model (ResNet [3,4,6,3] configuration, no SE blocks)
    model = resnet.ResNet([3, 4, 6, 3], [64, 128, 256, 512],
                          self.configuration.get_number_of_classes(),
                          se_factor=0)
    # build the model by calling it once on a symbolic input
    input_image = tf.keras.Input(
        (self.input_shape[0], self.input_shape[1], self.input_shape[2]),
        name='input_image')
    model(input_image)
    model.summary()
    model.load_weights(self.configuration.get_checkpoint_file(),
                       by_name=True,
                       skip_mismatch=True)

    # create the sim-model from a customized layer
    # you can change output_layer_name
    output_layer_name = 'global_average_pooling2d'
    output = model.get_layer(output_layer_name).output
    self.sim_model = tf.keras.Model(model.input, output)
    self.sim_model.summary()
    print('sim_model was loaded OK')

    # defining process arch
    self.process_fun = imgproc.process_image

    # loading catalog
    self.ssearch_dir = os.path.join(self.configuration.get_data_dir(),
                                    'ssearch')
    catalog_file = os.path.join(self.ssearch_dir, 'catalog.txt')
    assert os.path.exists(catalog_file), '{} does not exist'.format(
        catalog_file)
    print('loading catalog ...')
    self.load_catalog(catalog_file)
    print('loading catalog ok ...')
    self.enable_search = False
def eval():
    # Evaluate the RRPN detector on the test split and report ICDAR metrics.
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    class_nums = cfg.class_num  # NOTE(review): unused here; kept for parity with train/infer
    model = model_builder.RRPN(add_conv_body_func=resnet.ResNet(),
                               add_roi_box_head_func=resnet.ResNetC5(),
                               use_pyreader=False,
                               mode='val')
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    # Build the eval graph inside its own program and name scope.
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            model.build_model()
            pred_boxes = model.eval_bbox_out()
    infer_prog = infer_prog.clone(True)  # clone(for_test=True)
    exe.run(startup_prog)
    fluid.load(infer_prog, cfg.pretrained_model, exe)
    test_reader = reader.test(1)  # batch size 1
    data_loader = model.data_loader
    data_loader.set_sample_list_generator(test_reader, places=place)
    fetch_list = [pred_boxes]
    res_list = []
    # Keys under which predictions and ground truth are collected per image.
    # NOTE(review): 'is_crowed' looks like a typo for 'is_crowd', but it must
    # match what get_key_dict/icdar_eval expect, so it is left unchanged.
    keys = [
        'bbox', 'gt_box', 'gt_class', 'is_crowed', 'im_info', 'im_id',
        'is_difficult'
    ]
    for i, data in enumerate(data_loader()):
        result = exe.run(infer_prog,
                         fetch_list=[v.name for v in fetch_list],
                         feed=data,
                         return_numpy=False)
        pred_boxes_v = result[0]
        nmsed_out = pred_boxes_v
        outs = np.array(nmsed_out)
        res = get_key_dict(outs, data[0], keys)
        res_list.append(res)
        if i % 50 == 0:
            logger.info('test_iter {}'.format(i))
    icdar_eval(res_list)
def tower_model(images, labels):
    """Build one replica tower: a ResNet forward pass plus its total loss.

    The loss is softmax cross entropy plus L2 weight decay; batch-norm
    variables are regularized too (helps generalization on large-scale data)
    but with a 10x smaller coefficient.

    Returns:
        (model, loss) for this tower.
    """
    is_training = FLAGS.mode == tf.estimator.ModeKeys.TRAIN
    model = resnet.ResNet(images, is_training=is_training)
    model.build_model()

    # Classification term, exported for logging/summaries.
    cross_entropy = tf.losses.softmax_cross_entropy(logits=model.logit,
                                                    onehot_labels=labels)
    tf.identity(cross_entropy, name='cross_entropy')
    tf.summary.scalar('cross_entropy', cross_entropy)

    # Split trainable variables into batch-norm and everything else.
    bn_vars = []
    other_vars = []
    for var in tf.trainable_variables():
        (bn_vars if 'bn' in var.name else other_vars).append(var)
    l2_other = tf.add_n([tf.nn.l2_loss(v) for v in other_vars])
    l2_bn = tf.add_n([tf.nn.l2_loss(v) for v in bn_vars])

    loss = (cross_entropy + FLAGS.weight_decay * l2_other +
            0.1 * FLAGS.weight_decay * l2_bn)
    return model, loss
def demo(data, save, depth=32, valid_size=5000, n_epochs=25, batch_size=64,
         seed=None):
    """Train and evaluate a ResNet on CIFAR-10.

    Args:
        data (str): directory CIFAR-10 is loaded from / downloaded to.
        save (str): directory the model is saved to.
        depth (int): depth of the network (number of convolution layers).
            Default 32 — the value the original hard-coded while ignoring
            this argument.
        valid_size (int): size of the validation split (default 5000).
        n_epochs (int): number of training epochs (default 25).
        batch_size (int): minibatch size (default 64).
        seed (int): random seed, or None for no explicit seeding.
    """
    # CIFAR-10 per-channel statistics.
    mean = [0.5071, 0.4867, 0.4408]
    stdv = [0.2675, 0.2565, 0.2761]
    train_transforms = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=stdv),
    ])
    test_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=stdv),
    ])

    # Datasets
    train_set = datasets.CIFAR10(data, train=True,
                                 transform=train_transforms, download=True)
    test_set = datasets.CIFAR10(data, train=False,
                                transform=test_transforms, download=False)

    # Model — BUG FIX: `depth` was accepted but ignored (depth=32 hard-coded);
    # the default above preserves the old effective behavior.
    model = resnet.ResNet(depth=depth, num_classes=10)
    print(model)

    # Make save directory
    if not os.path.exists(save):
        os.makedirs(save)
    if not os.path.isdir(save):
        raise Exception('%s is not a dir' % save)

    # Train the model
    train(model=model, train_set=train_set, test_set=test_set, save=save,
          valid_size=valid_size, n_epochs=n_epochs, batch_size=batch_size,
          seed=seed)
    print('Done!')
from skimage.io import imread, imsave from skimage.util import img_as_float import utils from sys import argv border = 30 overlap = 30 if len(argv) != 3: print('Usage:', argv[0], 'input_file output_file') exit(0) net = resnet.ResNet(32, 128) net = net.cuda() net.eval() def save_model(filename): torch.save(net.state_dict(), 'checkpoints/' + filename) def load_model(filename): net.load_state_dict(torch.load(filename)) def add_border(img, n_pixels): """ Add a mirrored border to the image
def main():
    # Evaluate a pretrained classifier checkpoint on the chosen dataset's
    # validation split and print top-1/top-5 error.
    global args, best_err1, best_err5
    args = parser.parse_args()

    # ---- validation data -------------------------------------------------
    if args.dataset.startswith('cifar'):
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
        # NOTE(review): transform_train is built but never used — this
        # function only evaluates.
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_test = transforms.Compose([transforms.ToTensor(), normalize])
        if args.dataset == 'cifar100':
            val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR100('../data',
                                  train=False,
                                  transform=transform_test),
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.workers,
                pin_memory=True)
            numberofclass = 100
        elif args.dataset == 'cifar10':
            val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR10('../data',
                                 train=False,
                                 transform=transform_test),
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.workers,
                pin_memory=True)
            numberofclass = 10
        else:
            raise Exception('unknown dataset: {}'.format(args.dataset))
    elif args.dataset == 'imagenet':
        valdir = os.path.join(
            '/data_large/readonly/ImageNet-Fast/imagenet/val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        val_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(), normalize
        ])
        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            valdir, val_transform),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)
        numberofclass = 1000
    else:
        raise Exception('unknown dataset: {}'.format(args.dataset))

    # ---- model -----------------------------------------------------------
    print("=> creating model '{}'".format(args.net_type))
    if args.net_type == 'resnet':
        model = RN.ResNet(args.dataset, args.depth, numberofclass,
                          args.bottleneck)  # for ResNet
    elif args.net_type == 'pyramidnet':
        model = PYRM.PyramidNet(args.dataset, args.depth, args.alpha,
                                numberofclass, args.bottleneck)
    else:
        raise Exception('unknown network architecture: {}'.format(
            args.net_type))
    model = torch.nn.DataParallel(model).cuda()

    # ---- checkpoint: required — evaluation without weights is useless -----
    if os.path.isfile(args.pretrained):
        print("=> loading checkpoint '{}'".format(args.pretrained))
        checkpoint = torch.load(args.pretrained)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}'(best err1: {}%)".format(
            args.pretrained, checkpoint['best_err1']))
    else:
        raise Exception("=> no checkpoint found at '{}'".format(
            args.pretrained))
    print('the number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    cudnn.benchmark = True

    # evaluate on validation set
    err1, err5, val_loss = validate(val_loader, model, criterion)
    print('Accuracy (top-1 and 5 error):', err1, err5)
else:
    # SVHN branch — the matching cifar if-branch lies above this chunk.
    nb_classes = 10
    epochs = 40
    learning_rate = 0.01
    droprate = 0.2
    print('load svhn data..')
    # reference : https://github.com/robertomest/convnet-study
    train_set, valid_set, test_set = get_svhn_data.load('data/svhn')
    train_set['data'] = get_svhn_data.preprocess(train_set['data'])
    test_set['data'] = get_svhn_data.preprocess(test_set['data'])
    # NOTE(review): validation data is taken from the *test* set here —
    # confirm that is intentional.
    validation_data = (test_set['data'], test_set['labels'])
    input_shape = (32, 32, 3)

# Common path: build and compile the model.
# NOTE(review): nb_classes is hard-coded to 10 here instead of using the
# nb_classes variable set per dataset — verify for cifar100.
print('create residual densenet..')
model = resnet.ResNet(nb_classes=10,
                      img_dim=(32, 32, 3),
                      k=widen_factor,
                      nb_blocks=nb_blocks)
opt = SGD(lr=0.1, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
model.summary()

# training
if dataset_select == 'cifar10' or dataset_select == 'cifar100':
    print('start training with {}'.format(dataset_select))
    print('Using real-time data augmentation..')
    datagen = ImageDataGenerator(horizontal_flip=True,
                                 width_shift_range=5. / 32,
                                 height_shift_range=5. / 32)
# save_freq: frequency in terms of number steps each time checkpoint is saved model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( filepath=os.path.join(configuration.get_snapshot_dir(), "{epoch:03d}.h5"), save_weights_only=True, mode="max", monitor="val_acc", save_freq="epoch", ) # save_freq = configuration.get_snapshot_steps()) # resnet 34 if configuration.get_model_name() == "SKETCH": model = alexnet.AlexNetModel(configuration.get_number_of_classes()) process_fun = imgproc.process_sketch elif configuration.get_model_name() == "FASHION-RESNET50": model = resnet.ResNet(configuration.get_number_of_classes()) process_fun = imgproc.process_sketch elif configuration.get_model_name() == "FASHION-ALEXNET": model = alexnet.AlexNetModel(configuration.get_number_of_classes()) process_fun = imgproc.process_sketch # elif configuration.get_model_name() == 'FASHION-RESNEXT': # model = alexnet.(configuration.get_number_of_classes()) process_fun = imgproc.process_sketch else: model = simple.SimpleModel(configuration.get_number_of_classes()) process_fun = imgproc.process_mnist # resnet_50 # model = resnet.ResNet([3,4,6,3],[64,128,256,512], configuration.get_number_of_classes(), use_bottleneck = True) # build the model indicating the input shape # define the model input
def _build_model(self):
    """Create self.net and self.loss from self.option.

    Supports arch in ('resnet', 'preresnet', 'vgg'). An unknown arch or an
    unsupported depth is logged and raised as ValueError — the original
    logged the message but raised a bare ValueError without it.

    Raises:
        ValueError: on unknown arch or unsupported depth.
    """
    if self.option.arch == 'resnet':
        from models import resnet
        # depth -> (per-stage layer counts, block type)
        configs = {
            18: ([2, 2, 2, 2], resnet.BasicBlock),
            34: ([3, 4, 6, 3], resnet.BasicBlock),
            50: ([3, 4, 6, 3], resnet.Bottleneck),
            101: ([3, 4, 23, 3], resnet.Bottleneck),
            152: ([3, 8, 36, 3], resnet.Bottleneck),
        }
        if self.option.depth not in configs:
            msg = "Unknown depth for resnet: %d. Should be one of (18, 34, 50, 101, 152)" % self.option.depth
            self.logger.info(msg)
            raise ValueError(msg)
        n_layers, block = configs[self.option.depth]
        self.net = resnet.ResNet(block,
                                 n_layers,
                                 num_classes=self.option.n_class)
    elif self.option.arch == 'preresnet':
        from models import preact_resnet
        configs = {
            18: ([2, 2, 2, 2], preact_resnet.PreActBlock),
            34: ([3, 4, 6, 3], preact_resnet.PreActBlock),
            50: ([3, 4, 6, 3], preact_resnet.PreActBottleneck),
            101: ([3, 4, 23, 3], preact_resnet.PreActBottleneck),
            152: ([3, 8, 36, 3], preact_resnet.PreActBottleneck),
        }
        if self.option.depth not in configs:
            msg = "Unknown depth for pre-resnet: %d. Should be one of (18, 34, 50, 101, 152)" % self.option.depth
            self.logger.info(msg)
            raise ValueError(msg)
        n_layers, block = configs[self.option.depth]
        self.net = preact_resnet.PreActResNet(
            block, n_layers, num_classes=self.option.n_class)
    elif self.option.arch == 'vgg':
        from models import vgg
        vgg_name = 'VGG%d' % self.option.depth
        if vgg_name in vgg.cfg:
            self.net = vgg.VGG(vgg_name)
        else:
            msg = "Unknown depth for vgg: %d. Should be one of (11, 13, 16, 19)" % self.option.depth
            self.logger.info(msg)
            raise ValueError(msg)
    else:
        msg = "Unknown architecture: %s. Should be one of ('resnet', 'preresnet', 'vgg')" % self.option.arch
        self.logger.info(msg)
        raise ValueError(msg)

    # ignore_index=255: label value 255 is excluded from the loss.
    # NOTE(review): presumably a segmentation-style ignore label — confirm.
    self.loss = nn.CrossEntropyLoss(ignore_index=255)
    if self.option.cuda and len(self.option.gpu_ids) > 1:
        self.net = nn.DataParallel(self.net, device_ids=self.option.gpu_ids)
    if self.option.cuda:
        self.net.cuda()
        self.loss.cuda()
imgs[i, ...] = img[offset[i][0]:offset[i][0] + 224, offset[i][1]:offset[i][1] + 224] img = cv.flip(img, 1) for i in range(0, 5): imgs[i + 5, ...] = img[offset[i][0]:offset[i][0] + 224, offset[i][1]:offset[i][1] + 224] else: raise ValueError("Type not support") imgs = ((imgs / 255.0) - 0.5) * 2.0 imgs = imgs[..., ::-1] return imgs # build model images = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3]) net = resnet.ResNet(images, is_training=False) net.build_model() logits = net.logit feat = net.feat # restore model saver = tf.train.Saver(tf.global_variables()) config = tf.ConfigProto() config.gpu_options.allow_growth = True config.gpu_options.visible_device_list = str(FLAGS.visiable_gpu) config.log_device_placement = False sess = tf.Session(config=config) # load trained model saver.restore(sess, FLAGS.pretrain_ckpt)
    log_dir=configuration.get_snapshot_dir(), histogram_freq=1)
# Defining callback for saving checkpoints
# save_freq: frequency in terms of number steps each time checkpoint is saved
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=configuration.get_snapshot_dir() + '{epoch:03d}.h5',
    save_weights_only=True,
    mode='max',
    monitor='val_acc',
    save_freq='epoch',
)
# save_freq = configuration.get_snapshot_steps())

# resnet 34, no bottleneck is required
# model = resnet.ResNet([3,4,6,3],[64,128,256,512], configuration.get_number_of_classes(), se_factor = 0)
# resnet_50 (bottleneck blocks)
model = resnet.ResNet([3, 4, 6, 3], [64, 128, 256, 512],
                      configuration.get_number_of_classes(),
                      use_bottleneck=True)
print('Model is Resnet')
sys.stdout.flush()

# build the model indicating the input shape
# define the model input
input_image = tf.keras.Input(
    (input_shape[0], input_shape[1], input_shape[2]), name='input_image')
model(input_image)
model.summary()

# use_checkpoints to load weights
if configuration.use_checkpoint():
    model.load_weights(configuration.get_checkpoint_file(),
                       by_name=True,
                       skip_mismatch=True)
    # model.load_weights(configuration.get_checkpoint_file(), by_name = False)
def train():
    # Train the RRPN detector; multi-GPU via CompiledProgram, optional
    # multi-process distributed readers via num_trainers.
    learning_rate = cfg.learning_rate
    #image_shape = [-1, 3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]
    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch
    use_random = True

    # Build the training program.
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = model_builder.RRPN(add_conv_body_func=resnet.ResNet(),
                                       add_roi_box_head_func=resnet.ResNetC5(),
                                       use_pyreader=cfg.use_pyreader,
                                       use_random=use_random)
            model.build_model()
            losses, keys, rpn_rois = model.loss()
            loss = losses[0]
            fetch_list = losses

            # Piecewise-decayed learning rate with linear warmup.
            boundaries = cfg.lr_steps
            gamma = cfg.lr_gamma
            step_num = len(cfg.lr_steps)
            values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
            start_lr = learning_rate * cfg.start_factor
            lr = fluid.layers.piecewise_decay(boundaries, values)
            lr = fluid.layers.linear_lr_warmup(lr, cfg.warm_up_iter, start_lr,
                                               learning_rate)
            optimizer = fluid.optimizer.Momentum(
                learning_rate=lr,
                regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
                momentum=cfg.momentum)
            optimizer.minimize(loss)
            fetch_list = fetch_list + [lr]

    # Mark fetched vars persistable so they survive scope cleanup between runs.
    for var in fetch_list:
        var.persistable = True

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_iteration_per_drop_scope = 1
    exe.run(startup_prog)

    if cfg.pretrained_model:
        checkpoint.load_and_fusebn(exe, train_prog, cfg.pretrained_model)

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    shuffle = True
    shuffle_seed = None
    if num_trainers > 1:
        # Distributed readers must shuffle identically in every process.
        shuffle_seed = 1
    if cfg.use_pyreader:
        train_reader = reader.train(batch_size=cfg.TRAIN.im_per_batch,
                                    total_batch_size=total_batch_size,
                                    padding_total=cfg.TRAIN.padding_minibatch,
                                    shuffle=shuffle,
                                    shuffle_seed=shuffle_seed)
        if num_trainers > 1:
            assert shuffle_seed is not None, \
                "If num_trainers > 1, the shuffle_seed must be set, because " \
                "the order of batch data generated by reader " \
                "must be the same in the respective processes."
        # NOTE: the order of batch data generated by batch_reader
        # must be the same in the respective processes.
        if num_trainers > 1:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)
        data_loader = model.data_loader
        data_loader.set_sample_list_generator(train_reader, places=place)
    else:
        if num_trainers > 1:
            shuffle = False
        train_reader = reader.train(batch_size=total_batch_size,
                                    shuffle=shuffle)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
        # NOTE(review): this branch never defines data_loader, yet train_loop
        # below calls data_loader.start() — presumably only the pyreader path
        # is exercised in practice; confirm.

    def train_loop():
        # Run up to cfg.max_iter iterations with periodic logging and
        # checkpoint snapshots; an exhausted reader ends the loop cleanly.
        data_loader.start()
        train_stats = TrainingStats(cfg.log_window, keys)
        try:
            start_time = time.time()
            prev_start_time = start_time
            for iter_id in range(cfg.max_iter):
                prev_start_time = start_time
                start_time = time.time()
                outs = exe.run(compiled_train_prog,
                               fetch_list=[v.name for v in fetch_list])
                # outs[:-1] are the losses keyed by `keys`; outs[-1] is lr.
                stats = {
                    k: np.array(v).mean()
                    for k, v in zip(keys, outs[:-1])
                }
                train_stats.update(stats)
                logs = train_stats.log()
                if iter_id % 10 == 0:
                    strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                        now_time(), iter_id, np.mean(outs[-1]), logs,
                        start_time - prev_start_time)
                    print(strs)
                    sys.stdout.flush()
                if (iter_id) % cfg.TRAIN.snapshot_iter == 0 and iter_id != 0:
                    save_name = "{}".format(iter_id)
                    checkpoint.save(
                        exe, train_prog,
                        os.path.join(cfg.model_save_dir, save_name))
                if (iter_id) == cfg.max_iter:
                    # NOTE(review): range() never yields cfg.max_iter, so this
                    # final snapshot looks unreachable — confirm.
                    checkpoint.save(
                        exe, train_prog,
                        os.path.join(cfg.model_save_dir, "model_final"))
                    break
            end_time = time.time()
            total_time = end_time - start_time
            last_loss = np.array(outs[0]).mean()
        except (StopIteration, fluid.core.EOFException):
            data_loader.reset()

    train_loop()
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( filepath=configuration.get_snapshot_dir() + '{epoch:03d}.h5', save_weights_only=True, mode='max', monitor='val_acc', save_freq='epoch', ) #save_freq = configuration.get_snapshot_steps()) #resnet 34 if configuration.get_model_name() == 'UVROIS': model = uv_rois.UVRoisModel(configuration.get_number_of_classes()) print('Model is UV') sys.stdout.flush() else: model = resnet.ResNet([3, 4, 6, 3], [64, 128, 256, 512], configuration.get_number_of_classes(), se_factor=0) print('Model is Resnet') sys.stdout.flush() #resnet_50 #model = resnet.ResNet([3,4,6,3],[64,128,256,512], configuration.get_number_of_classes(), use_bottleneck = True) #build the model indicating the input shape #define the model input input_image = tf.keras.Input( (input_shape[0], input_shape[1], input_shape[2]), name='input_image') model(input_image) model.summary() #use_checkpoints to load weights if configuration.use_checkpoint(): model.load_weights(configuration.get_checkpoint_file(),
def __init__(self):
    """Build the network, optimizer and loss selected by the global `args`."""
    super(Solver, self).__init__()
    global numberofclass

    # define the network
    if args.net_type == 'resnet':
        self.model = RN.ResNet(dataset=args.dataset,
                               depth=args.depth,
                               num_classes=numberofclass,
                               bottleneck=args.bottleneck)
    elif args.net_type == 'pyramidnet':
        self.model = PYRM.PyramidNet(args.dataset, args.depth, args.alpha,
                                     numberofclass, args.bottleneck)
    elif args.net_type == 'wideresnet':
        self.model = WR.WideResNet(depth=args.depth,
                                   num_classes=numberofclass,
                                   widen_factor=args.width)
    elif args.net_type == 'vggnet':
        self.model = VGG.vgg16(num_classes=numberofclass)
    elif args.net_type == 'mobilenet':
        self.model = MN.mobile_half(num_classes=numberofclass)
    elif args.net_type == 'shufflenet':
        self.model = SN.ShuffleV2(num_classes=numberofclass)
    elif args.net_type == 'densenet':
        self.model = DN.densenet_cifar(num_classes=numberofclass)
    elif args.net_type == 'resnext-2':
        self.model = ResNeXt29_2x64d(num_classes=numberofclass)
    elif args.net_type == 'resnext-4':
        self.model = ResNeXt29_4x64d(num_classes=numberofclass)
    elif args.net_type == 'resnext-32':
        self.model = ResNeXt29_32x4d(num_classes=numberofclass)
    elif args.net_type == 'imagenetresnet18':
        self.model = multi_resnet18_kd(num_classes=numberofclass)
    elif args.net_type == 'imagenetresnet34':
        self.model = multi_resnet34_kd(num_classes=numberofclass)
    elif args.net_type == 'imagenetresnet50':
        self.model = multi_resnet50_kd(num_classes=numberofclass)
    elif args.net_type == 'imagenetresnet101':
        self.model = multi_resnet101_kd(num_classes=numberofclass)
    elif args.net_type == 'imagenetresnet152':
        self.model = multi_resnet152_kd(num_classes=numberofclass)
    else:
        raise Exception('unknown network architecture: {}'.format(
            args.net_type))

    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     args.lr,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay,
                                     nesterov=True)
    # Class-pair mixing weights used by some losses; not a trainable tensor.
    self.loss_lams = torch.zeros(numberofclass,
                                 numberofclass,
                                 dtype=torch.float32).cuda()
    self.loss_lams.requires_grad = False

    # define the loss function
    if args.method == 'ce':
        self.criterion = nn.CrossEntropyLoss()
    elif args.method == 'sce':
        # NOTE(review): SCE hyper-parameters differ per dataset — presumably
        # the per-dataset values from the SCE paper; confirm.
        if args.dataset == 'cifar10':
            self.criterion = SCELoss(alpha=0.1,
                                     beta=1.0,
                                     num_classes=numberofclass)
        else:
            self.criterion = SCELoss(alpha=6.0,
                                     beta=0.1,
                                     num_classes=numberofclass)
    elif args.method == 'ls':
        self.criterion = label_smooth(num_classes=numberofclass)
    elif args.method == 'gce':
        self.criterion = generalized_cross_entropy(
            num_classes=numberofclass)
    elif args.method == 'jo':
        self.criterion = joint_optimization(num_classes=numberofclass)
    elif args.method == 'bootsoft':
        self.criterion = boot_soft(num_classes=numberofclass)
    elif args.method == 'boothard':
        self.criterion = boot_hard(num_classes=numberofclass)
    elif args.method == 'forward':
        self.criterion = Forward(num_classes=numberofclass)
    elif args.method == 'backward':
        self.criterion = Backward(num_classes=numberofclass)
    elif args.method == 'disturb':
        self.criterion = DisturbLabel(num_classes=numberofclass)
    elif args.method == 'ols':
        self.criterion = nn.CrossEntropyLoss()
    else:
        # BUG FIX: an unknown method previously fell through silently and
        # crashed later with AttributeError on self.criterion; fail fast
        # like the net_type chain above.
        raise Exception('unknown method: {}'.format(args.method))
    self.criterion = self.criterion.cuda()
def infer():
    # Run EAST text detection over the ICDAR2015 test images, writing one
    # result/res_<name>.txt per image plus a visualization image.
    image_shape = [3, 512, 512]
    model = model_builder.EAST(add_conv_body_func=resnet.ResNet(),
                               mode='infer')
    f_geo, f_score = model.build_model(image_shape)
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # yapf: disable
    if not os.path.exists(cfg.pretrained_model):
        raise ValueError("Model path [%s] does not exist." % (cfg.pretrained_model))

    def if_exist(var):
        # Only load variables that have a matching file in the model dir.
        return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
    fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
    # yapf: enable
    infer_reader = reader.infer(cfg.image_path)
    feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
    print(model.feeds())
    fetch_list = [f_geo, f_score]
    image_path = "ICDAR2015/ch4_test_images/"
    image_names = os.listdir(image_path)
    image_names.sort()
    print(image_names)
    for iter_id, data in enumerate(infer_reader()):
        #print(image_names[i])
        #data = next(infer_reader())
        # im_info row: [h, w, ratio_h, ratio_w] per the indexing below.
        im_info = data[0][1]
        print(im_info.shape)
        ratio_w = im_info[0][3]
        ratio_h = im_info[0][2]
        result = exe.run(fetch_list=[v.name for v in fetch_list],
                         feed=feeder.feed(data),
                         return_numpy=True)
        pred_boxes_v = result[0]
        # NOTE(review): fetch_list is [f_geo, f_score] but result[0] is used
        # as the score map and result[1] as the geometry map — confirm the
        # intended ordering.
        score = result[0]
        geometry = result[1]
        boxes = detect(score_map=score, geo_map=geometry)
        #draw_bounding_box_on_image(cfg.image_path, nmsed_out, cfg.draw_threshold,
        #                           labels_map, image)
        # NOTE(review): f is never closed and result/ must already exist.
        f = open("result/res_" + image_names[iter_id].split('.')[0] + ".txt",
                 'w')
        # for k in boxes:
        #     print(k)
        #     f.write(str(int(k[0]/ratio_h)) + "," + str(int(k[1]/ratio_w)) + "," + str(int(k[2]/ratio_h)) + "," + str(int(k[3]/ratio_w)) + "," + str(int(k[4]/ratio_h)) \
        #             + "," + str(int(k[5]/ ratio_w)) + "," + str(int(k[6]/ratio_h)) + "," + str(int(k[7]/ratio_w)) + "\n")
        try:
            if boxes.shape[0] != 0:
                # Map the 8-coordinate quads back to original image scale.
                boxes = boxes[:, :8].reshape(-1, 4, 2)
                boxes_w = boxes[:, :, 0:1] / ratio_w
                boxes_h = boxes[:, :, 1:] / ratio_h
                boxes = np.concatenate((boxes_w, boxes_h), axis=-1)
                print(boxes.shape)
                for box in boxes:
                    # Skip degenerate quads with a side shorter than 5 px.
                    if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(
                            box[3] - box[0]) < 5:
                        continue
                    f.write(str(int(box[0][0])) + "," + str(int(box[0][1])) + "," + str(int(box[1][0])) + "," + str(int(box[1][1])) + "," + \
                            str(int(box[2][0])) + "," + str(int(box[2][1])) + "," + str(int(box[3][0])) + "," + str(int(box[3][1])) + "\n")
                print(boxes.shape)
                image = cv2.imread("ICDAR2015/ch4_test_images/" +
                                   image_names[iter_id])
                #image = cv2.resize(image, (int(im_info[0][1]), int(im_info[0][0])))
                for i in range(boxes.shape[0]):
                    cv2.polylines(image, [boxes[i].astype('int32')],
                                  True,
                                  color=(255, 255, 0),
                                  thickness=1)
                #image = cv2.resize(image, (int(im_info[0][1]/im_info[0][3]), int(im_info[0][0]/im_info[0][2])))
                print("saving result image", image_names[iter_id])
                cv2.imwrite("./" + image_names[iter_id], image)
        except:
            # NOTE(review): bare except silently skips any failing image —
            # consider narrowing to the expected exception types.
            continue
def get_model(model):
    """Return (high-res net, low-res net, policy agent) for a named setup.

    Args:
        model: one of 'R32_C10', 'R32_C100', 'R50_ImgNet', 'R34_fMoW'.

    Returns:
        (rnet_hr, rnet_lr, agent): three ResNet instances; rnet_hr and
        rnet_lr share the same configuration.

    Raises:
        ValueError: on an unknown model name. (The original silently fell
        through and crashed with UnboundLocalError on return.)
    """
    from models import resnet

    # name -> (backbone layer config, third ResNet positional arg, n_classes,
    #          agent layer config).
    # NOTE(review): the third positional arg (3 or 7) is presumably the stem
    # kernel/first-conv size — confirm against models.resnet's signature.
    configs = {
        'R32_C10': ([3, 4, 6, 3], 3, 10, [1, 1, 1, 1]),
        'R32_C100': ([3, 4, 6, 3], 3, 100, [1, 1, 1, 1]),
        'R50_ImgNet': ([3, 4, 6, 3], 7, 1000, [2, 2, 2, 2]),
        'R34_fMoW': ([3, 4, 6, 3], 7, 62, [2, 2, 2, 2]),
    }
    if model not in configs:
        raise ValueError('unknown model: {}'.format(model))
    layers, stem, n_class, agent_layers = configs[model]

    rnet_hr = resnet.ResNet(resnet.BasicBlock, layers, stem, n_class)
    rnet_lr = resnet.ResNet(resnet.BasicBlock, layers, stem, n_class)
    # The agent is a small ResNet with 16 outputs (one per action).
    agent = resnet.ResNet(resnet.BasicBlock, agent_layers, 3, 16)
    return rnet_hr, rnet_lr, agent
def main():
    # Train with the selected mixup-style method; checkpoints live under
    # runs/<expname>/ and training resumes from them automatically.
    global args, best_err1, best_err5
    args = parser.parse_args()
    if args.seed >= 0:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)
    cudnn.benchmark = True

    # Save path: encode method and key hyper-parameters in the run name.
    args.expname += args.method
    if args.transport:
        args.expname += '_tp'
    args.expname += '_prob_' + str(args.mixup_prob)
    if args.clean_lam > 0:
        args.expname += '_clean_' + str(args.clean_lam)
    if args.seed >= 0:
        args.expname += '_seed' + str(args.seed)
    print("Model is saved at {}".format(args.expname))

    # Dataset and loader
    if args.dataset.startswith('cifar'):
        mean = [x / 255.0 for x in [125.3, 123.0, 113.9]]
        std = [x / 255.0 for x in [63.0, 62.1, 66.7]]
        normalize = transforms.Normalize(mean=mean, std=std)
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=args.padding),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_test = transforms.Compose([transforms.ToTensor(), normalize])
        if args.dataset == 'cifar100':
            train_loader = torch.utils.data.DataLoader(
                datasets.CIFAR100('~/Datasets/cifar100/',
                                  train=True,
                                  download=True,
                                  transform=transform_train),
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.workers,
                pin_memory=True)
            val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR100('~/Datasets/cifar100/',
                                  train=False,
                                  transform=transform_test),
                batch_size=args.batch_size // 4,
                shuffle=True,
                num_workers=args.workers,
                pin_memory=True)
            numberofclass = 100
        elif args.dataset == 'cifar10':
            train_loader = torch.utils.data.DataLoader(
                datasets.CIFAR10('../data',
                                 train=True,
                                 download=True,
                                 transform=transform_train),
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.workers,
                pin_memory=True)
            val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR10('../data',
                                 train=False,
                                 transform=transform_test),
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.workers,
                pin_memory=True)
            numberofclass = 10
        else:
            raise Exception('unknown dataset: {}'.format(args.dataset))
    elif args.dataset == 'imagenet':
        traindir = os.path.join('/data/readonly/ImageNet-Fast/imagenet/train')
        valdir = os.path.join('/data/readonly/ImageNet-Fast/imagenet/val')
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        normalize = transforms.Normalize(mean=mean, std=std)
        # Standard ImageNet augmentation: color jitter + PCA lighting noise.
        jittering = utils.ColorJitter(brightness=0.4,
                                      contrast=0.4,
                                      saturation=0.4)
        lighting = utils.Lighting(alphastd=0.1,
                                  eigval=[0.2175, 0.0188, 0.0045],
                                  eigvec=[[-0.5675, 0.7192, 0.4009],
                                          [-0.5808, -0.0045, -0.8140],
                                          [-0.5836, -0.6948, 0.4203]])
        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                jittering,
                lighting,
                normalize,
            ]))
        train_sampler = None
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler)
        val_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])
        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(valdir, val_transform),
            batch_size=args.batch_size // 4,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)
        numberofclass = 1000
        args.neigh_size = min(args.neigh_size, 2)
    else:
        raise Exception('unknown dataset: {}'.format(args.dataset))

    # Model
    print("=> creating model '{}'".format(args.net_type))
    if args.net_type == 'resnet':
        model = RN.ResNet(args.dataset, args.depth, numberofclass,
                          args.bottleneck)  # for ResNet
    elif args.net_type == 'pyramidnet':
        model = PYRM.PyramidNet(args.dataset, args.depth, args.alpha,
                                numberofclass, args.bottleneck)
    else:
        raise Exception('unknown network architecture: {}'.format(
            args.net_type))

    # Resume from this experiment's checkpoint, if one exists.
    pretrained = "runs/{}/{}".format(args.expname, 'checkpoint.pth.tar')
    if os.path.isfile(pretrained):
        print("=> loading checkpoint '{}'".format(pretrained))
        checkpoint = torch.load(pretrained)
        # key[7:] strips the 'module.' prefix DataParallel adds when saving.
        checkpoint['state_dict'] = dict(
            (key[7:], value)
            for (key, value) in checkpoint['state_dict'].items())
        model.load_state_dict(checkpoint['state_dict'])
        cur_epoch = checkpoint['epoch'] + 1
        best_err1 = checkpoint['best_err1']
        print("=> loaded checkpoint '{}'(epoch: {}, best err1: {}%)".format(
            pretrained, cur_epoch, checkpoint['best_err1']))
    else:
        cur_epoch = 0
        print("=> no checkpoint found at '{}'".format(pretrained))
    model = torch.nn.DataParallel(model).cuda()
    print('the number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    criterion_batch = nn.CrossEntropyLoss(reduction='none').cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)
    if os.path.isfile(pretrained):
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("optimizer is loaded!")

    # Channel statistics as (1, 3, 1, 1) tensors for on-GPU (de)normalization.
    mean_torch = torch.tensor(mean,
                              dtype=torch.float32).reshape(1, 3, 1, 1).cuda()
    std_torch = torch.tensor(std,
                             dtype=torch.float32).reshape(1, 3, 1, 1).cuda()

    # Optional multiprocessing pool used by the training method.
    if args.mp > 0:
        mp = Pool(args.mp)
    else:
        mp = None

    # Start training and validation
    for epoch in range(cur_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train_loss = train(train_loader, model, criterion, criterion_batch,
                           optimizer, epoch, mean_torch, std_torch, mp)

        # evaluate on validation set
        err1, err5, val_loss = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = err1 <= best_err1
        best_err1 = min(err1, best_err1)
        if is_best:
            best_err5 = err5
        print('Current best accuracy (top-1 and 5 error):', best_err1,
              best_err5)
        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.net_type,
                'state_dict': model.state_dict(),
                'best_err1': best_err1,
                'best_err5': best_err5,
                'optimizer': optimizer.state_dict(),
            }, is_best)
    print('Best accuracy (top-1 and 5 error):', best_err1, best_err5)
def main():
    """Train and evaluate the CRNN speech-emotion-recognition model.

    Builds a ResNet feature extractor feeding a (bidirectional) GRU
    classifier, trains for ``--max_epochs`` epochs on the downloaded wav
    dataset, keeps the checkpoint with the best validation accuracy, and
    finally reports loss/accuracy on the held-out test split.

    Returns:
        0 on successful completion; None when ``--mode`` is not 'train'.
    """
    parser = argparse.ArgumentParser(description='Speech Emotion Recognition')
    # NOTE: help texts below were corrected to match the actual defaults.
    parser.add_argument('--hidden_size', type=int, default=512,
                        help='hidden size of model (default: 512)')
    parser.add_argument('--layer_size', type=int, default=3,
                        help='number of layers of model (default: 3)')
    parser.add_argument('--n_class', type=int, default=7,
                        help='number of classes of data (default: 7)')
    parser.add_argument('--dropout', type=float, default=0.2,
                        help='dropout rate in training (default: 0.2)')
    # NOTE(review): with default=True and action='store_true' this flag can
    # never be disabled from the CLI; kept as-is to preserve the interface.
    parser.add_argument('--bidirectional', default=True, action='store_true',
                        help='use bidirectional RNN (default: True)')
    parser.add_argument('--batch_size', type=int, default=8,
                        help='batch size in training (default: 8)')
    parser.add_argument(
        '--workers', type=int, default=4,
        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs', type=int, default=30,
                        help='number of max epochs in training (default: 30)')
    parser.add_argument('--lr', type=float, default=1e-04,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name', type=str, default='model',
                        help='the name of model')
    parser.add_argument('--mode', type=str, default='train')
    args = parser.parse_args()

    # Seed every RNG we use so runs are reproducible.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # N_FFT: defined in loader.py.
    # FIX: use integer division — a layer size must be an int; under
    # Python 3 `N_FFT / 2 + 1` produces a float.
    feature_size = N_FFT // 2 + 1
    cnn = resnet.ResNet(feature_size, resnet.BasicBlock, [3, 3, 3])
    rnn = RNN.RNN(cnn.feature_size, args.hidden_size, args.n_class,
                  input_dropout_p=args.dropout, dropout_p=args.dropout,
                  n_layers=args.layer_size,
                  bidirectional=args.bidirectional,
                  rnn_cell='gru', variable_lengths=False)
    model = CRNN.CRNN(cnn, rnn)
    model.flatten_parameters()
    model = nn.DataParallel(model).to(device)

    optimizer = optim.Adam(model.module.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss(reduction='sum').to(device)

    if args.mode != 'train':
        return

    data_download()
    wav_paths = [
        os.path.join('./dataset/wav', fname)
        for fname in os.listdir('./dataset/wav')
    ]

    best_acc = 0
    begin_epoch = 0
    # FIX: initialize save_epoch so the final log line cannot raise
    # NameError when no epoch ever improves on best_acc.
    save_epoch = begin_epoch
    # loss_acc rows: [train_loss, train_acc, eval_loss, eval_acc] per epoch.
    loss_acc = [[], [], [], []]
    train_batch_num, train_dataset_list, valid_dataset, test_dataset = split_dataset(
        args, wav_paths, dataset_ratio=[0.7, 0.1, 0.2])
    logger.info('start')
    train_begin = time.time()

    for epoch in range(begin_epoch, args.max_epochs):
        # Training pass: batches are produced by worker threads into a queue.
        train_queue = queue.Queue(args.workers * 2)
        train_loader = MultiLoader(train_dataset_list, train_queue,
                                   args.batch_size, args.workers)
        train_loader.start()
        train_loss, train_acc = train(model, train_batch_num, train_queue,
                                      criterion, optimizer, device,
                                      train_begin, args.workers, 10)
        logger.info('Epoch %d (Training) Loss %0.4f Acc %0.4f' %
                    (epoch, train_loss, train_acc))
        train_loader.join()
        loss_acc[0].append(train_loss)
        loss_acc[1].append(train_acc)

        # Validation pass on a single loader thread.
        valid_queue = queue.Queue(args.workers * 2)
        valid_loader = BaseDataLoader(valid_dataset, valid_queue,
                                      args.batch_size, 0)
        valid_loader.start()
        eval_loss, eval_acc = evaluate(model, valid_loader, valid_queue,
                                       criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f Acc %0.4f' %
                    (epoch, eval_loss, eval_acc))
        valid_loader.join()
        loss_acc[2].append(eval_loss)
        loss_acc[3].append(eval_acc)

        # Checkpoint whenever validation accuracy improves.
        best_model = (eval_acc > best_acc)
        if best_model:
            best_acc = eval_acc
            torch.save(model.state_dict(), './save_model/best_model.pt')
            save_epoch = epoch

    # Restore the best checkpoint and measure final test performance.
    model.load_state_dict(torch.load('./save_model/best_model.pt'))
    test_queue = queue.Queue(args.workers * 2)
    test_loader = BaseDataLoader(test_dataset, test_queue, args.batch_size, 0)
    test_loader.start()
    test_loss, test_acc = evaluate(model, test_loader, test_queue, criterion,
                                   device)
    logger.info('Epoch %d (Test) Loss %0.4f Acc %0.4f' %
                (save_epoch, test_loss, test_acc))
    test_loader.join()

    save_data(loss_acc, test_loss, test_acc)
    plot_data(loss_acc, test_loss, test_acc)
    return 0
def resnet_model_fn(features, labels, mode, params):
    """Our model_fn for ResNet to be used with our Estimator.

    Builds the network, and depending on `mode` returns an EstimatorSpec
    for prediction, or one carrying a class-balanced weighted sigmoid
    cross-entropy loss (plus L2 weight decay) and a warmup + piecewise
    momentum training op distributed via Horovod.

    Args:
        features: input image tensor fed to resnet.ResNet.
        labels: per-class targets; entries equal to -1.0 are masked out of
            the loss (see `non_neg_mask`), and entries >= FLAGS.mask_thres
            mark a class as "positive" in the current batch.
        mode: a tf.estimator.ModeKeys value.
        params: unused here (required by the Estimator model_fn signature).
    """
    #tf.summary.image('images', features, max_outputs=6)
    # build model
    net = resnet.ResNet(features,
                        is_training=(mode == tf.estimator.ModeKeys.TRAIN))
    logits = net.build_model()
    predictions = {
        'classes': tf.argmax(logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }
    # Prediction mode needs no loss/train_op — return early.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    # a. get per-class loss coefficients.
    # pos_mask[c] counts how many samples in the batch have label >= mask_thres
    # for class c; pos_curr_count / neg_curr_count are 0/1 indicators of
    # whether class c is positive / negative in this batch.
    pos_mask = tf.reduce_sum(
        tf.cast(
            tf.greater_equal(labels,
                             tf.fill(tf.shape(labels), FLAGS.mask_thres)),
            tf.float32), 0)
    pos_curr_count = tf.cast(tf.greater(pos_mask, 0), tf.float32)
    neg_curr_count = tf.cast(tf.less_equal(pos_mask, 0), tf.float32)
    # Running per-class counters, persisted across steps as non-trainable
    # graph variables; updated in place via tf.assign_* below.
    pos_count = tf.Variable(tf.zeros(shape=[FLAGS.class_num, ]),
                            trainable=False)
    neg_count = tf.Variable(tf.zeros(shape=[FLAGS.class_num, ]),
                            trainable=False)
    # Randomly subsample negative classes: each class is kept with
    # probability FLAGS.neg_select.
    neg_select = tf.cast(
        tf.less_equal(
            tf.random_uniform(shape=[FLAGS.class_num, ],
                              minval=0, maxval=1,
                              seed=FLAGS.random_seed), FLAGS.neg_select),
        tf.float32)
    #tf.compat.v1.summary.histogram('pos_curr_count', pos_curr_count)
    #tf.compat.v1.summary.histogram('neg_curr_count', neg_curr_count)
    #tf.compat.v1.summary.histogram('neg_select', neg_select)

    # Stateful updates: a positive batch increments pos_count and resets the
    # class's neg streak (and vice versa). The control_dependencies block
    # pins evaluation order — do not reorder these assigns.
    with tf.control_dependencies([pos_curr_count, neg_curr_count,
                                  neg_select]):
        pos_count = tf.assign_sub(
            tf.assign_add(pos_count, pos_curr_count),
            tf.multiply(pos_count, neg_curr_count))
        neg_count = tf.assign_sub(
            tf.assign_add(neg_count, tf.multiply(neg_curr_count, neg_select)),
            tf.multiply(neg_count, pos_curr_count))
    #tf.compat.v1.summary.histogram('pos_count', pos_count)
    #tf.compat.v1.summary.histogram('neg_count', neg_count)

    # Positive coefficient: -log10((0.01 + streak) / 10), floored at 0.01,
    # zeroed for classes that are not positive in this batch.
    pos_loss_coef = -1 * (tf.log((0.01 + pos_count) / 10) / tf.log(10.0))
    pos_loss_coef = tf.where(
        tf.greater(pos_loss_coef, tf.fill(tf.shape(pos_loss_coef), 0.01)),
        pos_loss_coef,
        tf.fill(tf.shape(pos_loss_coef), 0.01))
    pos_loss_coef = tf.multiply(pos_loss_coef, pos_curr_count)
    #tf.compat.v1.summary.histogram('pos_loss_coef', pos_loss_coef)
    # Negative coefficient: same form with an offset of 8.
    # NOTE(review): the comparison threshold is 0.01 but the floor filled in
    # is 0.001 — asymmetric with the positive branch; confirm intentional.
    neg_loss_coef = -1 * (tf.log((8 + neg_count) / 10) / tf.log(10.0))
    neg_loss_coef = tf.where(
        tf.greater(neg_loss_coef, tf.fill(tf.shape(neg_loss_coef), 0.01)),
        neg_loss_coef,
        tf.fill(tf.shape(neg_loss_coef), 0.001))
    neg_loss_coef = tf.multiply(neg_loss_coef,
                                tf.multiply(neg_curr_count, neg_select))
    #tf.compat.v1.summary.histogram('neg_loss_coef', neg_loss_coef)
    loss_coef = tf.add(pos_loss_coef, neg_loss_coef)
    #tf.compat.v1.summary.histogram('loss_coef', loss_coef)

    # b. get non-negative mask: entries labeled -1.0 are "ignore" and
    # contribute nothing to the loss.
    non_neg_mask = tf.fill(tf.shape(labels), -1.0, name='non_neg')
    non_neg_mask = tf.cast(tf.not_equal(labels, non_neg_mask), tf.float32)
    #tf.compat.v1.summary.histogram('non_neg', non_neg_mask)

    # cal loss: weighted sigmoid cross-entropy (positives up-weighted 12x),
    # masked, averaged over the batch, then scaled per class by loss_coef.
    cross_entropy = tf.nn.weighted_cross_entropy_with_logits(
        logits=logits,
        labels=labels,
        pos_weight=12,
        name='sigmod_cross_entropy')
    #tf.compat.v1.summary.histogram('sigmod_ce', cross_entropy)
    cross_entropy_cost = tf.reduce_sum(
        tf.reduce_mean(cross_entropy * non_neg_mask, axis=0) * loss_coef)

    # Create a tensor named cross_entropy for logging purposes.
    tf.identity(cross_entropy_cost, name='cross_entropy')
    #tf.summary.scalar('cross_entropy', cross_entropy_cost)
    #tf.compat.v1.summary.scalar('cross_entropy', cross_entropy_cost)

    # Add weight decay to the loss. We exclude the batch norm variables because
    # doing so leads to a small improvement in accuracy.
    loss = cross_entropy_cost + FLAGS.weight_decay * tf.add_n([
        tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Scale the learning rate linearly with the batch size. When the batch size
        # is 256, the learning rate should be 0.1.
        lr_warmup = FLAGS.lr_warmup
        warmup_step = FLAGS.warmup
        warmup_decay_step = FLAGS.lr_warmup_decay_step
        warmup_decay_factor = FLAGS.lr_warmup_decay_factor
        #global_step = tf.train.get_or_create_global_step()
        global_step = tf.compat.v1.train.get_or_create_global_step()
        # Piecewise-constant schedule: drop LR by 10x at each boundary epoch.
        boundaries = [
            int(FLAGS.lr_decay_step * epoch) for epoch in [1, 2, 3, 4]
        ]
        values = [FLAGS.lr * decay for decay in [1, 0.1, 0.01, 1e-3, 1e-4]]
        #learning_rate = tf.train.piecewise_constant(
        learning_rate = tf.compat.v1.train.piecewise_constant(
            tf.cast(global_step, tf.int32), boundaries, values)

        # Linear Scaling Rule and Gradual Warmup: before warmup_step, use an
        # exponentially-decayed warmup LR; afterwards, the piecewise schedule.
        lr = tf.cond(
            global_step < warmup_step,
            #lambda: tf.train.exponential_decay(
            lambda: tf.compat.v1.train.exponential_decay(lr_warmup,
                                                         global_step,
                                                         warmup_decay_step,
                                                         warmup_decay_factor,
                                                         staircase=True),
            lambda: learning_rate)

        # Create a tensor named learning_rate for logging purposes.
        tf.identity(lr, name='learning_rate')
        #tf.summary.scalar('learning_rate', lr)
        #tf.compat.v1.summary.scalar('learning_rate', lr)

        #optimizer = tf.train.MomentumOptimizer(
        #    learning_rate=lr,
        #    momentum=FLAGS.opt_momentum)
        optimizer = tf.compat.v1.train.MomentumOptimizer(
            learning_rate=lr, momentum=FLAGS.opt_momentum)
        # Wrap for multi-worker gradient averaging (Horovod).
        optimizer = hvd.DistributedOptimizer(
            optimizer)  #, device_sparse='/cpu:0')

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)
    else:
        train_op = None

    # Build evaluate metrics
    accuracy = tf.metrics.accuracy(tf.argmax(labels, axis=1),
                                   predictions['classes'])
    metrics = {'accuracy': accuracy}
    tf.identity(accuracy[1], name='train_accuracy')
    #tf.summary.scalar('train_accuracy', accuracy[1])
    #tf.compat.v1.summary.scalar('train_accuracy', accuracy[1])

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      eval_metric_ops=metrics)
def train():
    """Train the EAST text detector with PaddlePaddle (fluid, graph mode).

    Builds the train program (ResNet backbone + EAST heads), sets up an Adam
    optimizer with warmup + step-decay LR, optionally loads pretrained
    weights, then runs the feed loop over the ICDAR2015 batch generator,
    logging stats and snapshotting every cfg.TRAIN.snapshot_iter iterations.
    All hyperparameters come from the module-level `cfg`.
    """
    learning_rate = cfg.learning_rate
    # Fixed CHW input shape the model graph is built for.
    image_shape = [3, 512, 512]

    if cfg.enable_ce:
        # Continuous-evaluation mode: pin every seed for reproducibility.
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    devices_num = get_device_num()
    # NOTE(review): total_batch_size is computed but the data generator below
    # uses a hard-coded batch_size=14 — confirm which one is intended.
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch
    use_random = True

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    # Build model + optimizer inside the program/unique-name guards so all
    # ops and parameters land in train_prog/startup_prog.
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = model_builder.EAST(
                add_conv_body_func=resnet.ResNet(),
                use_random=use_random)
            model.build_model(image_shape)
            losses, keys = model.loss()
            loss = losses[0]
            fetch_list = losses

            # Step-decay LR: multiply by gamma at each boundary, preceded by
            # linear warmup.
            boundaries = cfg.lr_steps
            gamma = cfg.lr_gamma
            step_num = len(cfg.lr_steps)
            values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

            lr = exponential_with_warmup_decay(
                learning_rate=learning_rate,
                boundaries=boundaries,
                values=values,
                warmup_iter=cfg.warm_up_iter,
                warmup_factor=cfg.warm_up_factor)
            optimizer = fluid.optimizer.AdamOptimizer(
                learning_rate=lr,
                regularization=fluid.regularizer.L2Decay(cfg.weight_decay))
            optimizer.minimize(loss)
            fetch_list = fetch_list + [lr]

    # Mark fetched vars persistable so they survive scope cleanup between
    # executor runs.
    for var in fetch_list:
        var.persistable = True

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    build_strategy.sync_batch_norm = True
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_iteration_per_drop_scope = 1
    # Initialize parameters before (optionally) overwriting with pretrained.
    exe.run(startup_prog)

    if cfg.pretrained_model:
        # Only load vars that actually have a file in the pretrained dir.
        def if_exist(var):
            return os.path.exists(
                os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    dataset = icdar.ICDAR2015Dataset()
    data_generator = dataset.get_batch(num_workers=24,
                                       input_size=512,
                                       batch_size=14)

    def train_loop():
        # Feed loop: pulls batches from the generator and logs smoothed
        # stats every 10 iterations.
        start_time = time.time()
        prev_start_time = start_time
        start = start_time
        train_stats = TrainingStats(cfg.log_window, keys)
        #for iter_id, data in enumerate(next(data_generator)):
        for iter_id in range(100000):
            data = next(data_generator)
            #for data in data_list:
            prev_start_time = start_time
            start_time = time.time()
            # data layout (from get_batch): [images, _, score_maps,
            # geo_maps, training_masks] — index 1 is unused here.
            outs = exe.run(compiled_train_prog,
                           fetch_list=[v.name for v in fetch_list],
                           feed={"input_images": data[0],
                                 "input_score_maps": data[2],
                                 "input_geo_maps": data[3],
                                 "input_training_masks": data[4]})
            # outs[:-1] are the loss terms (keyed by `keys`); outs[-1] is lr.
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            strs = '{}, batch: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id, np.mean(outs[-1]), logs,
                start_time - prev_start_time)
            if iter_id % 10 == 0:
                print(strs)
                sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model(exe, "model_iter{}".format(iter_id), train_prog)
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        # NOTE(review): total_time/last_loss are computed but never used or
        # returned — presumably left over from benchmark logging.
        total_time = end_time - start_time
        last_loss = np.array(outs[0]).mean()

    train_loop()