Exemplo n.º 1
0
def infer():
    """Run RRPN inference over every image in cfg.image_path and draw the
    predicted (NMS-ed) boxes on each image."""
    run_place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    executor = fluid.Executor(run_place)
    class_nums = cfg.class_num
    detector = model_builder.RRPN(add_conv_body_func=resnet.ResNet(),
                                  add_roi_box_head_func=resnet.ResNetC5(),
                                  use_pyreader=False,
                                  mode='infer')
    startup_prog = fluid.Program()
    main_prog = fluid.Program()
    # Build the inference graph inside its own program / unique-name scope.
    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            detector.build_model()
            pred_boxes = detector.eval_bbox_out()
    main_prog = main_prog.clone(True)
    executor.run(startup_prog)
    fluid.load(main_prog, cfg.pretrained_model, executor)
    loader = detector.data_loader
    loader.set_sample_list_generator(reader.infer(cfg.image_path),
                                     places=run_place)
    fetch_vars = [pred_boxes]
    # Reader order is assumed to match the sorted directory listing.
    image_names = sorted(os.listdir(cfg.image_path))

    for idx, batch in enumerate(loader()):
        fetched = executor.run(main_prog,
                               fetch_list=[v.name for v in fetch_vars],
                               feed=batch,
                               return_numpy=False)
        # im_info row is (height, width, scale); index 2 is the resize scale.
        scale = np.array(batch[0]['im_info'])[0][2]
        boxes = np.array(fetched[0])
        draw_bounding_box_on_image(cfg.image_path, image_names[idx], boxes,
                                   scale, cfg.draw_threshold)
Exemplo n.º 2
0
    def construct_model_new(self, block_nums, solution):
        """Build a ResNet whose per-stage block counts come from *solution*.

        Args:
            block_nums: per-stage block counts of the original network; they
                partition the flat *solution* vector into consecutive
                per-stage segments.
            solution: flat vector with one entry per original block; the sum
                over a stage's segment is the number of blocks kept there.

        Returns:
            Tuple (model_new, block_nums_new): the rebuilt Bottleneck ResNet
            (1000 classes) and the new per-stage block counts.
        """
        block_nums_new = []
        offset = 0
        # Walk the flat solution with a running offset instead of re-summing
        # prefixes per stage (original used sum(block_nums[:i]) twice per
        # iteration, which is O(n^2) in the number of stages).
        for count in block_nums:
            stage_slice = solution[offset:offset + count]
            block_nums_new.append(int(sum(stage_slice)))
            offset += count
        model_new = resnet.ResNet(resnet.Bottleneck,
                                  block_nums_new,
                                  num_classes=1000)

        return model_new, block_nums_new
Exemplo n.º 3
0
def main():
    """Classify every image listed in FLAGS.images with a trained ResNet and
    cache per-image class indices (above FLAGS.prob_thres) to FLAGS.result.

    The result file doubles as a resume checkpoint: already-classified image
    ids are skipped on restart.
    """
    # build model
    images = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
    net = resnet.ResNet(images, is_training=False)
    net.build_model()
    logits = net.logit
    # restore model
    saver = tf.train.Saver(tf.global_variables())
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = FLAGS.visiable_gpus
    config.log_device_placement = False
    sess = tf.Session(config=config)
    # load trained model
    saver.restore(sess, FLAGS.pretrain_ckpt)
    # crop strategy passed to preprocess(): 'center' or '10crop'
    types = 'center'

    # Each line is "path<TAB>image_id".
    with open(FLAGS.images) as f:
        img_paths = [l.strip().split('\t') for l in f]

    # Resume from a previous partial run if a result file already exists.
    cache = {}
    if os.path.exists(FLAGS.result):
        with open(FLAGS.result, 'rb') as f:
            cache = pkl.load(f)

    unfinished = [(p, i) for p, i in img_paths if i not in cache]
    progress = tqdm(unfinished)
    n_errors = 0
    for img_path, img_id in progress:
        raw_img = cv.imread(img_path)
        if raw_img is None or raw_img.data is None:
            n_errors += 1
            # BUG FIX: the original fell through and handed the unreadable
            # image (None) to preprocess(), which would crash; skip it.
            continue
        imgs = preprocess(raw_img, types)
        output = sess.run(logits, {images: imgs})
        output = np.squeeze(output[0])
        if types == '10crop':
            # Average the logits over the ten crops.
            output = np.mean(output, axis=0)
        # Class indices above the probability threshold, best first.
        idx = output.argsort()[::-1]
        output = idx[output[idx] > FLAGS.prob_thres].tolist()
        cache[img_id] = output
        progress.set_description("n_errors: {}".format(n_errors))
        # Periodically flush the cache so progress survives interruption.
        if len(cache) % FLAGS.flush_every == 0:
            with open(FLAGS.result, 'wb') as f:
                pkl.dump(cache, f, protocol=4)

    with open(FLAGS.result, 'wb') as f:
        pkl.dump(cache, f, protocol=4)
Exemplo n.º 4
0
    def __init__(self, config_file, model_name):
        """Load configuration, dataset statistics, the trained classifier, and
        the similarity-search catalog.

        Args:
            config_file: path to the configuration file.
            model_name: model/section name inside the configuration file.
        """
        self.configuration = conf.ConfigurationFile(config_file, model_name)
        #loading data statistics saved at training time (mean image + shape)
        mean_file = os.path.join(self.configuration.get_data_dir(), "mean.dat")
        shape_file = os.path.join(self.configuration.get_data_dir(),
                                  "shape.dat")
        # shape.dat holds the input shape as int32; mean.dat is the flattened
        # float32 mean image, reshaped back to that input shape.
        self.input_shape = np.fromfile(shape_file, dtype=np.int32)
        self.mean_image = np.fromfile(mean_file, dtype=np.float32)
        self.mean_image = np.reshape(self.mean_image, self.input_shape)

        #loading classifier model (ResNet with [3,4,6,3] stages)
        model = resnet.ResNet([3, 4, 6, 3], [64, 128, 256, 512],
                              self.configuration.get_number_of_classes(),
                              se_factor=0)
        # Calling the model on a symbolic input builds its layers so that
        # summary() and load_weights() can work.
        input_image = tf.keras.Input(
            (self.input_shape[0], self.input_shape[1], self.input_shape[2]),
            name='input_image')
        model(input_image)
        model.summary()
        # skip_mismatch tolerates layers whose shapes changed since training
        model.load_weights(self.configuration.get_checkpoint_file(),
                           by_name=True,
                           skip_mismatch=True)
        #create the sim-model: same backbone truncated at an embedding layer
        #you can change output_layer_name
        output_layer_name = 'global_average_pooling2d'
        output = model.get_layer(output_layer_name).output
        self.sim_model = tf.keras.Model(model.input, output)
        self.sim_model.summary()
        print('sim_model was loaded OK')
        #defining the image preprocessing function used before embedding
        self.process_fun = imgproc.process_image
        #loading catalog of images available for similarity search
        self.ssearch_dir = os.path.join(self.configuration.get_data_dir(),
                                        'ssearch')
        catalog_file = os.path.join(self.ssearch_dir, 'catalog.txt')
        assert os.path.exists(catalog_file), '{} does not exist'.format(
            catalog_file)
        print('loading catalog ...')
        self.load_catalog(catalog_file)
        print('loading catalog ok ...')
        # Search stays disabled until features are loaded/computed elsewhere.
        self.enable_search = False
Exemplo n.º 5
0
def eval():
    """Evaluate a trained RRPN model on the test set and report ICDAR
    detection metrics."""
    run_place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    executor = fluid.Executor(run_place)
    class_nums = cfg.class_num
    detector = model_builder.RRPN(add_conv_body_func=resnet.ResNet(),
                                  add_roi_box_head_func=resnet.ResNetC5(),
                                  use_pyreader=False,
                                  mode='val')

    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    # Build the evaluation graph in its own program / unique-name scope.
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            detector.build_model()
            pred_boxes = detector.eval_bbox_out()
    eval_prog = eval_prog.clone(True)
    executor.run(startup_prog)
    fluid.load(eval_prog, cfg.pretrained_model, executor)
    loader = detector.data_loader
    loader.set_sample_list_generator(reader.test(1), places=run_place)

    fetch_vars = [pred_boxes]
    res_list = []
    # Field names expected by get_key_dict / icdar_eval.
    keys = [
        'bbox', 'gt_box', 'gt_class', 'is_crowed', 'im_info', 'im_id',
        'is_difficult'
    ]
    for step, batch in enumerate(loader()):
        fetched = executor.run(eval_prog,
                               fetch_list=[v.name for v in fetch_vars],
                               feed=batch,
                               return_numpy=False)
        boxes = np.array(fetched[0])
        res_list.append(get_key_dict(boxes, batch[0], keys))
        if step % 50 == 0:
            logger.info('test_iter {}'.format(step))
    icdar_eval(res_list)
Exemplo n.º 6
0
def tower_model(images, labels):
    """Build one tower's ResNet and its regularized training loss.

    Returns the model and a scalar loss: softmax cross entropy plus L2
    weight decay, where batch-norm variables get a 10x smaller decay factor
    (said to help generalization on large-scale data).
    """
    model = resnet.ResNet(
        images, is_training=(FLAGS.mode == tf.estimator.ModeKeys.TRAIN))
    model.build_model()

    # Softmax cross entropy against the one-hot labels; exported both as a
    # named tensor and a summary for monitoring.
    cross_entropy = tf.losses.softmax_cross_entropy(logits=model.logit,
                                                    onehot_labels=labels)
    tf.identity(cross_entropy, name='cross_entropy')
    tf.summary.scalar('cross_entropy', cross_entropy)

    # Partition trainables by name into batch-norm and everything else.
    trainables = tf.trainable_variables()
    non_bn_l2 = tf.add_n(
        [tf.nn.l2_loss(v) for v in trainables if 'bn' not in v.name])
    bn_l2 = tf.add_n(
        [tf.nn.l2_loss(v) for v in trainables if 'bn' in v.name])
    loss = (cross_entropy + FLAGS.weight_decay * non_bn_l2 +
            0.1 * FLAGS.weight_decay * bn_l2)

    return model, loss
Exemplo n.º 7
0
def demo(data,
         save,
         depth=30,
         valid_size=5000,
         n_epochs=25,
         batch_size=64,
         seed=None):
    """
    A demo to show off training of a ResNet.
    Trains and evaluates a ResNet-32 on CIFAR-10.

    Args:
        data (str) - path to directory where data should be loaded from/downloaded
        save (str) - path to save the model to

        depth (int) - nominal depth of the network (default 30)
            NOTE(review): currently unused -- the model below hard-codes
            depth=32; confirm whether it should be passed through.

        valid_size (int) - size of validation set (default 5000)
        n_epochs (int) - number of epochs for training (default 25)
        batch_size (int) - size of minibatch (default 64)
        seed (int) - manually set the random seed (default None)
    """

    # Channel mean/std used for input normalization.
    # NOTE(review): these look like CIFAR-100 statistics, but the datasets
    # below are CIFAR-10 -- confirm which values were intended.

    # Data transforms
    mean = [0.5071, 0.4867, 0.4408]
    stdv = [0.2675, 0.2565, 0.2761]
    train_transforms = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=stdv),
    ])
    test_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=stdv),
    ])

    # Datasets (train set is downloaded on demand; test set must exist)
    train_set = datasets.CIFAR10(data,
                                 train=True,
                                 transform=train_transforms,
                                 download=True)
    test_set = datasets.CIFAR10(data,
                                train=False,
                                transform=test_transforms,
                                download=False)

    # Models
    model = resnet.ResNet(depth=32, num_classes=10)
    print(model)

    # Make save directory
    if not os.path.exists(save):
        os.makedirs(save)
    if not os.path.isdir(save):
        raise Exception('%s is not a dir' % save)

    # Train the model (train() is defined elsewhere in this project)
    train(model=model,
          train_set=train_set,
          test_set=test_set,
          save=save,
          valid_size=valid_size,
          n_epochs=n_epochs,
          batch_size=batch_size,
          seed=seed)
    print('Done!')
Exemplo n.º 8
0
from skimage.io import imread, imsave
from skimage.util import img_as_float

import utils
from sys import argv

# Tiling parameters: mirrored-border width and tile overlap, in pixels.
border = 30
overlap = 30

# Simple CLI: require exactly an input path and an output path.
if len(argv) != 3:
    print('Usage:', argv[0], 'input_file output_file')
    exit(0)


# Build the network once at module level and switch it to inference mode.
net = resnet.ResNet(32, 128)
net = net.cuda()
net.eval()


def save_model(filename):
    """Persist the global net's weights under checkpoints/<filename>."""
    target = 'checkpoints/' + filename
    torch.save(net.state_dict(), target)


def load_model(filename):
    """Load a saved state dict from *filename* into the global net."""
    state = torch.load(filename)
    net.load_state_dict(state)


def add_border(img, n_pixels):
    """
    Add a mirrored border to the image
Exemplo n.º 9
0
def main():
    """Evaluate a pretrained classifier on CIFAR-10/100 or ImageNet.

    Builds the validation loader for args.dataset, constructs the network
    selected by args.net_type, loads the checkpoint at args.pretrained, and
    reports top-1/top-5 error from validate().
    """
    global args, best_err1, best_err5
    args = parser.parse_args()

    if args.dataset.startswith('cifar'):
        # CIFAR channel statistics given on the 0-255 scale, rescaled to [0,1].
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

        # NOTE(review): transform_train is built but never used here (this
        # function only evaluates) -- confirm it can be dropped.
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])

        transform_test = transforms.Compose([transforms.ToTensor(), normalize])

        # NOTE(review): shuffle=True on a validation loader is unusual and
        # does not affect the error metrics -- confirm intended.
        if args.dataset == 'cifar100':
            val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR100('../data',
                                  train=False,
                                  transform=transform_test),
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.workers,
                pin_memory=True)
            numberofclass = 100
        elif args.dataset == 'cifar10':
            val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR10('../data',
                                 train=False,
                                 transform=transform_test),
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.workers,
                pin_memory=True)
            numberofclass = 10
        else:
            raise Exception('unknown dataset: {}'.format(args.dataset))

    elif args.dataset == 'imagenet':
        # Hard-coded validation directory for the ImageNet copy on this host.
        valdir = os.path.join(
            '/data_large/readonly/ImageNet-Fast/imagenet/val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        val_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(), normalize
        ])
        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            valdir, val_transform),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)
        numberofclass = 1000

    else:
        raise Exception('unknown dataset: {}'.format(args.dataset))

    print("=> creating model '{}'".format(args.net_type))
    if args.net_type == 'resnet':
        model = RN.ResNet(args.dataset, args.depth, numberofclass,
                          args.bottleneck)  # for ResNet
    elif args.net_type == 'pyramidnet':
        model = PYRM.PyramidNet(args.dataset, args.depth, args.alpha,
                                numberofclass, args.bottleneck)
    else:
        raise Exception('unknown network architecture: {}'.format(
            args.net_type))

    # Checkpoints below expect DataParallel's "module."-prefixed state dict.
    model = torch.nn.DataParallel(model).cuda()

    if os.path.isfile(args.pretrained):
        print("=> loading checkpoint '{}'".format(args.pretrained))
        checkpoint = torch.load(args.pretrained)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}'(best err1: {}%)".format(
            args.pretrained, checkpoint['best_err1']))
    else:
        raise Exception("=> no checkpoint found at '{}'".format(
            args.pretrained))

    print('the number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    cudnn.benchmark = True

    # evaluate on validation set
    err1, err5, val_loss = validate(val_loader, model, criterion)
    print('Accuracy (top-1 and 5 error):', err1, err5)
Exemplo n.º 10
0
    else:
        nb_classes = 10
        epochs = 40
        learning_rate = 0.01
        droprate = 0.2
        print('load svhn data..')
        # reference : https://github.com/robertomest/convnet-study
        train_set, valid_set, test_set = get_svhn_data.load('data/svhn')
        train_set['data'] = get_svhn_data.preprocess(train_set['data'])
        test_set['data'] = get_svhn_data.preprocess(test_set['data'])
        validation_data = (test_set['data'], test_set['labels'])
        input_shape = (32, 32, 3)

    print('create residual densenet..')
    model = resnet.ResNet(nb_classes=10,
                          img_dim=(32, 32, 3),
                          k=widen_factor,
                          nb_blocks=nb_blocks)
    opt = SGD(lr=0.1, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    model.summary()

    # training
    if dataset_select == 'cifar10' or dataset_select == 'cifar100':
        print('start training with {}'.format(dataset_select))
        print('Using real-time data augmentation..')
        datagen = ImageDataGenerator(horizontal_flip=True,
                                     width_shift_range=5. / 32,
                                     height_shift_range=5. / 32)
Exemplo n.º 11
0
    # save_freq: frequency in terms of number steps each time checkpoint is saved
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(configuration.get_snapshot_dir(),
                              "{epoch:03d}.h5"),
        save_weights_only=True,
        mode="max",
        monitor="val_acc",
        save_freq="epoch",
    )
    # save_freq = configuration.get_snapshot_steps())
    # resnet 34
    if configuration.get_model_name() == "SKETCH":
        model = alexnet.AlexNetModel(configuration.get_number_of_classes())
        process_fun = imgproc.process_sketch
    elif configuration.get_model_name() == "FASHION-RESNET50":
        model = resnet.ResNet(configuration.get_number_of_classes())
        process_fun = imgproc.process_sketch
    elif configuration.get_model_name() == "FASHION-ALEXNET":
        model = alexnet.AlexNetModel(configuration.get_number_of_classes())
        process_fun = imgproc.process_sketch
        # elif configuration.get_model_name() == 'FASHION-RESNEXT':
        #     model = alexnet.(configuration.get_number_of_classes())
        process_fun = imgproc.process_sketch
    else:
        model = simple.SimpleModel(configuration.get_number_of_classes())
        process_fun = imgproc.process_mnist

        # resnet_50
    # model = resnet.ResNet([3,4,6,3],[64,128,256,512], configuration.get_number_of_classes(), use_bottleneck = True)
    # build the model indicating the input shape
    # define the model input
Exemplo n.º 12
0
    def _build_model(self):
        """Construct self.net from option.arch/option.depth and self.loss,
        then move both to GPU(s) when option.cuda is set.

        Raises:
            ValueError: for an unsupported architecture or depth. The message
                is logged first, and (improvement over the original bare
                ``raise ValueError``) also attached to the exception.
        """
        # Per-depth stage layer counts, shared by resnet and preact-resnet.
        depth_layers = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
        }

        if self.option.arch == 'resnet':
            from models import resnet
            if self.option.depth not in depth_layers:
                msg = "Unknown depth for resnet: %d. Should be one of (18, 34, 50, 101, 152)" % self.option.depth
                self.logger.info(msg)
                raise ValueError(msg)
            n_layers = depth_layers[self.option.depth]
            # Depths 18/34 use the basic block; 50+ use bottleneck blocks.
            block = (resnet.BasicBlock
                     if self.option.depth in (18, 34) else resnet.Bottleneck)
            self.net = resnet.ResNet(block,
                                     n_layers,
                                     num_classes=self.option.n_class)

        elif self.option.arch == 'preresnet':
            from models import preact_resnet
            if self.option.depth not in depth_layers:
                msg = "Unknown depth for pre-resnet: %d. Should be one of (18, 34, 50, 101, 152)" % self.option.depth
                self.logger.info(msg)
                raise ValueError(msg)
            n_layers = depth_layers[self.option.depth]
            block = (preact_resnet.PreActBlock
                     if self.option.depth in (18, 34) else
                     preact_resnet.PreActBottleneck)
            self.net = preact_resnet.PreActResNet(
                block, n_layers, num_classes=self.option.n_class)

        elif self.option.arch == 'vgg':
            from models import vgg
            vgg_name = 'VGG%d' % self.option.depth
            if vgg_name in vgg.cfg:
                self.net = vgg.VGG(vgg_name)
            else:
                msg = "Unknown depth for vgg: %d. Should be one of (11, 13, 16, 19)" % self.option.depth
                self.logger.info(msg)
                raise ValueError(msg)

        else:
            msg = "Unknown architecture: %s. Should be one of ('resnet', 'preresnet', 'vgg')" % self.option.arch
            self.logger.info(msg)
            raise ValueError(msg)

        # ignore_index=255 skips void/unlabeled targets in the loss.
        self.loss = nn.CrossEntropyLoss(ignore_index=255)

        if self.option.cuda and len(self.option.gpu_ids) > 1:
            self.net = nn.DataParallel(self.net,
                                       device_ids=self.option.gpu_ids)

        if self.option.cuda:
            self.net.cuda()
            self.loss.cuda()
Exemplo n.º 13
0
            imgs[i, ...] = img[offset[i][0]:offset[i][0] + 224,
                               offset[i][1]:offset[i][1] + 224]
        img = cv.flip(img, 1)
        for i in range(0, 5):
            imgs[i + 5, ...] = img[offset[i][0]:offset[i][0] + 224,
                                   offset[i][1]:offset[i][1] + 224]
    else:
        raise ValueError("Type not support")
    imgs = ((imgs / 255.0) - 0.5) * 2.0
    imgs = imgs[..., ::-1]
    return imgs


# build the ResNet graph once at module level, in inference mode
images = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
net = resnet.ResNet(images, is_training=False)
net.build_model()

# classification logits and the feature tensor exposed by the network
logits = net.logit
feat = net.feat

# restore model: saver over all graph variables, session pinned to one GPU
saver = tf.train.Saver(tf.global_variables())
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(FLAGS.visiable_gpu)
config.log_device_placement = False
sess = tf.Session(config=config)

# load trained model weights from the pretrained checkpoint
saver.restore(sess, FLAGS.pretrain_ckpt)
Exemplo n.º 14
0
     log_dir=configuration.get_snapshot_dir(), histogram_freq=1)
 #Defining callback for saving checkpoints
 #save_freq: frequency in terms of number steps each time checkpoint is saved
 model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
     filepath=configuration.get_snapshot_dir() + '{epoch:03d}.h5',
     save_weights_only=True,
     mode='max',
     monitor='val_acc',
     save_freq='epoch',
 )
 #save_freq = configuration.get_snapshot_steps())
 #resnet 34, no bottleneck is required
 #model = resnet.ResNet([3,4,6,3],[64,128,256,512], configuration.get_number_of_classes(), se_factor = 0)
 #resnet_50
 model = resnet.ResNet([3, 4, 6, 3], [64, 128, 256, 512],
                       configuration.get_number_of_classes(),
                       use_bottleneck=True)
 print('Model is Resnet')
 sys.stdout.flush()
 #build the model indicating the input shape
 #define the model input
 input_image = tf.keras.Input(
     (input_shape[0], input_shape[1], input_shape[2]), name='input_image')
 model(input_image)
 model.summary()
 #use_checkpoints to load weights
 if configuration.use_checkpoint():
     model.load_weights(configuration.get_checkpoint_file(),
                        by_name=True,
                        skip_mismatch=True)
     #model.load_weights(configuration.get_checkpoint_file(), by_name = False)
Exemplo n.º 15
0
def train():
    """Train an RRPN detector with the Paddle fluid static-graph API.

    Builds the training program (model + piecewise-decay LR with linear
    warmup + Momentum/L2), optionally loads a pretrained backbone, compiles
    for data parallelism, then runs the iteration loop with periodic logging
    and checkpointing.
    """
    learning_rate = cfg.learning_rate
    #image_shape = [-1, 3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]

    # Effective batch size is per-device batch times device count.
    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = model_builder.RRPN(add_conv_body_func=resnet.ResNet(),
                                       add_roi_box_head_func=resnet.ResNetC5(),
                                       use_pyreader=cfg.use_pyreader,
                                       use_random=use_random)
            model.build_model()
            losses, keys, rpn_rois = model.loss()
            # losses[0] is the total loss to optimize; all components are
            # fetched for logging.
            loss = losses[0]
            fetch_list = losses

            # Step-decay schedule (gamma^i at each boundary) with a linear
            # warmup from start_lr over cfg.warm_up_iter iterations.
            boundaries = cfg.lr_steps
            gamma = cfg.lr_gamma
            step_num = len(cfg.lr_steps)
            values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
            start_lr = learning_rate * cfg.start_factor
            lr = fluid.layers.piecewise_decay(boundaries, values)
            lr = fluid.layers.linear_lr_warmup(lr, cfg.warm_up_iter, start_lr,
                                               learning_rate)
            optimizer = fluid.optimizer.Momentum(
                learning_rate=lr,
                regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
                momentum=cfg.momentum)
            optimizer.minimize(loss)
            fetch_list = fetch_list + [lr]

            # Fetched vars must be persistable to survive scope cleanup.
            for var in fetch_list:
                var.persistable = True
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_iteration_per_drop_scope = 1
    exe.run(startup_prog)

    # Pretrained weights are loaded with batch-norm folding.
    if cfg.pretrained_model:
        checkpoint.load_and_fusebn(exe, train_prog, cfg.pretrained_model)
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    shuffle = True
    shuffle_seed = None
    if num_trainers > 1:
        shuffle_seed = 1
    if cfg.use_pyreader:
        train_reader = reader.train(batch_size=cfg.TRAIN.im_per_batch,
                                    total_batch_size=total_batch_size,
                                    padding_total=cfg.TRAIN.padding_minibatch,
                                    shuffle=shuffle,
                                    shuffle_seed=shuffle_seed)
        if num_trainers > 1:
            assert shuffle_seed is not None, \
                "If num_trainers > 1, the shuffle_seed must be set, because " \
                "the order of batch data generated by reader " \
                "must be the same in the respective processes."
            # NOTE: the order of batch data generated by batch_reader
            # must be the same in the respective processes.
            if num_trainers > 1:
                train_reader = fluid.contrib.reader.distributed_batch_reader(
                    train_reader)
        data_loader = model.data_loader
        data_loader.set_sample_list_generator(train_reader, places=place)
    else:
        # NOTE(review): this branch builds a DataFeeder, but train_loop()
        # below only uses data_loader (pyreader path) -- confirm the
        # non-pyreader path is actually exercised.
        if num_trainers > 1: shuffle = False
        train_reader = reader.train(batch_size=total_batch_size,
                                    shuffle=shuffle)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def train_loop():
        # Iterates until cfg.max_iter or reader exhaustion (EOFException).
        data_loader.start()
        train_stats = TrainingStats(cfg.log_window, keys)
        try:
            start_time = time.time()
            prev_start_time = start_time
            for iter_id in range(cfg.max_iter):
                prev_start_time = start_time
                start_time = time.time()
                outs = exe.run(compiled_train_prog,
                               fetch_list=[v.name for v in fetch_list])
                # outs[:-1] are the loss components (keyed by `keys`);
                # outs[-1] is the current learning rate.
                stats = {
                    k: np.array(v).mean()
                    for k, v in zip(keys, outs[:-1])
                }
                train_stats.update(stats)
                logs = train_stats.log()
                if iter_id % 10 == 0:
                    strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                        now_time(), iter_id, np.mean(outs[-1]), logs,
                        start_time - prev_start_time)
                    print(strs)
                sys.stdout.flush()
                # Periodic snapshot (skipping iteration 0).
                if (iter_id) % cfg.TRAIN.snapshot_iter == 0 and iter_id != 0:
                    save_name = "{}".format(iter_id)
                    checkpoint.save(
                        exe, train_prog,
                        os.path.join(cfg.model_save_dir, save_name))
                if (iter_id) == cfg.max_iter:
                    checkpoint.save(
                        exe, train_prog,
                        os.path.join(cfg.model_save_dir, "model_final"))
                    break
            end_time = time.time()
            total_time = end_time - start_time
            last_loss = np.array(outs[0]).mean()
        except (StopIteration, fluid.core.EOFException):
            # Reader exhausted before max_iter: reset so it can be reused.
            data_loader.reset()

    train_loop()
Exemplo n.º 16
0
 model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
     filepath=configuration.get_snapshot_dir() + '{epoch:03d}.h5',
     save_weights_only=True,
     mode='max',
     monitor='val_acc',
     save_freq='epoch',
 )
 #save_freq = configuration.get_snapshot_steps())
 #resnet 34
 if configuration.get_model_name() == 'UVROIS':
     model = uv_rois.UVRoisModel(configuration.get_number_of_classes())
     print('Model is UV')
     sys.stdout.flush()
 else:
     model = resnet.ResNet([3, 4, 6, 3], [64, 128, 256, 512],
                           configuration.get_number_of_classes(),
                           se_factor=0)
     print('Model is Resnet')
     sys.stdout.flush()
 #resnet_50
 #model = resnet.ResNet([3,4,6,3],[64,128,256,512], configuration.get_number_of_classes(), use_bottleneck = True)
 #build the model indicating the input shape
 #define the model input
 input_image = tf.keras.Input(
     (input_shape[0], input_shape[1], input_shape[2]),
     name='input_image')
 model(input_image)
 model.summary()
 #use_checkpoints to load weights
 if configuration.use_checkpoint():
     model.load_weights(configuration.get_checkpoint_file(),
    def __init__(self):
        """Build the network, optimizer, loss-lambda table and training
        criterion from the module-level ``args``."""
        super(Solver, self).__init__()
        global numberofclass

        #define the network via a name -> constructor dispatch table
        net_builders = {
            'resnet': lambda: RN.ResNet(dataset=args.dataset,
                                        depth=args.depth,
                                        num_classes=numberofclass,
                                        bottleneck=args.bottleneck),
            'pyramidnet': lambda: PYRM.PyramidNet(args.dataset, args.depth,
                                                  args.alpha, numberofclass,
                                                  args.bottleneck),
            'wideresnet': lambda: WR.WideResNet(depth=args.depth,
                                                num_classes=numberofclass,
                                                widen_factor=args.width),
            'vggnet': lambda: VGG.vgg16(num_classes=numberofclass),
            'mobilenet': lambda: MN.mobile_half(num_classes=numberofclass),
            'shufflenet': lambda: SN.ShuffleV2(num_classes=numberofclass),
            'densenet': lambda: DN.densenet_cifar(num_classes=numberofclass),
            'resnext-2': lambda: ResNeXt29_2x64d(num_classes=numberofclass),
            'resnext-4': lambda: ResNeXt29_4x64d(num_classes=numberofclass),
            'resnext-32': lambda: ResNeXt29_32x4d(num_classes=numberofclass),
            'imagenetresnet18':
            lambda: multi_resnet18_kd(num_classes=numberofclass),
            'imagenetresnet34':
            lambda: multi_resnet34_kd(num_classes=numberofclass),
            'imagenetresnet50':
            lambda: multi_resnet50_kd(num_classes=numberofclass),
            'imagenetresnet101':
            lambda: multi_resnet101_kd(num_classes=numberofclass),
            'imagenetresnet152':
            lambda: multi_resnet152_kd(num_classes=numberofclass),
        }
        if args.net_type not in net_builders:
            raise Exception('unknown network architecture: {}'.format(
                args.net_type))
        self.model = net_builders[args.net_type]()

        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         args.lr,
                                         momentum=args.momentum,
                                         weight_decay=args.weight_decay,
                                         nesterov=True)
        # Per-class-pair lambda table used by mixing losses; never trained.
        self.loss_lams = torch.zeros(numberofclass,
                                     numberofclass,
                                     dtype=torch.float32).cuda()
        self.loss_lams.requires_grad = False

        #define the loss function
        def _build_sce():
            # SCE hyper-parameters differ between CIFAR-10 and other sets.
            if args.dataset == 'cifar10':
                return SCELoss(alpha=0.1, beta=1.0,
                               num_classes=numberofclass)
            return SCELoss(alpha=6.0, beta=0.1, num_classes=numberofclass)

        criterion_builders = {
            'ce': nn.CrossEntropyLoss,
            'sce': _build_sce,
            'ls': lambda: label_smooth(num_classes=numberofclass),
            'gce':
            lambda: generalized_cross_entropy(num_classes=numberofclass),
            'jo': lambda: joint_optimization(num_classes=numberofclass),
            'bootsoft': lambda: boot_soft(num_classes=numberofclass),
            'boothard': lambda: boot_hard(num_classes=numberofclass),
            'forward': lambda: Forward(num_classes=numberofclass),
            'backward': lambda: Backward(num_classes=numberofclass),
            'disturb': lambda: DisturbLabel(num_classes=numberofclass),
            'ols': nn.CrossEntropyLoss,
        }
        make_criterion = criterion_builders.get(args.method)
        if make_criterion is not None:
            self.criterion = make_criterion()
        # Matches the original elif chain: an unknown method leaves
        # self.criterion unset and the line below raises AttributeError.
        self.criterion = self.criterion.cuda()
Exemplo n.º 18
0
def infer():
    """Run EAST text-detection inference on the ICDAR2015 test images.

    Builds the EAST network in infer mode, loads pretrained weights from
    ``cfg.pretrained_model``, then for each test image predicts score and
    geometry maps, decodes them into quadrilateral boxes, writes the boxes to
    ``result/res_<image>.txt`` (ICDAR submission format) and saves a
    visualisation image with the boxes drawn.
    """
    image_shape = [3, 512, 512]
    model = model_builder.EAST(add_conv_body_func=resnet.ResNet(),
                               mode='infer')
    f_geo, f_score = model.build_model(image_shape)
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    if not os.path.exists(cfg.pretrained_model):
        raise ValueError("Model path [%s] does not exist." %
                         (cfg.pretrained_model))

    def if_exist(var):
        # Load only variables that are actually present in the checkpoint dir.
        return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

    fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
    infer_reader = reader.infer(cfg.image_path)
    feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
    fetch_list = [f_geo, f_score]
    image_path = "ICDAR2015/ch4_test_images/"
    image_names = sorted(os.listdir(image_path))
    for iter_id, data in enumerate(infer_reader()):
        # im_info row appears to hold (h, w, ratio_h, ratio_w) -- TODO confirm
        # against reader.infer.
        im_info = data[0][1]
        ratio_w = im_info[0][3]
        ratio_h = im_info[0][2]
        result = exe.run(fetch_list=[v.name for v in fetch_list],
                         feed=feeder.feed(data),
                         return_numpy=True)
        # BUGFIX: fetch_list is [f_geo, f_score], so result[0] is the geometry
        # map and result[1] the score map.  The original code assigned
        # result[0] to both and passed them into detect() swapped.
        geometry = result[0]
        score = result[1]
        boxes = detect(score_map=score, geo_map=geometry)
        out_path = "result/res_" + image_names[iter_id].split('.')[0] + ".txt"
        try:
            # Context manager closes the result file even when box decoding
            # fails (the original leaked the handle on exception).
            with open(out_path, 'w') as f:
                if boxes.shape[0] != 0:
                    # boxes[:, :8] holds the 4 (x, y) corners of each quad;
                    # rescale back to the original image resolution.
                    boxes = boxes[:, :8].reshape(-1, 4, 2)
                    boxes_w = boxes[:, :, 0:1] / ratio_w
                    boxes_h = boxes[:, :, 1:] / ratio_h
                    boxes = np.concatenate((boxes_w, boxes_h), axis=-1)
                    for box in boxes:
                        # Skip degenerate quads with sides shorter than 5 px.
                        if np.linalg.norm(box[0] - box[1]) < 5 or \
                                np.linalg.norm(box[3] - box[0]) < 5:
                            continue
                        coords = [int(v) for pt in box for v in pt]
                        f.write(','.join(str(c) for c in coords) + '\n')
                    image = cv2.imread("ICDAR2015/ch4_test_images/" +
                                       image_names[iter_id])
                    for i in range(boxes.shape[0]):
                        cv2.polylines(image, [boxes[i].astype('int32')],
                                      True,
                                      color=(255, 255, 0),
                                      thickness=1)
                    print("saving result image", image_names[iter_id])
                    cv2.imwrite("./" + image_names[iter_id], image)
        except Exception:
            # detect() may return None or malformed boxes for hard images;
            # keep the original best-effort behaviour and move on.  (Narrowed
            # from a bare except so Ctrl-C still interrupts the run.)
            continue
Exemplo n.º 19
0
def get_model(model):
    """Build the (high-res net, low-res net, agent) model triple.

    Args:
        model: configuration name, one of ``'R32_C10'``, ``'R32_C100'``,
            ``'R50_ImgNet'``, ``'R34_fMoW'``.

    Returns:
        Tuple ``(rnet_hr, rnet_lr, agent)``: the high- and low-resolution
        classifiers share one architecture; the agent is a smaller ResNet
        with 16 outputs.

    Raises:
        ValueError: if *model* is not a recognised configuration name.
    """
    from models import resnet

    # (classifier blocks, patch size, num classes, agent blocks) per config.
    configs = {
        'R32_C10': ([3, 4, 6, 3], 3, 10, [1, 1, 1, 1]),
        'R32_C100': ([3, 4, 6, 3], 3, 100, [1, 1, 1, 1]),
        'R50_ImgNet': ([3, 4, 6, 3], 7, 1000, [2, 2, 2, 2]),
        'R34_fMoW': ([3, 4, 6, 3], 7, 62, [2, 2, 2, 2]),
    }
    if model not in configs:
        # BUGFIX: the original had no else branch and hit UnboundLocalError
        # on `return` for unknown names; fail loudly instead.
        raise ValueError('unknown model: {}'.format(model))

    blocks, size, n_classes, agent_blocks = configs[model]
    rnet_hr = resnet.ResNet(resnet.BasicBlock, blocks, size, n_classes)
    rnet_lr = resnet.ResNet(resnet.BasicBlock, blocks, size, n_classes)
    agent = resnet.ResNet(resnet.BasicBlock, agent_blocks, 3, 16)

    return rnet_hr, rnet_lr, agent
Exemplo n.º 20
0
def main():
    """Train a ResNet/PyramidNet classifier (CIFAR-10/100 or ImageNet) with
    the configured mixup-style method, resuming from an existing checkpoint
    in ``runs/<expname>/`` when one is found.
    """
    global args, best_err1, best_err5
    args = parser.parse_args()

    # Seed all RNGs only when a non-negative seed is given.
    if args.seed >= 0:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)
    cudnn.benchmark = True

    # Save path: the experiment name encodes method and hyper-parameters, so
    # distinct configurations resume from distinct checkpoints.
    args.expname += args.method
    if args.transport:
        args.expname += '_tp'
    args.expname += '_prob_' + str(args.mixup_prob)
    if args.clean_lam > 0:
        args.expname += '_clean_' + str(args.clean_lam)
    if args.seed >= 0:
        args.expname += '_seed' + str(args.seed)
    print("Model is saved at {}".format(args.expname))

    # Dataset and loader
    if args.dataset.startswith('cifar'):
        # Per-channel CIFAR mean/std rescaled from [0, 255] to [0, 1].
        mean = [x / 255.0 for x in [125.3, 123.0, 113.9]]
        std = [x / 255.0 for x in [63.0, 62.1, 66.7]]
        normalize = transforms.Normalize(mean=mean, std=std)

        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=args.padding),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_test = transforms.Compose([transforms.ToTensor(), normalize])

        if args.dataset == 'cifar100':
            train_loader = torch.utils.data.DataLoader(datasets.CIFAR100('~/Datasets/cifar100/',
                                                                         train=True,
                                                                         download=True,
                                                                         transform=transform_train),
                                                       batch_size=args.batch_size,
                                                       shuffle=True,
                                                       num_workers=args.workers,
                                                       pin_memory=True)
            val_loader = torch.utils.data.DataLoader(datasets.CIFAR100('~/Datasets/cifar100/',
                                                                       train=False,
                                                                       transform=transform_test),
                                                     batch_size=args.batch_size // 4,
                                                     shuffle=True,
                                                     num_workers=args.workers,
                                                     pin_memory=True)
            numberofclass = 100
        elif args.dataset == 'cifar10':
            train_loader = torch.utils.data.DataLoader(datasets.CIFAR10('../data',
                                                                        train=True,
                                                                        download=True,
                                                                        transform=transform_train),
                                                       batch_size=args.batch_size,
                                                       shuffle=True,
                                                       num_workers=args.workers,
                                                       pin_memory=True)
            val_loader = torch.utils.data.DataLoader(datasets.CIFAR10('../data',
                                                                      train=False,
                                                                      transform=transform_test),
                                                     batch_size=args.batch_size,
                                                     shuffle=True,
                                                     num_workers=args.workers,
                                                     pin_memory=True)
            numberofclass = 10
        else:
            raise Exception('unknown dataset: {}'.format(args.dataset))

    elif args.dataset == 'imagenet':
        traindir = os.path.join('/data/readonly/ImageNet-Fast/imagenet/train')
        valdir = os.path.join('/data/readonly/ImageNet-Fast/imagenet/val')
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        normalize = transforms.Normalize(mean=mean, std=std)
        # Color jitter plus PCA lighting noise (AlexNet-style augmentation).
        jittering = utils.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4)
        lighting = utils.Lighting(alphastd=0.1,
                                  eigval=[0.2175, 0.0188, 0.0045],
                                  eigvec=[[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140],
                                          [-0.5836, -0.6948, 0.4203]])

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                jittering,
                lighting,
                normalize,
            ]))
        train_sampler = None

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=(train_sampler is None),
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   sampler=train_sampler)
        val_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])
        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(valdir, val_transform),
                                                 batch_size=args.batch_size // 4,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)
        numberofclass = 1000
        # Cap the mixup neighbourhood size on ImageNet -- presumably to bound
        # the transport cost; TODO confirm against the mixup implementation.
        args.neigh_size = min(args.neigh_size, 2)

    else:
        raise Exception('unknown dataset: {}'.format(args.dataset))

    # Model
    print("=> creating model '{}'".format(args.net_type))
    if args.net_type == 'resnet':
        model = RN.ResNet(args.dataset, args.depth, numberofclass, args.bottleneck)  # for ResNet
    elif args.net_type == 'pyramidnet':
        model = PYRM.PyramidNet(args.dataset, args.depth, args.alpha, numberofclass,
                                args.bottleneck)
    else:
        raise Exception('unknown network architecture: {}'.format(args.net_type))

    pretrained = "runs/{}/{}".format(args.expname, 'checkpoint.pth.tar')
    if os.path.isfile(pretrained):
        print("=> loading checkpoint '{}'".format(pretrained))
        checkpoint = torch.load(pretrained)
        # Strip the 'module.' prefix that DataParallel adds to parameter
        # names, since the checkpoint is loaded before wrapping below.
        checkpoint['state_dict'] = dict(
            (key[7:], value) for (key, value) in checkpoint['state_dict'].items())
        model.load_state_dict(checkpoint['state_dict'])
        cur_epoch = checkpoint['epoch'] + 1
        best_err1 = checkpoint['best_err1']
        print("=> loaded checkpoint '{}'(epoch: {}, best err1: {}%)".format(
            pretrained, cur_epoch, checkpoint['best_err1']))
    else:
        cur_epoch = 0
        print("=> no checkpoint found at '{}'".format(pretrained))

    model = torch.nn.DataParallel(model).cuda()
    print('the number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    # Per-sample (unreduced) loss, used by the mixup training routine.
    criterion_batch = nn.CrossEntropyLoss(reduction='none').cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)
    if os.path.isfile(pretrained):
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("optimizer is loaded!")

    # Normalisation constants as (1, 3, 1, 1) tensors for broadcasting over
    # image batches inside train().
    mean_torch = torch.tensor(mean, dtype=torch.float32).reshape(1, 3, 1, 1).cuda()
    std_torch = torch.tensor(std, dtype=torch.float32).reshape(1, 3, 1, 1).cuda()
    # Optional multiprocessing pool handed to train() -- presumably for the
    # transport computation; TODO confirm.
    if args.mp > 0:
        mp = Pool(args.mp)
    else:
        mp = None

    # Start training and validation
    for epoch in range(cur_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train_loss = train(train_loader, model, criterion, criterion_batch, optimizer, epoch,
                           mean_torch, std_torch, mp)
        # evaluate on validation set
        err1, err5, val_loss = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint (ties count as best).
        is_best = err1 <= best_err1
        best_err1 = min(err1, best_err1)
        if is_best:
            best_err5 = err5

        print('Current best accuracy (top-1 and 5 error):', best_err1, best_err5)
        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.net_type,
                'state_dict': model.state_dict(),
                'best_err1': best_err1,
                'best_err5': best_err5,
                'optimizer': optimizer.state_dict(),
            }, is_best)

    print('Best accuracy (top-1 and 5 error):', best_err1, best_err5)
def main():
    """Train and evaluate a CRNN speech-emotion-recognition model.

    Parses command-line options, builds a ResNet feature extractor feeding a
    GRU classifier, trains for ``--max_epochs`` epochs keeping the checkpoint
    with the best validation accuracy, then reports test-set loss/accuracy.

    Returns:
        0 on completion (early-returns None when ``--mode`` is not 'train').
    """
    parser = argparse.ArgumentParser(description='Speech Emotion Recognition')
    # NOTE: help strings below were corrected to match the actual defaults.
    parser.add_argument('--hidden_size',
                        type=int,
                        default=512,
                        help='hidden size of model (default: 512)')
    parser.add_argument('--layer_size',
                        type=int,
                        default=3,
                        help='number of layers of model (default: 3)')
    parser.add_argument('--n_class',
                        type=int,
                        default=7,
                        help='number of classes of data (default: 7)')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.2,
                        help='dropout rate in training (default: 0.2)')
    # NOTE(review): with default=True this store_true flag is a no-op -- the
    # RNN is always bidirectional.  Kept as-is to preserve behaviour.
    parser.add_argument('--bidirectional',
                        default=True,
                        action='store_true',
                        help='use bidirectional RNN (default: True)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=8,
                        help='batch size in training (default: 8)')
    parser.add_argument(
        '--workers',
        type=int,
        default=4,
        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=30,
                        help='number of max epochs in training (default: 30)')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-04,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--no_cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name',
                        type=str,
                        default='model',
                        help='the name of model')
    parser.add_argument('--mode', type=str, default='train')

    args = parser.parse_args()

    # Seed all RNGs for reproducibility.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # N_FFT: defined in loader.py.
    # BUGFIX: use integer division -- the spectrogram bin count feeds a layer
    # dimension, and N_FFT / 2 + 1 is a float under Python 3.
    feature_size = N_FFT // 2 + 1

    cnn = resnet.ResNet(feature_size, resnet.BasicBlock, [3, 3, 3])
    rnn = RNN.RNN(cnn.feature_size,
                  args.hidden_size,
                  args.n_class,
                  input_dropout_p=args.dropout,
                  dropout_p=args.dropout,
                  n_layers=args.layer_size,
                  bidirectional=args.bidirectional,
                  rnn_cell='gru',
                  variable_lengths=False)

    model = CRNN.CRNN(cnn, rnn)
    model.flatten_parameters()

    model = nn.DataParallel(model).to(device)

    optimizer = optim.Adam(model.module.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss(reduction='sum').to(device)

    if args.mode != 'train':
        return

    data_download()

    wav_paths = [
        os.path.join('./dataset/wav', fname)
        for fname in os.listdir('./dataset/wav')
    ]

    best_acc = 0
    begin_epoch = 0
    # BUGFIX: initialise save_epoch so the final test log cannot raise
    # UnboundLocalError when no epoch ever improves on best_acc.
    save_epoch = begin_epoch

    # History rows: [train_loss, train_acc, eval_loss, eval_acc] per epoch.
    loss_acc = [[], [], [], []]

    train_batch_num, train_dataset_list, valid_dataset, test_dataset = split_dataset(
        args, wav_paths, dataset_ratio=[0.7, 0.1, 0.2])

    logger.info('start')

    train_begin = time.time()

    for epoch in range(begin_epoch, args.max_epochs):

        train_queue = queue.Queue(args.workers * 2)

        train_loader = MultiLoader(train_dataset_list, train_queue,
                                   args.batch_size, args.workers)
        train_loader.start()

        train_loss, train_acc = train(model, train_batch_num, train_queue,
                                      criterion, optimizer, device,
                                      train_begin, args.workers, 10)
        logger.info('Epoch %d (Training) Loss %0.4f Acc %0.4f' %
                    (epoch, train_loss, train_acc))

        train_loader.join()

        loss_acc[0].append(train_loss)
        loss_acc[1].append(train_acc)

        valid_queue = queue.Queue(args.workers * 2)

        valid_loader = BaseDataLoader(valid_dataset, valid_queue,
                                      args.batch_size, 0)
        valid_loader.start()

        eval_loss, eval_acc = evaluate(model, valid_loader, valid_queue,
                                       criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f Acc %0.4f' %
                    (epoch, eval_loss, eval_acc))

        valid_loader.join()

        loss_acc[2].append(eval_loss)
        loss_acc[3].append(eval_acc)

        # Keep the checkpoint with the best validation accuracy.
        best_model = (eval_acc > best_acc)
        if best_model:
            best_acc = eval_acc
            torch.save(model.state_dict(), './save_model/best_model.pt')
            save_epoch = epoch

    # Reload the best checkpoint for final testing.
    # NOTE(review): this assumes at least one epoch beat best_acc=0 so the
    # file exists -- verify when validation accuracy can be exactly 0.
    model.load_state_dict(torch.load('./save_model/best_model.pt'))

    test_queue = queue.Queue(args.workers * 2)

    test_loader = BaseDataLoader(test_dataset, test_queue, args.batch_size, 0)
    test_loader.start()

    test_loss, test_acc = evaluate(model, test_loader, test_queue, criterion,
                                   device)
    logger.info('Epoch %d (Test) Loss %0.4f Acc %0.4f' %
                (save_epoch, test_loss, test_acc))

    test_loader.join()

    save_data(loss_acc, test_loss, test_acc)
    plot_data(loss_acc, test_loss, test_acc)

    return 0
Exemplo n.º 22
0
def resnet_model_fn(features, labels, mode, params):
    """Our model_fn for ResNet to be used with our Estimator.

    Multi-label setup: labels are per-class targets where -1 marks "ignore"
    entries; the loss is a weighted sigmoid cross entropy, re-weighted per
    class by running positive/negative sample counters.
    """
    #tf.summary.image('images', features, max_outputs=6)

    # build model
    net = resnet.ResNet(features,
                        is_training=(mode == tf.estimator.ModeKeys.TRAIN))
    logits = net.build_model()
    predictions = {
        'classes': tf.argmax(logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    # a. get loss coeficiente
    # pos_mask[c] = number of samples in this batch whose label for class c
    # is >= mask_thres (i.e. a confident positive).
    pos_mask = tf.reduce_sum(
        tf.cast(
            tf.greater_equal(labels, tf.fill(tf.shape(labels),
                                             FLAGS.mask_thres)), tf.float32),
        0)
    # Per-class indicators: did this batch contain any positive for class c?
    pos_curr_count = tf.cast(tf.greater(pos_mask, 0), tf.float32)
    neg_curr_count = tf.cast(tf.less_equal(pos_mask, 0), tf.float32)
    # Running counters (non-trainable graph state) of consecutive
    # positive-only / negative-only batches per class.
    pos_count = tf.Variable(tf.zeros(shape=[
        FLAGS.class_num,
    ]),
                            trainable=False)
    neg_count = tf.Variable(tf.zeros(shape=[
        FLAGS.class_num,
    ]),
                            trainable=False)
    # Randomly subsample negative classes with probability FLAGS.neg_select.
    neg_select = tf.cast(
        tf.less_equal(
            tf.random_uniform(shape=[
                FLAGS.class_num,
            ],
                              minval=0,
                              maxval=1,
                              seed=FLAGS.random_seed), FLAGS.neg_select),
        tf.float32)
    #tf.compat.v1.summary.histogram('pos_curr_count', pos_curr_count)
    #tf.compat.v1.summary.histogram('neg_curr_count', neg_curr_count)
    #tf.compat.v1.summary.histogram('neg_select', neg_select)
    # Update counters: a class's pos_count grows while it keeps seeing
    # positives and resets when a negative batch arrives (and vice versa).
    # Statement order inside these assigns is significant -- do not reorder.
    with tf.control_dependencies([pos_curr_count, neg_curr_count, neg_select]):
        pos_count = tf.assign_sub(tf.assign_add(pos_count, pos_curr_count),
                                  tf.multiply(pos_count, neg_curr_count))
        neg_count = tf.assign_sub(
            tf.assign_add(neg_count, tf.multiply(neg_curr_count, neg_select)),
            tf.multiply(neg_count, pos_curr_count))
        #tf.compat.v1.summary.histogram('pos_count', pos_count)
        #tf.compat.v1.summary.histogram('neg_count', neg_count)
    # Loss coefficient decays as -log10((0.01 + count) / 10), floored at 0.01
    # so frequently-seen classes contribute less.
    pos_loss_coef = -1 * (tf.log((0.01 + pos_count) / 10) / tf.log(10.0))
    pos_loss_coef = tf.where(
        tf.greater(pos_loss_coef, tf.fill(tf.shape(pos_loss_coef), 0.01)),
        pos_loss_coef, tf.fill(tf.shape(pos_loss_coef), 0.01))
    pos_loss_coef = tf.multiply(pos_loss_coef, pos_curr_count)
    #tf.compat.v1.summary.histogram('pos_loss_coef', pos_loss_coef)
    # NOTE(review): the negative branch thresholds at 0.01 but floors at
    # 0.001, unlike the positive branch (0.01/0.01) -- possibly intentional
    # down-weighting of negatives; confirm before "fixing".
    neg_loss_coef = -1 * (tf.log((8 + neg_count) / 10) / tf.log(10.0))
    neg_loss_coef = tf.where(
        tf.greater(neg_loss_coef, tf.fill(tf.shape(neg_loss_coef), 0.01)),
        neg_loss_coef, tf.fill(tf.shape(neg_loss_coef), 0.001))
    neg_loss_coef = tf.multiply(neg_loss_coef,
                                tf.multiply(neg_curr_count, neg_select))
    #tf.compat.v1.summary.histogram('neg_loss_coef', neg_loss_coef)
    loss_coef = tf.add(pos_loss_coef, neg_loss_coef)
    #tf.compat.v1.summary.histogram('loss_coef', loss_coef)

    # b. get non-negative mask
    # Entries labelled -1.0 are "ignore" and masked out of the loss.
    non_neg_mask = tf.fill(tf.shape(labels), -1.0, name='non_neg')
    non_neg_mask = tf.cast(tf.not_equal(labels, non_neg_mask), tf.float32)
    #tf.compat.v1.summary.histogram('non_neg', non_neg_mask)

    # cal loss
    cross_entropy = tf.nn.weighted_cross_entropy_with_logits(
        logits=logits,
        labels=labels,
        pos_weight=12,
        name='sigmod_cross_entropy')
    #tf.compat.v1.summary.histogram('sigmod_ce', cross_entropy)
    cross_entropy_cost = tf.reduce_sum(
        tf.reduce_mean(cross_entropy * non_neg_mask, axis=0) * loss_coef)

    # Create a tensor named cross_entropy for logging purposes.
    tf.identity(cross_entropy_cost, name='cross_entropy')
    #tf.summary.scalar('cross_entropy', cross_entropy_cost)
    #tf.compat.v1.summary.scalar('cross_entropy', cross_entropy_cost)

    # Add weight decay to the loss. We exclude the batch norm variables because
    # doing so leads to a small improvement in accuracy.
    loss = cross_entropy_cost + FLAGS.weight_decay * tf.add_n([
        tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Scale the learning rate linearly with the batch size. When the batch size
        # is 256, the learning rate should be 0.1.
        lr_warmup = FLAGS.lr_warmup
        warmup_step = FLAGS.warmup
        warmup_decay_step = FLAGS.lr_warmup_decay_step
        warmup_decay_factor = FLAGS.lr_warmup_decay_factor
        #global_step = tf.train.get_or_create_global_step()
        global_step = tf.compat.v1.train.get_or_create_global_step()
        # Step-wise decay: lr is divided by 10 at each epoch boundary.
        boundaries = [
            int(FLAGS.lr_decay_step * epoch) for epoch in [1, 2, 3, 4]
        ]
        values = [FLAGS.lr * decay for decay in [1, 0.1, 0.01, 1e-3, 1e-4]]
        #learning_rate = tf.train.piecewise_constant(
        learning_rate = tf.compat.v1.train.piecewise_constant(
            tf.cast(global_step, tf.int32), boundaries, values)

        # Linear Scaling Rule and Gradual Warmup
        lr = tf.cond(
            global_step < warmup_step,
            #lambda: tf.train.exponential_decay(
            lambda: tf.compat.v1.train.exponential_decay(lr_warmup,
                                                         global_step,
                                                         warmup_decay_step,
                                                         warmup_decay_factor,
                                                         staircase=True),
            lambda: learning_rate)

        # Create a tensor named learning_rate for logging purposes.
        tf.identity(lr, name='learning_rate')
        #tf.summary.scalar('learning_rate', lr)
        #tf.compat.v1.summary.scalar('learning_rate', lr)

        #optimizer = tf.train.MomentumOptimizer(
        #    learning_rate=lr,
        #    momentum=FLAGS.opt_momentum)
        optimizer = tf.compat.v1.train.MomentumOptimizer(
            learning_rate=lr, momentum=FLAGS.opt_momentum)

        # Horovod wrapper averages gradients across workers.
        optimizer = hvd.DistributedOptimizer(
            optimizer)  #, device_sparse='/cpu:0')
        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)
    else:
        train_op = None

    # Build evaluate metrics
    accuracy = tf.metrics.accuracy(tf.argmax(labels, axis=1),
                                   predictions['classes'])
    metrics = {'accuracy': accuracy}
    tf.identity(accuracy[1], name='train_accuracy')
    #tf.summary.scalar('train_accuracy', accuracy[1])
    #tf.compat.v1.summary.scalar('train_accuracy', accuracy[1])

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      eval_metric_ops=metrics)
Exemplo n.º 23
0
def train():
    """Train the EAST text detector on ICDAR2015 with PaddlePaddle.

    Builds the training program with piecewise learning-rate decay plus
    warm-up, optionally loads pretrained weights, then runs the training
    loop, snapshotting the model every ``cfg.TRAIN.snapshot_iter`` iterations
    and stopping at ``cfg.max_iter``.
    """
    learning_rate = cfg.learning_rate
    image_shape = [3, 512, 512]

    if cfg.enable_ce:
        # Fixed seeds for continuous-evaluation reproducibility.
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = model_builder.EAST(
                add_conv_body_func=resnet.ResNet(),
                use_random=use_random)
            model.build_model(image_shape)
            losses, keys = model.loss()
            loss = losses[0]
            fetch_list = losses

            # Piecewise-constant schedule: lr * gamma**i after boundary i.
            boundaries = cfg.lr_steps
            gamma = cfg.lr_gamma
            step_num = len(cfg.lr_steps)
            values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

            lr = exponential_with_warmup_decay(
                learning_rate=learning_rate,
                boundaries=boundaries,
                values=values,
                warmup_iter=cfg.warm_up_iter,
                warmup_factor=cfg.warm_up_factor)
            optimizer = fluid.optimizer.AdamOptimizer(
                learning_rate=lr,
                regularization=fluid.regularizer.L2Decay(cfg.weight_decay))
            optimizer.minimize(loss)
            fetch_list = fetch_list + [lr]

            # Fetched variables must persist across executor runs.
            for var in fetch_list:
                var.persistable = True

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    build_strategy.sync_batch_norm = True
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_iteration_per_drop_scope = 1
    exe.run(startup_prog)

    if cfg.pretrained_model:

        def if_exist(var):
            # Load only variables present in the pretrained directory.
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    dataset = icdar.ICDAR2015Dataset()
    data_generator = dataset.get_batch(num_workers=24,
                                       input_size=512,
                                       batch_size=14)

    def train_loop():
        start_time = time.time()
        prev_start_time = start_time
        start = start_time  # wall-clock start of the whole run
        train_stats = TrainingStats(cfg.log_window, keys)
        for iter_id in range(100000):
            data = next(data_generator)
            prev_start_time = start_time
            start_time = time.time()
            outs = exe.run(compiled_train_prog,
                           fetch_list=[v.name for v in fetch_list],
                           feed={"input_images": data[0],
                                 "input_score_maps": data[2],
                                 "input_geo_maps": data[3],
                                 "input_training_masks": data[4]})
            # outs[:-1] are the loss components, outs[-1] the current lr.
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            strs = '{}, batch: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id,
                np.mean(outs[-1]), logs, start_time - prev_start_time)
            if iter_id % 10 == 0:
                print(strs)
            sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model(exe, "model_iter{}".format(iter_id), train_prog)
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        # BUGFIX: measure total time from `start` (run begin); the original
        # subtracted `start_time`, which holds only the last iteration's
        # timestamp.  Both values were also computed but never reported.
        total_time = end_time - start
        last_loss = np.array(outs[0]).mean()
        print('total time: {:.3f}, last loss: {:.6f}'.format(
            total_time, last_loss))

    train_loop()