Example #1
def main():
    # spawn theano vars
    xs = [T.imatrix('x%d' % i) for i in range(options['max_src'])]
    y = T.ivector('y')
    learning_rate = T.scalar('learning_rate')
    trng = RandomStreams(4321)

    # use test values
    """
    import numpy as np
    batch_size = 10
    theano.config.compute_test_value = 'raise'
    xl.tag.test_value = np.random.randn(batch_size, 392).astype(floatX)
    xr.tag.test_value = np.random.randn(batch_size, 392).astype(floatX)
    y.tag.test_value = np.random.randint(8, size=batch_size).astype(np.int32)
    learning_rate.tag.test_value = 0.5
    """

    # build cgs
    model = build_model(xs, y, learning_rate, trng=trng, **options)

    # compile
    opt = get_optimizer(options['optimizer'])
    f_train = opt(learning_rate, model, xs + [y], return_alpha=True)

    # compile validation/test functions
    f_valid = theano.function(xs + [y], [model.cost, model.acc],
                              on_unused_input='warn')

    # training loop
    train(f_train, f_valid, xs, y, **options)
Example #2
    def __init__(self, opt, logger=None):
        super(Model, self).__init__()
        self.opt = opt
        self.logger = logger

        # use different config files for YoloV2 and YoloV3
        if opt.model == 'Yolo2':
            cfgfile = 'configs/yolo2-voc.cfg'
        elif opt.model == 'Yolo3':
            cfgfile = 'configs/yolo3-coco.cfg'
        else:
            raise NotImplementedError('no such model: %s' % opt.model)

        # initialize the detector
        self.detector = Darknet(cfgfile, device=opt.device).to(opt.device)
        print_network(self.detector, logger=logger)

        # load the weights file before --load (optional)
        if opt.weights:
            utils.color_print('Load Yolo weights from %s.' % opt.weights, 3)
            self.detector.load_weights(opt.weights)

        self.optimizer = get_optimizer(opt, self.detector)
        self.scheduler = get_scheduler(opt, self.optimizer)

        self.avg_meters = ExponentialMovingAverage(0.95)
        self.save_dir = os.path.join(opt.checkpoint_dir, opt.tag)
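The Model classes in Examples #2 and #13 expect get_optimizer(opt, module) to return a single optimizer that get_scheduler then wraps. A minimal sketch of such a factory, assuming hypothetical opt.optimizer and opt.lr fields (the real projects' bodies may differ):

import torch

def get_optimizer(opt, module):
    # optimize only the parameters that still require gradients
    params = [p for p in module.parameters() if p.requires_grad]
    if opt.optimizer == 'sgd':
        return torch.optim.SGD(params, lr=opt.lr, momentum=0.9)
    elif opt.optimizer == 'adam':
        return torch.optim.Adam(params, lr=opt.lr)
    raise NotImplementedError('no such optimizer: %s' % opt.optimizer)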
Example #3
def soft_train(network, args):
    device = torch.device("cuda" if args.gpu_flag else "cpu")
    optimizer, scheduler = get_optimizer(network, args)

    train_data_set = get_data_set(args, train_flag=True)
    test_data_set = get_data_set(args, train_flag=False)
    train_data_loader = torch.utils.data.DataLoader(train_data_set,
                                                    batch_size=args.batch_size,
                                                    shuffle=True)
    test_data_loader = torch.utils.data.DataLoader(test_data_set,
                                                   batch_size=args.batch_size,
                                                   shuffle=False)

    print("-*-" * 10 + "\n\t\tTrain network\n" + "-*-" * 10)
    for epoch in range(0, args.epoch):
        network = network.cpu()
        if args.network == "vgg":
            network = soft_prune_vgg_step(network, args.prune_rate[0])
        elif args.network == 'resnet':
            network = soft_prune_resnet_step(network, args.prune_rate)
        network = network.to(device)
        train_step(network, train_data_loader, test_data_loader, optimizer,
                   device, epoch)
        if scheduler is not None:
            scheduler.step()

    return network
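Examples #3, #7, #10, and #26 instead unpack an (optimizer, scheduler) pair from get_optimizer(network, args). A sketch of that variant, with args.lr and args.epoch as assumed field names:

import torch

def get_optimizer(network, args):
    optimizer = torch.optim.SGD(network.parameters(), lr=args.lr,
                                momentum=0.9, weight_decay=5e-4)
    # step the learning rate down at 50% and 75% of the run
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[args.epoch // 2, args.epoch * 3 // 4])
    return optimizer, scheduler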
Example #4
def train(args):
    iters, vocab = get_iterator(args)

    model = get_model(args, vocab)
    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)

    trainer = get_trainer(args, model, loss_fn, optimizer)
    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    logger = get_logger(args)
    @trainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', state, engine.state.epoch)

    trainer.run(iters['train'], max_epochs=args.max_epochs)
Example #5
    def get_optimizer(self, optimizer_name, used_look_head=True):

        return get_optimizer(optimizer_name,
                             self.net,
                             self.args,
                             Trainer.learn_change_func,
                             used_look_head=used_look_head)
Example #6
def main():
    """
    Main Function
    """
    # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer
    assert_and_infer_cfg(args)
    prep_experiment(args, parser)
    writer = None

    _, _, _, extra_val_loaders, _ = datasets.setup_loaders(args)

    criterion, criterion_val = loss.get_loss(args)
    criterion_aux = loss.get_loss_aux(args)
    net = network.get_net(args, criterion, criterion_aux)

    optim, scheduler = optimizer.get_optimizer(args, net)

    net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net)
    net = network.warp_network_in_dataparallel(net, args.local_rank)
    epoch = 0
    i = 0

    if args.snapshot:
        epoch, mean_iu = optimizer.load_weights(net, optim, scheduler,
                                                args.snapshot, args.restore_optimizer)

    print("#### iteration", i)
    torch.cuda.empty_cache()
    # Main Loop
    # for epoch in range(args.start_epoch, args.max_epoch):

    for dataset, val_loader in extra_val_loaders.items():
        print("Extra validating... This won't save pth file")
        validate(val_loader, dataset, net, criterion_val, optim, scheduler, epoch, writer, i, save_pth=False)
Example #7
def train_network(network, args):
    if network is None:
        return

    device = torch.device("cuda" if args.gpu_flag else "cpu")
    network = network.to(device)
    optimizer, scheduler = get_optimizer(network, args)

    train_data_set = get_data_set(args, train_flag=True)
    test_data_set = get_data_set(args, train_flag=False)
    train_data_loader = torch.utils.data.DataLoader(train_data_set,
                                                    batch_size=args.batch_size,
                                                    shuffle=True)
    test_data_loader = torch.utils.data.DataLoader(test_data_set,
                                                   batch_size=args.batch_size,
                                                   shuffle=False)

    print("-*-" * 10 + "\n\t\tTrain network\n" + "-*-" * 10)
    for epoch in range(0, args.epoch):
        if args.pruned and args.alpha < 1:
            network = network.cpu()
            network = soft_prune_step(network, 1 - args.alpha)
            network = network.to(device)
        train_step(network, train_data_loader, test_data_loader, optimizer,
                   device, epoch)
        if scheduler is not None:
            scheduler.step()

    return network
Example #8
def train_workers(dataset,
                  workers,
                  epochs,
                  training_steps,
                  cutoff,
                  optimizer,
                  test_size=1000):
    train_step, init_op, reset_opt = get_optimizer(optimizer)
    step = 0
    with tf.Session() as sess:
        sess.run(init_op)
        step_time = Timer()
        for pbt_step in range(1, training_steps + 1):
            for worker in workers:
                step += 1
                print('%d, ' % step, end='')
                print('%d, ' % worker['id'], end='')
                score_value = worker['score_value']
                train_epochs(sess, epochs, worker['hparams'][0], dataset,
                             train_step)
                train, test, valid = test_accuracy.test_graph(
                    sess, test_size, dataset)
                print('%f, %f, %f' % (train, test, valid))
                # alternative: overfit_score.overfit_blended(train, test)
                worker['score_value'] = train * test
                worker['score'] = (1.0 + worker['score_value']) / (1.0 + score_value)
                pbt.tournament_replace(worker,
                                       workers,
                                       cutoff,
                                       dup_all=False,
                                       explore_fun=pbt.perturb_hparams)
            #pbt.pbt(workers, cutoff, dup_all=False)
            print('# step time %3.1fs, ' % step_time.split())
Example #9
def train(config):
    # load Vocab
    src_vocab = data_reader.Vocab(vocab_limits=config['src_vocab_size'])
    src_vocab.load_metadata(config['metadata']['src'])
    config['src_vocab_size'] = src_vocab.vocab_size()

    tgt_vocab = data_reader.Vocab(vocab_limits=config['tgt_vocab_size'])
    tgt_vocab.load_metadata(config['metadata']['tgt'])
    config['tgt_vocab_size'] = tgt_vocab.vocab_size()
    tf.logging.info(config)

    initializer = tf.random_uniform_initializer(-config['init_scale'],
                                                config['init_scale'])

    # create models
    with tf.name_scope('Train'):
        opt, lr = optimizer.get_optimizer(config['optimizer'],
                                          config['learning_rate'])
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            train_model = model.Model(is_training=True,
                                      config=config,
                                      seq_length=config['tgt_length'] - 1,
                                      optimizer=opt,
                                      lr=lr)

    with tf.name_scope('Test'):
        with tf.variable_scope("Model", reuse=True):
            test_model = model.Model(is_training=False,
                                     config=config,
                                     seq_length=1)

    sv = tf.train.Supervisor(logdir=config['logdir'])
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9

    # load Data
    train_data = data_reader.DataReader(
        src_data=config['train_data']['src'][0],
        tgt_data=config['train_data']['tgt'][0],
        src_vocab=src_vocab,
        tgt_vocab=tgt_vocab,
        src_length=config['src_length'],
        tgt_length=config['tgt_length'],
        batch_size=config['batch_size'])

    tf.logging.info('Start Sess')
    with sv.managed_session(config=sess_config) as sess:
        for i in range(config['n_epoch']):
            lr_decay = config['lr_decay'] ** max(i + 1 - config['decay_epoch'], 0)
            train_model.assign_lr(sess, config['learning_rate'] * lr_decay)

            tf.logging.info('Iter %d Start, Learning_rate: %.4f' %
                            (i, sess.run(train_model.lr)))
            loss = run_epoch(sess, train_model, train_data, is_training=True,
                             t_model=test_model, src_vocab=src_vocab,
                             tgt_vocab=tgt_vocab)
            tf.logging.info('Iter %d: training_loss: %.4f' %
                            (i, np.power(2, loss)))
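Example #9 (and #22 below) call an optimizer.get_optimizer(name, learning_rate) that returns an (opt, lr) pair, so the model can later assign a decayed rate into lr. A TF1-style sketch; only the signature and return convention come from the examples, the body is an assumption:

import tensorflow as tf

def get_optimizer(name, learning_rate):
    # keep the rate in a variable so the model can assign into it later
    lr = tf.Variable(learning_rate, trainable=False, name='learning_rate')
    if name == 'sgd':
        opt = tf.train.GradientDescentOptimizer(lr)
    elif name == 'adam':
        opt = tf.train.AdamOptimizer(lr)
    else:
        raise ValueError('unknown optimizer: %s' % name)
    return opt, lr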
Example #10
def train_network(network, args):
    if network is None:
        if args.network == 'vgg':
            network = MyVGG()
        elif args.network == 'resnet':
            network = resnet32()
        else:
            raise NotImplementedError('no such network: %s' % args.network)

    device = torch.device("cuda" if args.gpu_flag else "cpu")
    network = network.to(device)
    optimizer, scheduler = get_optimizer(network, args)

    train_data_set = get_data_set(args, train_flag=True)
    test_data_set = get_data_set(args, train_flag=False)
    train_data_loader = torch.utils.data.DataLoader(train_data_set,
                                                    batch_size=args.batch_size,
                                                    shuffle=True)
    test_data_loader = torch.utils.data.DataLoader(test_data_set,
                                                   batch_size=args.batch_size,
                                                   shuffle=False)

    print("-*-" * 10 + "\n\t\tTrain network\n" + "-*-" * 10)
    for epoch in range(0, args.epoch):
        train_step(network, train_data_loader, test_data_loader, optimizer,
                   device, epoch)
        if scheduler is not None:
            scheduler.step()

    return network
Example #11
def retrain_with_pseudo_label(loaded_models, train_ids, valid_ids, TRAIN_IMAGE_DIR, DATAFRAME, config):

    if 'pseudo_dataframe' not in loaded_models[list(loaded_models.keys())[0]]:
        return
    
    def worker_init_fn(worker_id):   
        random.seed(worker_id+random_seed)   
        np.random.seed(worker_id+random_seed) 

    for key in loaded_models.keys():    

        # make dataloader with pseudo label
        model_config = loaded_models[key]['config']
        dataframe_with_pseudo = pd.concat([DATAFRAME.loc[DATAFRAME['image_id'].isin(train_ids), :], loaded_models[key]['pseudo_dataframe']], axis=0)
        retrain_dataset = GWDDataset(dataframe_with_pseudo, TRAIN_IMAGE_DIR, model_config, is_train=True, do_transform=False)
        # dataset for retrain
        retrain_data_loader = DataLoader(retrain_dataset, batch_size=1, shuffle=True, num_workers=0, worker_init_fn=worker_init_fn, collate_fn=collate_fn)    

        model = copy.deepcopy(loaded_models[key]['model'])
        model.train()
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        optimizer = get_optimizer(model_config['train']['optimizer'], trainable_params)

        # retraining
        print("Retraining %s" % key)
        for epoch in range(0, config['epochs']):
            if model_config['general']['kfold'] < 0:
                print("\r[Epoch %d]" % epoch)
            train_epoch(model, retrain_data_loader, None, optimizer)
        model.eval()
        loaded_models[key]['pseudo_model'] = model
    return 
Example #12
def train(args):
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)

    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))
    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)

    trainer = get_trainer(args, model, loss_fn, optimizer)
    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    logger = get_logger(args)
    @trainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', state, engine.state.epoch)
        save_ckpt(args, engine.state.epoch, engine.state.metrics['loss'], model, vocab)

    trainer.run(iters['train'], max_epochs=args.max_epochs)
Example #13
    def __init__(self, opt):
        super(Model, self).__init__()
        self.opt = opt
        self.classifier = Classifier(opt.model)  #.cuda(device=opt.device)
        #####################
        #    Init weights
        #####################
        # self.classifier.apply(weights_init)

        print_network(self.classifier)

        self.optimizer = get_optimizer(opt, self.classifier)
        self.scheduler = get_scheduler(opt, self.optimizer)

        # load networks
        # if opt.load:
        #     pretrained_path = opt.load
        #     self.load_network(self.classifier, 'G', opt.which_epoch, pretrained_path)
        # if self.training:
        #     self.load_network(self.discriminitor, 'D', opt.which_epoch, pretrained_path)

        self.avg_meters = ExponentialMovingAverage(0.95)
        self.save_dir = os.path.join(opt.checkpoint_dir, opt.tag)

        # with open('datasets/class_weight.pkl', 'rb') as f:
        #     class_weight = pickle.load(f, encoding='bytes')
        #     class_weight = np.array(class_weight, dtype=np.float32)
        #     class_weight = torch.from_numpy(class_weight).to(opt.device)
        #     if opt.class_weight:
        #         self.criterionCE = nn.CrossEntropyLoss(weight=class_weight)
        #     else:
        self.criterionCE = nn.CrossEntropyLoss()
Example #14
    def __init__(self, config):

        self.cuda = int(config['cuda'])
        #torch.cuda.empty_cache()
        self.train_dataloader = get_dataloader(config, scope='train')
        self.val_dataloader = get_dataloader(config, scope='val')

        self.model = get_model(config)
        try:
            model_weights = ('experiment/' + config['dir'] + '/'
                             + config['weights'])
            self.model.load_state_dict(torch.load(model_weights)['model'])
            print('Weights loaded')
        except Exception:
            print('Weights randomized')

        self.optimizer = get_optimizer(config, self.model)
        self.total_epochs = config['epochs']
        self.batches_per_epoch = config['batches_per_epoch']
        self.val_batches_per_epoch = config['val_batches_per_epoch']

        self.final_weights_file = 'experiment/' + config['dir'] + '/weights_last.pth'
        self.best_weights_file = 'experiment/' + config['dir'] + '/weights_best.pth'
        self.log_file = 'experiment/' + config['dir'] + '/logs.csv'

        self.loss_dict = {
            'sample_name': config['sample_name'],
            'output_name': config['output_name'],
            'loss': [get_criterion(x) for x in config['loss_criterion']],
            'weight': config['loss_weight']
        }

        self.train_fe = bool(config['train_feature_extractor'])
Example #15
def main():
    '''
    Main Function

    '''

    #Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer
    assert_and_infer_cfg(args)
    writer = prep_experiment(args, parser)
    train_loader, val_loader, train_obj = datasets.setup_loaders(args)
    criterion, criterion_val = loss.get_loss(args)
    net = network.get_net(args, criterion)
    optim, scheduler = optimizer.get_optimizer(args, net)

    torch.cuda.empty_cache()

    if args.evaluate:
        # Early evaluation for benchmarking (no training epoch has run yet)
        validate(val_loader, net, criterion_val, optim, 0, writer)
        evaluate(val_loader, net)
        return

    #Main Loop
    for epoch in range(args.start_epoch, args.max_epoch):
        # Update EPOCH CTR
        cfg.immutable(False)
        cfg.EPOCH = epoch
        cfg.immutable(True)

        scheduler.step()

        train(train_loader, net, criterion, optim, epoch, writer)
        validate(val_loader, net, criterion_val, optim, epoch, writer)
Example #16
    def __init__(self, opt, logger=None):
        super(Model, self).__init__()
        self.opt = opt
        # cfgfile = 'yolo-voc.cfg'
        # self.detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        # in_features = self.detector.roi_heads.box_predictor.cls_score.in_features
        #
        # # replace the pre-trained head with a new one
        # self.detector.roi_heads.box_predictor = FastRCNNPredictor(in_features, config.DATA.NUM_CLASSESS + 1)
        self.detector = yolov4(inference=True,
                               n_classes=config.DATA.NUM_CLASSESS)

        # """
        # pretrained model
        # """
        # pretrained_dict = torch.load('pretrained/yolov4.pth')
        # self.detector.load_state_dict(pretrained_dict)

        self.yolov4loss = Yolo_loss(device=opt.device, batch=opt.batch_size)
        #####################
        #    Init weights
        #####################
        # normal_init(self.detector)

        if opt.debug:
            print_network(self.detector)

        self.optimizer = get_optimizer(opt, self.detector)
        self.scheduler = get_scheduler(opt, self.optimizer)

        self.avg_meters = ExponentialMovingAverage(0.95)
        self.save_dir = os.path.join('checkpoints', opt.tag)
Example #17
    def __init__(self,
                 log_dir: str,
                 cfg: EasyDict,
                 use_cuda: bool = True,
                 use_multi_gpu: bool = True,
                 load_ckpt: str = "",
                 is_train=True):
        self.use_multi_gpu = use_multi_gpu
        self.use_cuda = use_cuda
        self.cfg = cfg
        self.log_dir = Path(log_dir)

        self.timer_start_train = 0
        self.det_best_field_current = 0
        self.det_best_field_best = 0
        self.best_epoch = 0

        # logger / metrics
        self.metrics_fh = None
        if is_train:
            os.makedirs(self.log_dir, exist_ok=True)
            metrics_file = self.log_dir / "train_metrics.csv"
            metric_keys = utils.get_csv_header_keys(
                cfg.training.compute_clip_retrieval)
            self.metrics_fh = metrics_file.open("wt", encoding="utf8")
            self.metrics_writer = csv.DictWriter(self.metrics_fh, metric_keys)
            self.metrics_writer.writeheader()
            self.metrics_fh.flush()
            utils.dump_config(cfg, self.log_dir / "config.yaml")
        self.logger = utils.get_logger(self.log_dir,
                                       "trainer",
                                       log_file=is_train)
        # model
        self.model = CootModel(cfg, use_cuda, use_multi_gpu)

        # contrastive loss
        self.loss_f_contr = ContrastiveLoss(use_cuda)

        # cycle consistency loss
        self.loss_f_cyclecons = None
        if cfg.training.loss_cycle_cons != 0:
            self.loss_f_cyclecons = CycleConsistencyLoss(num_samples=1,
                                                         use_cuda=use_cuda)

        # optimizer
        self.optimizer = get_optimizer(cfg.optimizer, self.model.get_params())

        # scheduler
        self.lr_scheduler = ReduceLROnPlateauWarmup(
            self.optimizer,
            cfg.scheduler.warmup,
            mode="max",
            patience=cfg.scheduler.patience,
            cooldown=cfg.scheduler.cooldown)

        if load_ckpt != "":
            self.logger.info(f"Load checkpoint {load_ckpt}")
            self.model.load_checkpoint(load_ckpt)
Example #18
    def __init__(self, opt, logger=None):
        super(Model, self).__init__()
        self.opt = opt
        self.logger = logger

        kargs = {}
        if opt.scale:
            min_size = opt.scale
            max_size = int(min_size / 3 * 4)
            kargs = {
                'min_size': min_size,
                'max_size': max_size,
            }

        kargs.update({'box_nms_thresh': nms_thresh})

        # define the backbone and the Faster RCNN model
        if opt.backbone is None or opt.backbone.lower() in [
                'res50', 'resnet50'
        ]:
            # the default is resnet50 with FPN
            self.detector = fasterrcnn_resnet50_fpn(pretrained=False, **kargs)

            in_features = self.detector.roi_heads.box_predictor.cls_score.in_features

            # replace the pre-trained head with a new one
            self.detector.roi_heads.box_predictor = FastRCNNPredictor(
                in_features, opt.num_classes + 1)

        elif opt.backbone.lower() in ['vgg16', 'vgg']:
            backbone = vgg16_backbone()
            self.detector = FasterRCNN(backbone,
                                       num_classes=opt.num_classes + 1,
                                       **kargs)

        elif opt.backbone.lower() in ['res101', 'resnet101']:
            # resnet101 without FPN
            backbone = res101_backbone()
            self.detector = FasterRCNN(backbone,
                                       num_classes=opt.num_classes + 1,
                                       **kargs)

        elif opt.backbone.lower() in ['res', 'resnet']:
            raise RuntimeError(
                f'backbone "{opt.backbone}" is ambiguous, please specify layers.'
            )

        else:
            raise NotImplementedError(f'no such backbone: {opt.backbone}')

        print_network(self.detector)

        self.optimizer = get_optimizer(opt, self.detector)
        self.scheduler = get_scheduler(opt, self.optimizer)

        self.avg_meters = ExponentialMovingAverage(0.95)
        self.save_dir = os.path.join(opt.checkpoint_dir, opt.tag)
Example #19
    def __init__(self, opt):
        super(Model, self).__init__()
        self.opt = opt
        self.classifier = Classifier(opt.model)
        # self.classifier.apply(weights_init)  # initialize weights

        print_network(self.classifier)

        self.optimizer = get_optimizer(opt, self.classifier)
        self.scheduler = get_scheduler(opt, self.optimizer)
Example #20
def train(args):
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)

    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))
    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)

    pretrainer = get_pretrainer(args, model, loss_fn, optimizer)
    trainer = get_trainer(args, model, loss_fn, optimizer)

    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    logger = get_logger(args)

    @pretrainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Pretraining")

    @pretrainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'pretrain/iter', engine.state, engine.state.iteration)

    @pretrainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'pretrain/epoch', engine.state, engine.state.epoch)

    """
    @pretrainer.on(Events.COMPLETED)
    def unfreeze_language_model(engine):
        for param in model.module.language_model.base_model.parameters():
            param.requires_grad = True
    """

    @trainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', state, engine.state.epoch)
        log_results_cmd('valid/epoch', state, engine.state.epoch)
        save_ckpt(args, engine.state.epoch, engine.state.metrics['loss'], model, vocab)
        evaluate_by_logic_level(args, model, iterator=iters['val'])

    if args.pretrain_epochs > 0:
        pretrainer.run(iters['pretrain'], max_epochs=args.pretrain_epochs) 
    trainer.run(iters['train'], max_epochs=args.max_epochs)
Example #21
def input_stream():
    model = TinyYolo()
    train_loader, test_loader, val_loader = getdatasets('./data/', batch_size=16)

    for i, train_data in enumerate(train_loader):
        opt = get_optimizer(model, [0.00001])
        out, train_map = train(model, train_data, opt=opt, iou_thresh=0.1)
        print(out)
        print(train_map)
        print()
        print()
Example #22
def train(config):
    # load Vocab
    vocab = data_reader.Vocab(vocab_limits=config['vocab_size'])
    vocab.load_metadata(config['metadata'])
    config['vocab_size'] = vocab.vocab_size()
    tf.logging.info(config)

    initializer = tf.random_uniform_initializer(-config['init_scale'],
                                                config['init_scale'])

    # create models
    with tf.name_scope('Train'):
        opt, lr = optimizer.get_optimizer("sgd", config['learning_rate'])
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            train_model = model.Model(is_training=True,
                                      config=config,
                                      optimizer=opt,
                                      lr=lr)

    with tf.name_scope('Generate'):
        generate_config = copy.deepcopy(config)
        generate_config['batch_size'] = 1
        generate_config['seq_length'] = 1
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            gen_model = model.Model(is_training=False, config=generate_config)

    sv = tf.train.Supervisor(logdir=config['logdir'])
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    tf.logging.info('Start Sess')
    with sv.managed_session(config=sess_config) as sess:
        for i in range(config['n_epoch']):
            lr_decay = config['lr_decay'] ** max(i + 1 - config['decay_epoch'], 0)
            train_model.assign_lr(sess, config['learning_rate'] * lr_decay)

            tf.logging.info('Iter %d Start, Learning_rate: %.4f' %
                            (i, sess.run(train_model.lr)))
            costs, speed = run_epoch(sess,
                                     train_model,
                                     datapath=config['train_data'][0],
                                     config=config,
                                     is_training=True,
                                     gen_model=gen_model,
                                     vocab=vocab)
            tf.logging.info(
                'Iter %d: training_loss:%.4f, speed %.4f words/sec' %
                (i, np.exp(costs), speed))
Example #23
def main():

    """
    Main Function
    """

    # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer
    assert_and_infer_cfg(args)
    writer = prep_experiment(args, parser)
    train_loader, val_loader, train_obj = datasets.setup_loaders(args)
    criterion, criterion_val = loss.get_loss(args)
    net = network.get_net(args, criterion)
    optim, scheduler = optimizer.get_optimizer(args, net)

    if args.fix_bn:
        net.apply(set_bn_eval)
        print("Fix bn for finetuning")

    if args.fp16:
        net, optim = amp.initialize(net, optim, opt_level="O1")

    net = network.wrap_network_in_dataparallel(net, args.apex)
    if args.snapshot:
        optimizer.load_weights(net, optim,
                               args.snapshot, args.restore_optimizer)
    if args.evaluateF:
        assert args.snapshot is not None, "must load weights for evaluation"
        evaluate(val_loader, net, args)
        return
    torch.cuda.empty_cache()
    # Main Loop
    for epoch in range(args.start_epoch, args.max_epoch):
        # Update EPOCH CTR
        cfg.immutable(False)
        cfg.EPOCH = epoch
        cfg.immutable(True)

        scheduler.step()
        train(train_loader, net, optim, epoch, writer)
        if args.apex:
            train_loader.sampler.set_epoch(epoch + 1)
        validate(val_loader, net, criterion_val,
                 optim, epoch, writer)
        if args.class_uniform_pct:
            if epoch >= args.max_cu_epoch:
                train_obj.build_epoch(cut=True)
                if args.apex:
                    train_loader.sampler.set_num_samples()
            else:
                train_obj.build_epoch()
Example #24
    def _initialize(self, batch_steps_per_epoch, output_path):
        self.cost = get_cost(self.net.logits, self.tgt, self.cost_kwargs)
        self.optimizer, self.ema, self.learning_rate_node = get_optimizer(
            self.cost, self.global_step, batch_steps_per_epoch,
            self.opt_kwargs)

        init = tf.global_variables_initializer()
        if output_path is not None:
            output_path = os.path.abspath(output_path)
            if not os.path.exists(output_path):
                print("Allocating '{:}'".format(output_path))
                os.makedirs(output_path)

        return init
Example #25
def train_new_model(model, train_queue, valid_queue, test_queue):
    ori_model = model.module if args.distributed else model
    optimizer = get_optimizer(model, args)
    scheduler = get_scheduler(optimizer, args)
    drop_layers = ori_model.drop_layers()
    criterion = get_criterion(args.classes, args.label_smoothing)

    for epoch in range(args.epochs):
        scheduler.step()
        if args.warmup and epoch < args.warmup_epochs:
            lr = args.learning_rate * epoch / args.warmup_epochs + args.warmup_lr
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            cond_logging('epoch %d lr %e', epoch, lr)
        else:
            lr = scheduler.get_lr()[0]
            cond_logging('epoch %d lr %e', epoch, lr)

        if args.distributed:
            train_queue.sampler.set_epoch(epoch)
        if args.epd:
            drop_rate = args.drop_rate * epoch / args.epochs
        else:
            drop_rate = args.drop_rate
        drop_rates = [drop_rate] * drop_layers
        if args.layerd:
            for i in range(drop_layers):
                drop_rates[i] = drop_rates[i] * (i + 1) / drop_layers
        ori_model.set_drop_rates(drop_rates)
        cond_logging('drop rates:')
        cond_logging(ori_model.drop_rates)

        #training
        train_acc, train_obj = train(train_queue, model, criterion, optimizer,
                                     lr, args.report_freq, args.world_size,
                                     args.distributed, args.local_rank)

        cond_logging('train acc %f', train_acc)
        #validation
        drop_rates = [0] * drop_layers
        ori_model.set_drop_rates(drop_rates)
        valid_acc, valid_obj = infer(valid_queue, model, criterion,
                                     args.report_freq, args.world_size,
                                     args.distributed, args.local_rank)
        cond_logging('valid acc %f', valid_acc)
        test_acc, test_obj = infer(test_queue, model, criterion,
                                   args.report_freq, args.world_size,
                                   args.distributed, args.local_rank)
        cond_logging('test acc %f', test_acc)
    return model
Example #26
def train_network(args, network=None, data_set=None):
    device = torch.device("cuda" if args.gpu_no >= 0 else "cpu")

    if network is None:
        if args.data_set == 'CIFAR10':
            if 'vgg' in args.network:
                network = VGG(args.network, args.data_set)

    network = network.to(device)
    print(network)

    if data_set is None:
        data_set = get_data_set(args, train_flag=True)

    loss_calculator = Loss_Calculator()

    optimizer, scheduler = get_optimizer(network, args)

    if args.resume_flag:
        check_point = torch.load(args.load_path)
        network.load_state_dict(check_point['state_dict'])
        loss_calculator.loss_seq = check_point['loss_seq']
        args.start_epoch = check_point['epoch']  # update start epoch

    print("Start at %s" % time.ctime())
    print("-*-" * 10 + "\n\tTrain network\n" + "-*-" * 10)
    for epoch in range(args.start_epoch, args.epoch):
        print("---------- EPOCH %d ----------" % (epoch + 1))
        # make shuffled data loader
        data_loader = torch.utils.data.DataLoader(data_set,
                                                  batch_size=args.batch_size,
                                                  shuffle=True)

        # train one epoch
        train_step(network, data_loader, loss_calculator, optimizer, device,
                   epoch, args.print_freq)

        # adjust learning rate
        if scheduler is not None:
            scheduler.step()

        torch.save(
            {
                'epoch': epoch + 1,
                'state_dict': network.state_dict(),
                'loss_seq': loss_calculator.loss_seq
            }, args.save_path + args.network + '_checkpoint.pth')
    print("End at %s" % time.ctime())
    return network
Example #27
def load_model(dataset, rc, experiment_name):
    loss = LossCombiner(4, dataset.class_weights, NllLoss)
    transformer = TransformerEncoder(dataset.source_embedding,
                                     hyperparameters=rc)
    model = JointAspectTagger(transformer, rc, 4, 20, dataset.target_names)
    optimizer = get_optimizer(model, rc)
    trainer = Trainer(model,
                      loss,
                      optimizer,
                      rc,
                      dataset,
                      experiment_name,
                      enable_tensorboard=False,
                      verbose=False)
    return trainer
Example #28
    def run_WrappedVOCSBDSegmentation5i_network():
        from torch.utils import data
        from model.head.pgn import PGN
        import torch.nn.functional as F
        from loss import cross_entropy2d
        from optimizer import get_optimizer

        batch_size = 4
        epoch = 1

        train_set = WrappedVOCSBDSegmentation5i(
            root=roots_path,
            fold=1,
            # remember to run both train and test set
            split='test',
            rebuild_mask=False,
            img_size=224)
        train_loader = data.DataLoader(train_set,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=8)

        model = PGN()

        optim = get_optimizer()(model.parameters(),
                                lr=0.0025,
                                momentum=0.9,
                                dampening=0,
                                weight_decay=0,
                                nesterov=False)

        for e in range(epoch):
            for i_iter, batch in enumerate(train_loader):
                Is, Ys, Iq, Yq, sample_class, _, _ = batch
                Ys, Yq = Ys.unsqueeze(1).float(), Yq.unsqueeze(1).float()

                pred = model(Is, Ys, Iq)

                pred = F.interpolate(pred,
                                     size=Yq.size()[-2:],
                                     mode='bilinear',
                                     align_corners=True)

                loss = cross_entropy2d(pred, Yq.long())
                optim.zero_grad()
                loss.backward()
                optim.step()
                print(loss.item(), sample_class)
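Note that Example #28 calls get_optimizer()(...) with SGD-style keyword arguments, so here the factory must return an optimizer class rather than an instance. A minimal sketch consistent with that call site (the body is an assumption):

import torch

def get_optimizer():
    # return the class itself; the caller instantiates it with its own
    # parameters and keyword arguments (lr, momentum, nesterov, ...)
    return torch.optim.SGD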
Example #29
    def __init__(self, opt):
        super(Model, self).__init__()
        self.opt = opt
        self.cleaner = FFA().to(device=opt.device)
        #####################
        #    Init weights
        #####################
        # normal_init(self.cleaner)

        print_network(self.cleaner)

        self.g_optimizer = get_optimizer(opt, self.cleaner)
        self.scheduler = get_scheduler(opt, self.g_optimizer)

        self.avg_meters = ExponentialMovingAverage(0.95)
        self.save_dir = os.path.join(opt.checkpoint_dir, opt.tag)
Example #30
def search_grid_epochs(dataset,
                       epochs,
                       learnlist=[0.1],
                       optimizer='sgd',
                       start_wid=0,
                       test_size=1000):
    train_step, init_op, reset_opt = get_optimizer(optimizer)
    worker_time = Timer()
    with tf.Session() as sess:
        for wid, learn_rate in enumerate(learnlist):
            step = 0
            sess.run(init_op)
            for e in range(epochs):
                step = train_epochs(sess, wid + start_wid, 1, step, learn_rate,
                                    dataset, test_size, train_step)
            print('# worker time %3.1fs' % worker_time.split())
Example #31
def main():
    # spawn theano vars
    xl = T.matrix('xl')
    xr = T.matrix('xr')
    y = T.ivector('y')
    learning_rate = T.scalar('learning_rate')
    trng = RandomStreams(4321)

    # use test values
    """
    import numpy as np
    batch_size = 10
    theano.config.compute_test_value = 'raise'
    xl.tag.test_value = np.random.randn(batch_size, 392).astype(floatX)
    xr.tag.test_value = np.random.randn(batch_size, 392).astype(floatX)
    y.tag.test_value = np.random.randint(8, size=batch_size).astype(np.int32)
    learning_rate.tag.test_value = 0.5
    """

    # build cgs
    model_l, model_r, model_b = build_model(
        xl, xr, y, learning_rate, trng=trng,
        **options)

    # compile
    opt = get_optimizer(options['optimizer'])
    f_train_l = opt(learning_rate, model_l, [xl, y])
    f_train_r = opt(learning_rate, model_r, [xr, y])
    f_train_b = opt(learning_rate, model_b, [xl, xr, y], return_alpha=True)

    # compile validation/test functions
    f_valid_l = theano.function([xl, y], [model_l.cost, model_l.acc])
    f_valid_r = theano.function([xr, y], [model_r.cost, model_r.acc])
    f_valid_b = theano.function([xl, xr, y], [model_b.cost, model_b.acc])

    # training loop
    train(f_train_l, f_train_r, f_train_b, f_valid_l, f_valid_r, f_valid_b,
          xl, xr, y, **options)