Example #1
 def __init__(self,
              model_factory,
              trainloader,
              valloader,
              params,
              dataset_cfg,
              experiment_cfg,
              best_metric_recorder,
              restore_checkpoint=None,
              logger=None):
     logger = logger if logger else Logger.get()
     items = model_factory(params)
     self.model = items['model']
     self.optimizer = items['optimizer']
     self.criterion = items['criterion']
     self.metrics = items['metrics']
     self.best_metric_recorder = best_metric_recorder
     self.trainloader = trainloader
     self.valloader = valloader
     self.num_epochs = params.num_epochs
     self.running_avg_steps = params.running_avg_steps
     self.checkpoint = Checkpoint(
         checkpoint_dir=experiment_cfg.experiment_dir(),
         filename=experiment_cfg.checkpoint_filename(),
         best_checkpoint=experiment_cfg.best_checkpoint(),
         latest_checkpoint=experiment_cfg.latest_checkpoint(),
         logger=logger)
     self.best_metrics_file = experiment_cfg.best_metrics_file(
         dataset_cfg.val_name())
     self.latest_metrics_file = experiment_cfg.latest_metrics_file(
         dataset_cfg.val_name())
     self.restore_checkpoint = restore_checkpoint
     self.logger = logger
Example #2
 def __init__(self,
              model_factory,
              dataset,
              params,
              dataset_cfg,
              experiment_cfg,
              restore_checkpoint,
              logger=None):
     items = model_factory(params)
     self.logger = logger if logger else Logger.get()
     self.model = items['model']
     # self.optimizer = items['optimizer']
     self.criterion = items['criterion']
     self.metrics = items['metrics']
     # restore model from checkpoint
     checkpoint = Checkpoint(
         checkpoint_dir=experiment_cfg.experiment_dir(),
         filename=experiment_cfg.checkpoint_filename(),
         best_checkpoint=experiment_cfg.best_checkpoint(),
         latest_checkpoint=experiment_cfg.latest_checkpoint(),
         logger=self.logger)
     status = checkpoint.restore(self.model, None, restore_checkpoint)
     assert status, "Restore model from the checkpoint: {}, failed".format(
         restore_checkpoint)
     self.dataset = dataset
     self.metrics_file = experiment_cfg.metrics_file(
         restore_checkpoint, self.dataset.dataset_name)
Example #3
    def __init__(self, args):
        self.config = args
        # parameters
        self.start_epoch = 1
        self.max_epoch = args.max_epoch
        self.save_dir = args.save_dir
        self.device = args.device
        self.verbose = args.verbose
        self.max_points = args.max_points
        self.voxel_size = args.voxel_size

        self.model = args.model.to(self.device)
        self.optimizer = args.optimizer
        self.scheduler = args.scheduler
        self.scheduler_freq = args.scheduler_freq
        self.snapshot_freq = args.snapshot_freq
        self.snapshot_dir = args.snapshot_dir
        self.benchmark = args.benchmark
        self.iter_size = args.iter_size
        self.verbose_freq = args.verbose_freq

        self.w_circle_loss = args.w_circle_loss
        self.w_overlap_loss = args.w_overlap_loss
        self.w_saliency_loss = args.w_saliency_loss
        self.desc_loss = args.desc_loss

        self.best_loss = 1e5
        self.best_recall = -1e5
        self.writer = SummaryWriter(log_dir=args.tboard_dir)
        self.logger = Logger(args.snapshot_dir)
        self.logger.write(
            f'#parameters {sum([x.nelement() for x in self.model.parameters()])/1000000.} M\n'
        )

        if (args.pretrain != ''):
            self._load_pretrain(args.pretrain)

        self.loader = dict()
        self.loader['train'] = args.train_loader
        self.loader['val'] = args.val_loader
        self.loader['test'] = args.test_loader

        with open(f'{args.snapshot_dir}/model', 'w') as f:
            f.write(str(self.model))
Example #4
    parser.add_argument('--encoding',
                        default='utf8',
                        help="The encoding for input and output file.")

    args = parser.parse_args()
    dataset_cfg = DatasetCfg(args.data_dir)
    exp_cfg = ExperimentCfg(args.exp_dir)
    inputs_file = args.inputs_file
    outputs_file = args.outputs_file
    restore_checkpoint = args.restore_checkpoint
    encoding = args.encoding

    msg = "Inputs file not exists: {}"
    assert os.path.isfile(inputs_file), msg.format(inputs_file)

    logger = Logger.set(os.path.join(exp_cfg.experiment_dir(), 
                                     'predict.log'))

    checkpoint = Checkpoint(
        checkpoint_dir=exp_cfg.experiment_dir(),
        filename=exp_cfg.checkpoint_filename(),
        best_checkpoint=exp_cfg.best_checkpoint(),
        latest_checkpoint=exp_cfg.latest_checkpoint(),
        logger=logger)

    # load params
    word_vocab = Vocab(words_file)
    tag_vocab = Vocab(tags_file)

    params = Params(exp_cfg.params_file())
    params.update(Params(dataset_cfg.params_file()))
    params.set('cuda', torch.cuda.is_available())
Example #5
            epochs=args.epochs)
    elif args.i_what == 'iFlow':
        metadata.update({"device": device})
        model = iFlow(args=metadata).to(device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, \
                                                     factor=args.lr_drop_factor, \
                                                     patience=args.lr_patience, \
                                                     verbose=True) # factor=0.1 and patience=4

    ste = time.time()
    print('setup time: {}s'.format(ste - st))

    # setup loggers
    logger = Logger(logdir=LOG_FOLDER)  # 'log/'
    exp_id = logger.get_id()  # 1

    tensorboard_run_name = TENSORBOARD_RUN_FOLDER + 'exp' + str(
        exp_id) + '_'.join(
            map(str, [
                '', args.batch_size, args.max_iter, args.lr, args.hidden_dim,
                args.depth, args.anneal
            ]))
    # 'runs/exp1_64_12500_0.001_50_3_False'

    writer = SummaryWriter(logdir=tensorboard_run_name)

    if args.i_what == 'iFlow':
        logger.add('log_normalizer')
        logger.add('neg_log_det')
Example #6
def logger_instance(D_learnrate, G_learnrate, Dataset_name):
    return Logger(model_name='GAN D_LR=' + str(D_learnrate) + ' G_LR=' +
                  str(G_learnrate),
                  data_name=Dataset_name)
Example #7
import argparse
import os
import align.detect_face as detect_face
import cv2
import numpy as np
import tensorflow as tf
from lib.face_utils import judge_side_face
from lib.utils import Logger, mkdir
from src.sort import Sort
logger = Logger(__name__)


def main():
    global colours, img_size
    args = parse_args()
    # path to one or more input videos
    root_dir = args.root_dir
    # save path for the captured and cropped faces
    output_path = args.output_path
    display = args.display
    mkdir(output_path)

    if display:
        colours = np.random.rand(32, 3)

    # initialize the tracker
    tracker = Sort()

    logger.info('start track and extract......')
    with tf.Graph().as_default():
        with tf.Session(
Example #8
def main(args):

    # fix random seeds
    if args.seed:
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        np.random.seed(args.seed)

    # CNN
    if args.verbose:
        print('Architecture: {}'.format(args.arch))
    model = models.__dict__[args.arch](sobel=args.sobel, dropout=args.dropout)
    fd = int(model.top_layer.weight.size()[1])
    model.top_layer = None
    model.features = torch.nn.DataParallel(model.features)
    model.cuda()
    cudnn.benchmark = True

    # create optimizer
    optimizer = torch.optim.SGD(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=args.learning_rate,
        momentum=args.momentum,
        weight_decay=10**args.weight_decay,
    )

    # define loss function
    criterion = nn.CrossEntropyLoss().cuda()

    restore(model, args.resume)

    # creating checkpoint repo
    exp_check = os.path.join(args.experiment, 'checkpoints')
    if not os.path.isdir(exp_check):
        os.makedirs(exp_check)

    # creating cluster assignments log
    cluster_log = Logger(os.path.join(args.experiment, 'clusters'))

    # preprocessing of data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    tra = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(), normalize
    ]

    # load the data
    end = time.time()
    dataset = datasets.ImageFolder(args.data,
                                   transform=transforms.Compose(tra))
    if args.verbose:
        print('Load dataset: {0:.2f} s'.format(time.time() - end))

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch,
                                             num_workers=args.workers,
                                             pin_memory=True)

    algs = {
        'KMeans': clustering.KMeans,
        'PIC': clustering.PIC,
    }
    cluster_alg = algs[args.cluster_alg](args.nmb_cluster)

    # training convnet with cluster_alg
    for epoch in range(args.start_epoch, args.epochs):
        end = time.time()

        # remove head
        model.top_layer = None
        model.classifier = nn.Sequential(
            *list(model.classifier.children())[:-1])

        # get the features for the whole dataset
        features = compute_features(dataloader, model, len(dataset),
                                    args.batch)

        # cluster the features
        if args.verbose:
            print('Cluster the features')
        clustering_loss = cluster_alg.cluster(features, verbose=args.verbose)

        # assign pseudo-labels
        if args.verbose:
            print('Assign real labels')
        # train_dataset = cluster_assign(cluster_alg.images_lists,
        #                                           dataset.imgs)
        train_dataset = cluster_assign_with_original_labels(dataset.imgs)

        # uniformly sample per target
        sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)),
                                   cluster_alg.images_lists)

        train_dataloader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch,
            num_workers=args.workers,
            sampler=sampler,
            pin_memory=True,
        )

        # set last fully connected layer
        mlp = list(model.classifier.children())
        mlp.append(nn.ReLU(inplace=True).cuda())
        model.classifier = nn.Sequential(*mlp)
        model.top_layer = nn.Linear(fd, len(cluster_alg.images_lists))
        model.top_layer.weight.data.normal_(0, 0.01)
        model.top_layer.bias.data.zero_()
        model.top_layer.cuda()

        # train network with clusters as pseudo-labels
        end = time.time()

        for x in range(1000):
            loss = train(train_dataloader, model, criterion, optimizer, epoch)

        # print log
        if args.verbose:
            print('###### Epoch [{0}] ###### \n'
                  'Time: {1:.3f} s\n'
                  'Clustering loss: {2:.3f} \n'
                  'ConvNet loss: {3:.3f}'.format(epoch,
                                                 time.time() - end,
                                                 clustering_loss, loss))
            try:
                nmi = normalized_mutual_info_score(
                    arrange_clustering(cluster_alg.images_lists),
                    arrange_clustering(cluster_log.data[-1]))
                print('NMI against previous assignment: {0:.3f}'.format(nmi))
            except IndexError:
                pass
            print('####################### \n')
        # save running checkpoint
        torch.save(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }, os.path.join(args.experiment, 'checkpoint.pth.tar'))

        # save cluster assignments
        cluster_log.log(cluster_alg.images_lists)
Example #9
if __name__ == '__main__':
    dataset_cfg = DatasetCfg(config.data_dir)
    exp_cfg = ExperimentCfg(config.base_model_dir)
    parser = get_parser(data_dir=dataset_cfg.data_dir(),
                        exp_dir=exp_cfg.experiment_dir(),
                        restore_checkpoint=exp_cfg.best_checkpoint(),
                        dataset_name=dataset_cfg.test_name())

    args = parser.parse_args()
    dataset_name = args.dataset_name
    restore_checkpoint = args.restore_checkpoint
    dataset_cfg.set_data_dir(args.data_dir)
    exp_cfg.set_experiment_dir(args.exp_dir)

    # set logger
    logger = Logger.set(exp_cfg.evaluate_log())

    # load model configuration
    logger.info("Loading the experiment configurations...")
    params = Params(exp_cfg.params_file())
    # cuda flag
    params.set('cuda', torch.cuda.is_available())
    logger.info("- done.")

    # load datasets
    logger.info("Loading the {} dataset...".format(dataset_name))
    # add datasets parameters into params
    params.update(Params(dataset_cfg.params_file()))
    dataset = load_data(params, dataset_cfg.data_dir(), dataset_name,
                        params['{}_size'.format(dataset_name)])
    logger.info("- done.")
Example #10
class Trainer(object):
    def __init__(self, args):
        self.config = args
        # parameters
        self.start_epoch = 1
        self.max_epoch = args.max_epoch
        self.save_dir = args.save_dir
        self.device = args.device
        self.verbose = args.verbose
        self.max_points = args.max_points
        self.voxel_size = args.voxel_size

        self.model = args.model.to(self.device)
        self.optimizer = args.optimizer
        self.scheduler = args.scheduler
        self.scheduler_freq = args.scheduler_freq
        self.snapshot_freq = args.snapshot_freq
        self.snapshot_dir = args.snapshot_dir
        self.benchmark = args.benchmark
        self.iter_size = args.iter_size
        self.verbose_freq = args.verbose_freq

        self.w_circle_loss = args.w_circle_loss
        self.w_overlap_loss = args.w_overlap_loss
        self.w_saliency_loss = args.w_saliency_loss
        self.desc_loss = args.desc_loss

        self.best_loss = 1e5
        self.best_recall = -1e5
        self.writer = SummaryWriter(log_dir=args.tboard_dir)
        self.logger = Logger(args.snapshot_dir)
        self.logger.write(
            f'#parameters {sum([x.nelement() for x in self.model.parameters()])/1000000.} M\n'
        )

        if (args.pretrain != ''):
            self._load_pretrain(args.pretrain)

        self.loader = dict()
        self.loader['train'] = args.train_loader
        self.loader['val'] = args.val_loader
        self.loader['test'] = args.test_loader

        with open(f'{args.snapshot_dir}/model', 'w') as f:
            f.write(str(self.model))

    def _snapshot(self, epoch, name=None):
        state = {
            'epoch': epoch,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'scheduler': self.scheduler.state_dict(),
            'best_loss': self.best_loss,
            'best_recall': self.best_recall
        }
        if name is None:
            filename = os.path.join(self.save_dir, f'model_{epoch}.pth')
        else:
            filename = os.path.join(self.save_dir, f'model_{name}.pth')
        self.logger.write(f"Save model to {filename}\n")
        torch.save(state, filename)

    def _load_pretrain(self, resume):
        if os.path.isfile(resume):
            state = torch.load(resume)
            self.model.load_state_dict(state['state_dict'])
            self.start_epoch = state['epoch']
            self.scheduler.load_state_dict(state['scheduler'])
            self.optimizer.load_state_dict(state['optimizer'])
            self.best_loss = state['best_loss']
            self.best_recall = state['best_recall']

            self.logger.write(
                f'Successfully load pretrained model from {resume}!\n')
            self.logger.write(f'Current best loss {self.best_loss}\n')
            self.logger.write(f'Current best recall {self.best_recall}\n')
        else:
            raise ValueError(f"=> no checkpoint found at '{resume}'")

    def _get_lr(self, group=0):
        return self.optimizer.param_groups[group]['lr']

    def stats_dict(self):
        stats = dict()
        stats['circle_loss'] = 0.
        stats['recall'] = 0.  # feature match recall, divided by number of ground truth pairs
        stats['saliency_loss'] = 0.
        stats['saliency_recall'] = 0.
        stats['saliency_precision'] = 0.
        stats['overlap_loss'] = 0.
        stats['overlap_recall'] = 0.
        stats['overlap_precision'] = 0.
        return stats

    def stats_meter(self):
        meters = dict()
        stats = self.stats_dict()
        for key, _ in stats.items():
            meters[key] = AverageMeter()
        return meters

    def inference_one_batch(self, input_dict, phase):
        assert phase in ['train', 'val', 'test']
        ##################################
        # training
        if (phase == 'train'):
            self.model.train()
            ###############################################
            # forward pass
            sinput_src = ME.SparseTensor(input_dict['src_F'].to(self.device),
                                         coordinates=input_dict['src_C'].to(
                                             self.device))
            sinput_tgt = ME.SparseTensor(input_dict['tgt_F'].to(self.device),
                                         coordinates=input_dict['tgt_C'].to(
                                             self.device))

            src_feats, tgt_feats, scores_overlap, scores_saliency = self.model(
                sinput_src, sinput_tgt)
            src_pcd, tgt_pcd = input_dict['pcd_src'].to(
                self.device), input_dict['pcd_tgt'].to(self.device)
            c_rot = input_dict['rot'].to(self.device)
            c_trans = input_dict['trans'].to(self.device)
            correspondence = input_dict['correspondences'].long().to(
                self.device)

            ###################################################
            # get loss
            stats = self.desc_loss(src_pcd, tgt_pcd, src_feats, tgt_feats,
                                   correspondence, c_rot, c_trans,
                                   scores_overlap, scores_saliency,
                                   input_dict['scale'])

            c_loss = stats['circle_loss'] * self.w_circle_loss + stats[
                'overlap_loss'] * self.w_overlap_loss + stats[
                    'saliency_loss'] * self.w_saliency_loss

            c_loss.backward()

        else:
            self.model.eval()
            with torch.no_grad():
                ###############################################
                # forward pass
                sinput_src = ME.SparseTensor(
                    input_dict['src_F'].to(self.device),
                    coordinates=input_dict['src_C'].to(self.device))
                sinput_tgt = ME.SparseTensor(
                    input_dict['tgt_F'].to(self.device),
                    coordinates=input_dict['tgt_C'].to(self.device))

                src_feats, tgt_feats, scores_overlap, scores_saliency = self.model(
                    sinput_src, sinput_tgt)
                src_pcd, tgt_pcd = input_dict['pcd_src'].to(
                    self.device), input_dict['pcd_tgt'].to(self.device)
                c_rot = input_dict['rot'].to(self.device)
                c_trans = input_dict['trans'].to(self.device)
                correspondence = input_dict['correspondences'].long().to(
                    self.device)

                ###################################################
                # get loss
                stats = self.desc_loss(src_pcd, tgt_pcd, src_feats, tgt_feats,
                                       correspondence, c_rot, c_trans,
                                       scores_overlap, scores_saliency,
                                       input_dict['scale'])

        ##################################
        # detach the gradients for loss terms
        stats['circle_loss'] = float(stats['circle_loss'].detach())
        stats['overlap_loss'] = float(stats['overlap_loss'].detach())
        stats['saliency_loss'] = float(stats['saliency_loss'].detach())

        return stats

    def inference_one_epoch(self, epoch, phase):
        gc.collect()
        assert phase in ['train', 'val', 'test']

        # init stats meter
        stats_meter = self.stats_meter()

        num_iter = int(
            len(self.loader[phase].dataset) // self.loader[phase].batch_size)
        c_loader_iter = self.loader[phase].__iter__()

        self.optimizer.zero_grad()
        for c_iter in tqdm(range(num_iter)):  # loop through this epoch
            inputs = next(c_loader_iter)
            try:
                ##################################
                # forward pass
                # with torch.autograd.detect_anomaly():
                stats = self.inference_one_batch(inputs, phase)

                ###################################################
                # run optimisation
                if ((c_iter + 1) % self.iter_size == 0 and phase == 'train'):
                    gradient_valid = validate_gradient(self.model)
                    if (gradient_valid):
                        self.optimizer.step()
                    else:
                        self.logger.write('gradient not valid\n')
                    self.optimizer.zero_grad()

                ################################
                # update to stats_meter
                for key, value in stats.items():
                    stats_meter[key].update(value)
            except RuntimeError as inst:
                pass

            torch.cuda.empty_cache()

            if (c_iter + 1) % self.verbose_freq == 0 and self.verbose:
                curr_iter = num_iter * (epoch - 1) + c_iter
                for key, value in stats_meter.items():
                    self.writer.add_scalar(f'{phase}/{key}', value.avg,
                                           curr_iter)

                message = f'{phase} Epoch: {epoch} [{c_iter+1:4d}/{num_iter}]'
                for key, value in stats_meter.items():
                    message += f'{key}: {value.avg:.2f}\t'

                self.logger.write(message + '\n')

        message = f'{phase} Epoch: {epoch}'
        for key, value in stats_meter.items():
            message += f'{key}: {value.avg:.2f}\t'
        self.logger.write(message + '\n')

        return stats_meter

    def train(self):
        print('start training...')
        for epoch in range(self.start_epoch, self.max_epoch):
            self.inference_one_epoch(epoch, 'train')
            self.scheduler.step()

            stats_meter = self.inference_one_epoch(epoch, 'val')

            if stats_meter['circle_loss'].avg < self.best_loss:
                self.best_loss = stats_meter['circle_loss'].avg
                self._snapshot(epoch, 'best_loss')
            if stats_meter['recall'].avg > self.best_recall:
                self.best_recall = stats_meter['recall'].avg
                self._snapshot(epoch, 'best_recall')

            # we only add the saliency loss once we get decent point-wise features
            if (stats_meter['recall'].avg > 0.3):
                self.w_saliency_loss = 1.
            else:
                self.w_saliency_loss = 0.

        # finish all epoch
        print("Training finish!")

    def eval(self):
        print('Start to evaluate on validation datasets...')
        stats_meter = self.inference_one_epoch(0, 'val')

        for key, value in stats_meter.items():
            print(key, value.avg)
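
A minimal driver sketch for the Trainer above, assuming args is a config namespace that already carries the model, optimizer, scheduler, data loaders and the other attributes read in __init__ (the build_config() helper is hypothetical):

args = build_config()      # hypothetical helper that assembles the config namespace
trainer = Trainer(args)
trainer.train()            # train/val loop with best-loss and best-recall snapshots
trainer.eval()             # report averaged validation statistics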
Example #11
                 aux_dim,
                 activation='lrelu',
                 device=device,
                 hidden_dim=args.hidden_dim,
                 anneal=args.anneal)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     factor=0.1,
                                                     patience=4,
                                                     verbose=True)

    ste = time.time()
    print('setup time: {}s'.format(ste - st))

    # setup loggers
    logger = Logger(path=LOG_FOLDER)
    exp_id = logger.get_id()
    tensorboard_run_name = TENSORBOARD_RUN_FOLDER + 'exp' + str(
        exp_id) + '_'.join(
            map(str, [
                '', args.batch_size, args.max_iter, args.lr, args.hidden_dim,
                args.depth, args.anneal
            ]))
    writer = SummaryWriter(logdir=tensorboard_run_name)
    logger.add('elbo')
    logger.add('perf')
    print('Beginning training for exp: {}'.format(exp_id))

    # training loop
    it = 0
    model.train()
Example #12
    args = parser.parse_args()

    msg = 'Data file {} not found.'
    assert os.path.isfile(args.data_file), msg.format(args.data_file)
    msg = '{} directory not found. Please create it first.'
    assert os.path.isdir(args.data_dir), msg.format(args.data_dir)
    msg = 'the proportion of the dataset to be built must be in (0.0, 1.0]'
    assert (args.data_factor > 0.0) and (args.data_factor <= 1.0), msg
    msg = 'train factor + val factor + test factor must be equal to 1.0'
    total = args.train_factor + args.val_factor + args.test_factor
    assert (1.0 == total), msg

    dataset_cfg.set_data_dir(args.data_dir)

    # set and get logger
    logger = Logger.set(dataset_cfg.log_file())

    # build, load and dump datasets
    builder = Builder(data_factor=args.data_factor,
                      train_factor=args.train_factor,
                      val_factor=args.val_factor,
                      test_factor=args.test_factor,
                      train_name=args.train_name,
                      val_name=args.val_name,
                      test_name=args.test_name,
                      logger=logger)
    builder.load(args.data_file, encoding='windows-1252')
    builder.dump(dataset_cfg.data_dir(),
                 dataset_cfg.params_file(),
                 min_count_word=args.min_count_word,
                 min_count_tag=args.min_count_tag,
Example #13
    # load parser
    dataset_cfg = DatasetCfg(config.data_dir)
    exp_cfg = ExperimentCfg(config.base_model_dir)
    parser = get_parser(data_dir=dataset_cfg.data_dir(),
                        exp_dir=exp_cfg.experiment_dir(),
                        restore_checkpoint=None)

    # parse command line arguments
    args = parser.parse_args()
    restore_checkpoint = args.restore_checkpoint
    dataset_cfg.set_data_dir(args.data_dir)
    exp_cfg.set_experiment_dir(args.exp_dir)

    # set logger
    # Note: log file will be stored in the `exp_dir` directory
    logger = Logger.set(exp_cfg.train_log())

    # load experiment configuration
    logger.info("Loading the experiment configurations...")
    params = Params(exp_cfg.params_file())
    logger.info("- done.")

    # set params
    params.set('cuda', torch.cuda.is_available())

    # load datasets
    logger.info("Loading the datasets...")
    # add datasets parameters into params
    params.update(Params(dataset_cfg.params_file()))
    trainloader, valloader = load_data(params,
                                       dataset_cfg.data_dir(),
Example #14
    def __init__(self, name, config, topics, pages):
        self.name = name
        self.config = config
        self.topics = topics
        self.site_alias = {
            target['host']: target.get('alias', '')
            for target in self.config.targets
        }

        self.url_queries = Queue()
        self.current_info = Dict()
        self.queue_dict = {
            target['host']: Queue()
            for target in self.config.targets
        }
        self.current_info_dict = {
            target['host']: Dict()
            for target in self.config.targets
        }
        self.save_breakpoint_path = os.path.join(self.config.save.tmp,
                                                 f'{self.name}.breakpoint')

        if os.path.isfile(self.save_breakpoint_path):
            self._load_breakpoint()

        self.request = Requestor(self.config)
        # self.searcher = Searcher(self.config)
        self.searcher = Searcher('baidu')
        self.parser = Parser()

        self.database = DataBase(**self.config.save.database.login)

        # self.reset_database()
        self.table = 'news'
        self.logger = Logger(filename=self.config.save.log)

        self.url_fetcher = SearchEngineUrlFetcher(
            request=self.request,
            searcher=self.searcher,
            queries=self.url_queries,
            current_info=self.current_info,
            hosts=[target['host'] for target in self.config.targets],
            topics=self.topics,
            pages=pages,
            queue_dict=self.queue_dict,
        )

        # one thread for one host
        self.threads = [
            TargetedCrawler(
                self.request,
                self.parser,
                self.url_fetcher,
                self.database,
                self.table,
                self.logger,
                target,
                self.queue_dict[target['host']],
                self.current_info_dict[target['host']],
            ) for target in config.targets
        ]
Example #15
class TopicalCrawler:
    def __init__(self, name, config, topics, pages):
        self.name = name
        self.config = config
        self.topics = topics
        self.site_alias = {
            target['host']: target.get('alias', '')
            for target in self.config.targets
        }

        self.url_queries = Queue()
        self.current_info = Dict()
        self.queue_dict = {
            target['host']: Queue()
            for target in self.config.targets
        }
        self.current_info_dict = {
            target['host']: Dict()
            for target in self.config.targets
        }
        self.save_breakpoint_path = os.path.join(self.config.save.tmp,
                                                 f'{self.name}.breakpoint')

        if os.path.isfile(self.save_breakpoint_path):
            self._load_breakpoint()

        self.request = Requestor(self.config)
        # self.searcher = Searcher(self.config)
        self.searcher = Searcher('baidu')
        self.parser = Parser()

        self.database = DataBase(**self.config.save.database.login)

        # self.reset_database()
        self.table = 'news'
        self.logger = Logger(filename=self.config.save.log)

        self.url_fetcher = SearchEngineUrlFetcher(
            request=self.request,
            searcher=self.searcher,
            queries=self.url_queries,
            current_info=self.current_info,
            hosts=[target['host'] for target in self.config.targets],
            topics=self.topics,
            pages=pages,
            queue_dict=self.queue_dict,
        )

        # one thread for one host
        self.threads = [
            TargetedCrawler(
                self.request,
                self.parser,
                self.url_fetcher,
                self.database,
                self.table,
                self.logger,
                target,
                self.queue_dict[target['host']],
                self.current_info_dict[target['host']],
            ) for target in config.targets
        ]

    def __call__(self, string, option='url'):
        if option == 'url':
            self.parse(string)
        elif option == 'keyword':
            urls = self.search(string)
            data = []
            for url in urls:
                soup, url, host = self.request(url)
                data.append(self.parse(soup, host))
            return data
        else:
            raise ValueError("option should be 'url' or 'keyword'")

    def test_search(self, keyword, host=None, page=1):
        query = self.searcher.construct_query(keyword, host, page)
        soup, _ = self.request(query)
        urls = self.searcher.parse(soup)
        return urls

    def test_parse(self, url, remain_query_key=None):
        soup, url = self.request(url,
                                 remain_query_key=remain_query_key,
                                 no_headers=True)
        data = self.parser(soup, url['netloc'], url['tld'])
        next_page_url = data.pop('next', None)
        while next_page_url:
            try:
                _soup, _url = self.request(next_page_url)
                _data = self.parser(_soup, _url['netloc'], _url['tld'])
                data['text'] += _data['text']
                next_page_url = _data.pop('next', None)
            except:
                break
        return {
            **data, 'url':
            url['url'],
            'site':
            self.site_alias.get(url['tld'])
            or self.site_alias.get(url['netloc'], '')
        }

    def _load_breakpoint(self):
        with open(self.save_breakpoint_path, 'rb') as f:
            temp = pickle.load(f)

        for q in temp['url_queries']:
            self.url_queries.put(q)

        for host, queue_ in temp['queue_dict'].items():
            for q in queue_:
                self.queue_dict[host].put(q)

    def _save_breakpoint(self):
        temp = {
            'url_queries': ([self.current_info.query]
                            if self.current_info.get('query') else []) +
            [self.url_queries.get() for _ in range(self.url_queries.qsize())],
            'queue_dict': {
                target: ([self.current_info_dict[target].query] if
                         self.current_info_dict[target].get('query') else []) +
                [queue_.get() for _ in range(queue_.qsize())]
                for target, queue_ in self.queue_dict.items()
            }
        }
        with open(self.save_breakpoint_path, 'wb') as f:
            pickle.dump(temp, f)

    def _cleanup(self):
        try:
            self.database.close()
        except:
            pass

    def run(self):
        self._init_topics()
        self.url_fetcher.start()
        for t in self.threads:
            t.start()
        try:
            while True:
                time.sleep(1)
                if self.url_fetcher.is_alive():
                    continue
                for t in self.threads:
                    if t.is_alive():
                        break
                else:
                    break
        except KeyboardInterrupt:
            self.logger.info('keyboard interrupt by user')
            self._save_breakpoint()
            self._cleanup()
        except:
            self.logger.error(exc_info=sys.exc_info())
            self._save_breakpoint()
            self._cleanup()

    def _init_topics(self):
        for topic in self.topics:
            if not isinstance(topic['keywords'], list):
                topic['keywords'] = [topic['keywords']]
            temp_keywords_str = json.dumps(topic['keywords'])
            if not self.database.select('topic', name=topic['name']):
                self.database.insert('topic',
                                     name=topic['name'],
                                     keywords=temp_keywords_str,
                                     entry_time=datetime.datetime.now(),
                                     remark=topic.get('remark', ''))
            else:
                self.database.update(
                    {
                        'table': 'topic',
                        'constraints': {
                            'name': topic['name']
                        }
                    },
                    remark=topic.get('remark', ''),
                    keywords=temp_keywords_str)
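
A minimal usage sketch for the crawler above; the name, topics and pages values are illustrative, and config is assumed to expose the fields read in __init__ (config.targets, config.save.tmp, config.save.log, config.save.database.login):

crawler = TopicalCrawler(name='demo', config=config, topics=topics, pages=2)
crawler.run()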
Example #16
            'Handler': HandleBase,
            "Input": InputBase
        })
        self.manager.collectPlugins()

    def _watiInputDone(self):
        plugins = self.manager.getPluginsOfCategory('Input')
        # note: map() is lazy in Python 3, so iterate explicitly to actually join the processes
        for plugin in plugins:
            plugin.plugin_object.proc.join()

    def _inventorySync(self):
        zk_address = self.config.get("Inventory", "zk_address")
        full_sync(zk_address)

    def start(self):
        self._configInitialize()
        self._mangerInitialize()
        self._serviceInitialize()
        self._inventorySync()
        self._pluginInitialize()
        #self._watiInputDone()
        import time
        while True:
            time.sleep(30)


if __name__ == "__main__":
    from lib.utils import Logger
    Logger.basicConfig()
    app = DmsOrchestrator()
    app.start()
Example #17
def train_model(args, metadata, device='cuda'):
    print('training on {}'.format(torch.cuda.get_device_name(device) if args.cuda else 'cpu'))

    # load data
    if not args.preload:
        dset = SyntheticDataset(args.file, 'cpu')  # originally 'cpu' ????
        train_loader = DataLoader(dset, shuffle=True, batch_size=args.batch_size)
        data_dim, latent_dim, aux_dim = dset.get_dims()
        args.N = len(dset)
        metadata.update(dset.get_metadata())
    else:
        train_loader = DataLoaderGPU(args.file, shuffle=True, batch_size=args.batch_size)
        data_dim, latent_dim, aux_dim = train_loader.get_dims()
        args.N = train_loader.dataset_len
        metadata.update(train_loader.get_metadata())

    if args.max_iter is None:
        args.max_iter = len(train_loader) * args.epochs

    if args.latent_dim is not None:
        latent_dim = args.latent_dim
        metadata.update({"train_latent_dim": latent_dim})

    # define model and optimizer
    model = None
    if args.i_what == 'iVAE':
        model = iVAE(latent_dim,
                     data_dim,
                     aux_dim,
                     n_layers=args.depth,
                     activation='lrelu',
                     device=device,
                     hidden_dim=args.hidden_dim,
                     anneal=args.anneal,  # False
                     file=metadata['file'],  # Added dataset location for easier checkpoint loading
                     seed=1,
                     epochs=args.epochs)
    elif args.i_what == 'iFlow':
        metadata.update({"device": device})
        model = iFlow(args=metadata).to(device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, \
                                                     factor=args.lr_drop_factor, \
                                                     patience=args.lr_patience, \
                                                     verbose=True)  # factor=0.1 and patience=4

    ste = time.time()
    print('setup time: {}s'.format(ste - st))

    # setup loggers
    logger = Logger(logdir=LOG_FOLDER)  # 'log/'
    exp_id = logger.get_id()  # 1

    tensorboard_run_name = TENSORBOARD_RUN_FOLDER + 'exp' + str(exp_id) + '_'.join(
        map(str, ['', args.batch_size, args.max_iter, args.lr, args.hidden_dim, args.depth, args.anneal]))
    # 'runs/exp1_64_12500_0.001_50_3_False'

    writer = SummaryWriter(logdir=tensorboard_run_name)

    if args.i_what == 'iFlow':
        logger.add('log_normalizer')
        logger.add('neg_log_det')
        logger.add('neg_trace')

    logger.add('loss')
    logger.add('perf')
    print('Beginning training for exp: {}'.format(exp_id))

    # training loop
    epoch = 0
    model.train()
    while epoch < args.epochs:  # args.max_iter:  #12500
        est = time.time()
        for itr, (x, u, z) in enumerate(train_loader):
            acc_itr = itr + epoch * len(train_loader)

            # x is of shape [64, 4]
            # u is of shape [64, 40], one-hot coding of 40 classes
            # z is of shape [64, 2]

            # it += 1
            # model.anneal(args.N, args.max_iter, it)
            optimizer.zero_grad()

            if args.cuda and not args.preload:
                x = x.cuda(device=device, non_blocking=True)
                u = u.cuda(device=device, non_blocking=True)

            if args.i_what == 'iVAE':
                elbo, z_est = model.elbo(x, u)  # elbo is a scalar loss while z_est is of shape [64, 2]
                loss = elbo.mul(-1)

            elif args.i_what == 'iFlow':
                (log_normalizer, neg_trace, neg_log_det), z_est = model.neg_log_likelihood(x, u)
                loss = log_normalizer + neg_trace + neg_log_det

            loss.backward()
            optimizer.step()

            logger.update('loss', loss.item())
            if args.i_what == 'iFlow':
                logger.update('log_normalizer', log_normalizer.item())
                logger.update('neg_trace', neg_trace.item())
                logger.update('neg_log_det', neg_log_det.item())

            perf = mcc(z.cpu().numpy(), z_est.cpu().detach().numpy())
            logger.update('perf', perf)

            if acc_itr % args.log_freq == 0:  # % 25
                logger.log()
                writer.add_scalar('data/performance', logger.get_last('perf'), acc_itr)
                writer.add_scalar('data/loss', logger.get_last('loss'), acc_itr)

                if args.i_what == 'iFlow':
                    writer.add_scalar('data/log_normalizer', logger.get_last('log_normalizer'), acc_itr)
                    writer.add_scalar('data/neg_trace', logger.get_last('neg_trace'), acc_itr)
                    writer.add_scalar('data/neg_log_det', logger.get_last('neg_log_det'), acc_itr)

                scheduler.step(logger.get_last('loss'))

            if acc_itr % int(args.max_iter / 5) == 0 and not args.no_log:
                checkpoint(TORCH_CHECKPOINT_FOLDER, \
                           exp_id, \
                           acc_itr, \
                           model, \
                           optimizer, \
                           logger.get_last('loss'), \
                           logger.get_last('perf'))

        epoch += 1
        eet = time.time()
        if args.i_what == 'iVAE':
            print('epoch {}: {:.4f}s;\tloss: {:.4f};\tperf: {:.4f}'.format(epoch,
                                                                           eet - est,
                                                                           logger.get_last('loss'),
                                                                           logger.get_last('perf')))
        elif args.i_what == 'iFlow':
            print('epoch {}: {:.4f}s;\tloss: {:.4f} (l1: {:.4f}, l2: {:.4f}, l3: {:.4f});\tperf: {:.4f}'.format( \
                epoch,
                eet - est,
                logger.get_last('loss'),
                logger.get_last('log_normalizer'),
                logger.get_last('neg_trace'),
                logger.get_last('neg_log_det'),
                logger.get_last('perf')))

    et = time.time()
    print('training time: {}s'.format(et - ste))

    # Save final model
    checkpoint(PT_MODELS_FOLDER,
               "",
               'final',
               model,
               optimizer,
               logger.get_last('loss'),
               logger.get_last('perf'))

    writer.close()
    if not args.no_log:
        logger.add_metadata(**metadata)
        logger.save_to_json()
        logger.save_to_npz()

    print('total time: {}s'.format(et - st))
    return model
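
A minimal invocation sketch for train_model above; args is assumed to be the parsed command-line namespace and metadata the dataset info dict that the function reads:

device = 'cuda' if args.cuda else 'cpu'   # args.cuda is consumed inside train_model
model = train_model(args, metadata, device=device)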
Example #18
        prediction, to_device(ones_target(size), device)
    )  # instead of minimizing log(1-D(G(z))), maximise log(D(G(z))) for stronger gradients in early training
    error.backward()
    '''
    Update weights with gradients
    '''
    optimizer.step()
    return error


# Testing
num_test_samples = 3
test_noise = to_device(noise(num_test_samples), device)

# Create logger instance
logger = Logger(model_name='GAN_d' + str(d_lr) + '_g' + str(g_lr),
                data_name='EPianoDataset')
#C:\Users\Dave\PycharmProjects\DataLoading\runs

# Printing model & optimizer state_dict
print("Discriminator state_dict:")
for param_tensor in discriminator.state_dict():
    print(param_tensor, "\t", discriminator.state_dict()[param_tensor].size())

print("\nGenerator state_dict:")
for param_tensor in generator.state_dict():
    print(param_tensor, "\t", generator.state_dict()[param_tensor].size())

print("\nD_Optimizer state_dict:")
for var_name in d_optimizer.state_dict():
    print(var_name, "\t", d_optimizer.state_dict()[var_name])
Example #19
from lib.trainer import Trainer
from lib.utils import load_model, save_model, Logger
from lib.coco import COCO
from lib import optimer
from config import cfg as opt
import torch
import os


torch.backends.cudnn.benchmark = True  # input size is not fixed
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
opt.gpus = [int(i) for i in opt.gpus_str.split(',')]
opt.gpus = list(range(len(opt.gpus)))
opt.batch_size = opt.batch_size * len(opt.gpus)
opt.save_dir = os.path.join(opt.save_dir, opt.exp_id)
logger = Logger(opt)


model = TensorMask(backbone=opt.backbone, num_cls=opt.num_class,
                   base_window=opt.base_window,
                   freezeBN=opt.frezeBN, freezeLayers=opt.frezeLayer,
                   align_corners=opt.align_corners)

optimizer = optimer.SGD(
    [{'params': filter(lambda x: len(x.size()) == 4, model.parameters()), 'weight_decay': 0.0001},
     {'params': filter(lambda x: len(x.size()) < 4, model.parameters())}],
    lr=opt.lr, warm_up=1000, momentum=0.9, nesterov=True)
start_epoch = 0
if opt.weights != '' :
    model, optimizer, start_epoch = load_model(
      model, opt.weights, optimizer, opt.resume, opt.lr, opt.lr_step)
trainer = Trainer(opt,model,optimizer)
Example #20
                shutil.copyfile(filename, best_file)

    print('=> testing accuracy of last model')
    test_all(model, base_loader, base_val_loader, val_loader, test_loader)

    if os.path.isfile(os.path.join(cfg.misc.checkpoint_dir, 'best_model.tar')):
        # release GPU memory used by benchmark to avoid OOM
        torch.cuda.empty_cache()
        model, _, resume_epoch, best_acc, _ = load_checkpoint(
            model, optimizer, 'best')
        print(
            f'=> testing accuracy of best model in {resume_epoch} epoch with best validate accuracy {best_acc}'
        )
        test_all(model, base_loader, base_val_loader, val_loader, test_loader)


if __name__ == '__main__':
    sys.stdout = Logger(os.path.join(cfg.misc.output_dir, 'log.txt'))
    print('======CONFIGURATION START======')
    pprint(cfg)
    print('======CONFIGURATION END======')

    # for reproducibility
    np.random.seed(cfg.misc.rng_seed)
    torch.manual_seed(cfg.misc.rng_seed)
    torch.backends.cudnn.deterministic = True
    # for efficiency
    torch.backends.cudnn.benchmark = True

    main()
Example #21
def main():
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available() and not args.use_cpu
    log_name = 'test.log' if args.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print('** Arguments **')
    arg_keys = list(args.__dict__.keys())
    arg_keys.sort()
    for key in arg_keys:
        print('{}: {}'.format(key, args.__dict__[key]))
    print('\n')
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))
    if use_gpu:
        torch.backends.cudnn.benchmark = True
        # torch.backends.cudnn.benchmark = False
        # torch.backends.cudnn.deterministic = True
    else:
        warnings.warn('Currently using CPU, however, GPU is highly recommended')

    # load data related args
    data_args = imagedata_kwargs(args)

    # initialize dataset
    dataset = init_image_dataset(name=data_args['source'], **data_args)

    # build data transformer
    transforms_tr, transforms_te = build_transforms(**data_args)

    # load train data
    trainset = dataset.train
    train_sampler = build_train_sampler(
        trainset, data_args['train_sampler'],
        batch_size=data_args['batch_size'],
        num_instances=data_args['num_instances'],
        num_train_pids=dataset.num_train_pids
    )
    trainloader = torch.utils.data.DataLoader(
        DataWarpper(data=trainset, transforms=transforms_tr),
        sampler=train_sampler,
        batch_size=data_args['batch_size'],
        shuffle=False,
        num_workers=data_args['workers'],
        pin_memory=False,
        drop_last=True,
    )

    # load test data
    queryset = dataset.query
    queryloader = torch.utils.data.DataLoader(
        DataWarpper(data=queryset, transforms=transforms_te),
        batch_size=data_args['batch_size'],
        shuffle=False,
        num_workers=data_args['workers'],
        pin_memory=False,
        drop_last=False
    )

    galleryset = dataset.gallery
    galleryloader = torch.utils.data.DataLoader(
        DataWarpper(data=galleryset, transforms=transforms_te),
        batch_size=data_args['batch_size'],
        shuffle=False,
        num_workers=data_args['workers'],
        pin_memory=False,
        drop_last=False
    )

    print('Building model: {}'.format(args.arch))
    model = build_model(
        name=args.arch,
        num_classes=dataset.num_train_pids,
        pretrained=(not args.no_pretrained),
        use_gpu=use_gpu,
        batch_size=args.batch_size,
        part_num=args.part_num,
        part_weight=args.part_weight
    )
    model = model.cuda()

    # num_params, flops = compute_model_complexity(model, (1, 3, args.height, args.width))
    # print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    optimizer = build_optimizer(model, **optimizer_kwargs(args))

    scheduler = build_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level="O1",
                                      keep_batchnorm_fp32=None,
                                      loss_scale=None)

    if use_gpu:
        model = nn.DataParallel(model)

    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(args.resume, model, optimizer=optimizer)

    print('Building {}-engine for {}-reid'.format(args.loss, args.app))
    engine = Engine(trainloader, queryloader, galleryloader, model, optimizer, scheduler,
                    query=queryset, gallery=galleryset, use_gpu=use_gpu, num_train_pids=dataset.num_train_pids, **engine_kwargs(args))
    engine.run(**engine_kwargs(args), use_gpu=use_gpu)
Example #22
    if args.split_pos:
        plt.subplots(figsize=(30, 16))
        plot(2, 1, y_data[:, :args.split_pos], y_data_std[:, :args.split_pos],
             args.initial_x_1, args.title_1, args.x_label, args.y_label)
        plot(2, 2, y_data[:, args.split_pos:], y_data_std[:, args.split_pos:],
             args.initial_x_2, args.title_2, args.x_label, args.y_label)
    else:
        plt.subplots(figsize=(30, 3))
        plot(1, 1, y_data, y_data_std, args.initial_x, args.title,
             args.x_label, args.y_label)

    plt.savefig("%s.png" % args.prefix, dpi=300, bbox_inches='tight')
    plt.close()


log = Logger()

if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument("--reference", help="The reference network (.dat)")
    parser.add_argument(
        "--reference-std",
        help=
        "The reference standard deviation network (.dat) - should be in identical order as alternative networks"
    )
    parser.add_argument("--alternatives",
                        help="The alternative networks (.dat)",
                        nargs="*")
    parser.add_argument(
        "--alternatives-std",
Example #23
N_HIDDEN = 388
N_OUTPUTS = 388
N_EPOCHS = 10

trainset = DeviceDataLoader(
    DataLoader(EpianoDataset(N_STEPS), BATCH_SIZE, shuffle=True), device)
num_batches = len(trainset)  # number of batches

model = EPianoLSTM(BATCH_SIZE, N_LAYERS, N_STEPS, N_INPUTS, N_HIDDEN,
                   N_OUTPUTS)
to_device(model, device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
lossF = nn.MSELoss()

# Create logger instance
logger = Logger(model_name='LSTM', data_name='EPianoDataset')

print("LSTM state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

print("\nOptimizer state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])
"""
TRAINING
"""
for epoch in range(N_EPOCHS):
    train_running_loss = 0.0
    train_accuracy = 0.0
    iterations = 0
Example #24
        return scheduler

    def _serviceInitialize(self):
        ctx = ServiceContext()
        queue = Queue()
        ctx.registerQueueService(queue)
        ctx.registerSchedService(self.initializeScheduler())

    def _pluginInitialize(self):
        self.manager.setPluginPlaces(["lib/plugins"])
        self.manager.setConfigParser(self.config, None)
        self.manager.setCategoriesFilter({'Handler': HandleBase, 'Input': InputBase})
        self.manager.collectPlugins()

    def _watiInputDone(self):
        plugins = self.manager.getPluginsOfCategory('Input')
        # note: map() is lazy in Python 3, so iterate explicitly to actually join the processes
        for plugin in plugins:
            plugin.plugin_object.proc.join()

    def start(self):
        self._configInitialize()
        self._mangerInitialize()
        self._serviceInitialize()
        self._pluginInitialize()
        self._watiInputDone()

if __name__ == "__main__":
    from lib.utils import Logger
    Logger.basicConfig()

    app = DmsOrchestrator()
    app.start()
Example #25
def main(model,
         auxiliary=True,
         model_label='rcnn',
         rnn_type='gru',
         padding='pre',
         reg='s',
         prefix="crawl",
         embedding_file_type="word2vec",
         train_fname="./data/train.csv",
         test_fname="./data/test.csv",
         embeds_fname="./data/GoogleNews-vectors-negative300.bin",
         logger_fname="./logs/log-aws",
         mode="all",
         wrong_words_fname="./data/correct_words.csv",
         format_embeds="binary",
         config="./config.json",
         output_dir="./out",
         norm_prob=False,
         norm_prob_koef=1,
         gpus=0,
         char_level=False,
         random_seed=2018,
         num_folds=5):

    embedding_type = prefix + "_" + embedding_file_type

    logger = Logger(logging.getLogger(), logger_fname)

    # ====Detect GPUs====
    logger.debug(device_lib.list_local_devices())

    # ====Load data====
    logger.info('Loading data...')
    train_df = load_data(train_fname)
    test_df = load_data(test_fname)

    target_labels = [
        'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'
    ]
    num_classes = len(target_labels)

    # ====Load additional data====
    logger.info('Loading additional data...')
    # swear_words = load_data(swear_words_fname, func=lambda x: set(x.T[0]), header=None)
    wrong_words_dict = load_data(wrong_words_fname,
                                 func=lambda x: {val[0]: val[1]
                                                 for val in x})

    tokinizer = RegexpTokenizer(r'\S+')
    regexps = [
        re.compile("([a-zA-Z]+)([0-9]+)"),
        re.compile("([0-9]+)([a-zA-Z]+)")
    ]

    # ====Load word vectors====
    logger.info('Loading embeddings...')
    if model != 'mvcnn':
        embed_dim = 300
        embeds = Embeds(embeds_fname,
                        embedding_file_type,
                        format=format_embeds)

    if mode in ('preprocess', 'all'):
        logger.info('Generating indirect features...')
        # https://www.kaggle.com/jagangupta/stop-the-s-toxic-comments-eda
        # Word count in each comment:
        train_df['count_word'] = train_df["comment_text"].apply(
            lambda x: len(str(x).split()))
        test_df['count_word'] = test_df["comment_text"].apply(
            lambda x: len(str(x).split()))
        # Unique word count
        train_df['count_unique_word'] = train_df["comment_text"].apply(
            lambda x: len(set(str(x).split())))
        test_df['count_unique_word'] = test_df["comment_text"].apply(
            lambda x: len(set(str(x).split())))
        # Letter count
        train_df['count_letters'] = train_df["comment_text"].apply(
            lambda x: len(str(x)))
        test_df['count_letters'] = test_df["comment_text"].apply(
            lambda x: len(str(x)))
        # punctuation count
        train_df["count_punctuations"] = train_df["comment_text"].apply(
            lambda x: len([c for c in str(x) if c in string.punctuation]))
        test_df["count_punctuations"] = test_df["comment_text"].apply(
            lambda x: len([c for c in str(x) if c in string.punctuation]))
        # upper case words count
        train_df["count_words_upper"] = train_df["comment_text"].apply(
            lambda x: len([w for w in str(x).split() if w.isupper()]))
        test_df["count_words_upper"] = test_df["comment_text"].apply(
            lambda x: len([w for w in str(x).split() if w.isupper()]))
        # title case words count
        train_df["count_words_title"] = train_df["comment_text"].apply(
            lambda x: len([w for w in str(x).split() if w.istitle()]))
        test_df["count_words_title"] = test_df["comment_text"].apply(
            lambda x: len([w for w in str(x).split() if w.istitle()]))
        # Unique word percentage in each comment:
        train_df['word_unique_pct'] = train_df[
            'count_unique_word'] * 100 / train_df['count_word']
        test_df['word_unique_pct'] = test_df[
            'count_unique_word'] * 100 / test_df['count_word']
        # Punct percent in each comment:
        train_df['punct_pct'] = train_df[
            'count_punctuations'] * 100 / train_df['count_word']
        test_df['punct_pct'] = test_df['count_punctuations'] * 100 / test_df[
            'count_word']
        # Average length of the words
        train_df["mean_word_len"] = train_df["comment_text"].apply(
            lambda x: np.mean([len(w) for w in str(x).split()]))
        test_df["mean_word_len"] = test_df["comment_text"].apply(
            lambda x: np.mean([len(w) for w in str(x).split()]))
        # upper case words percentage
        train_df["words_upper_pct"] = train_df[
            "count_words_upper"] * 100 / train_df['count_word']
        test_df["words_upper_pct"] = test_df[
            "count_words_upper"] * 100 / test_df['count_word']
        # title case words percentage
        train_df["words_title_pct"] = train_df[
            "count_words_title"] * 100 / train_df['count_word']
        test_df["words_title_pct"] = test_df[
            "count_words_title"] * 100 / test_df['count_word']
        # remove columns
        count_cols = [
            'count_word', 'count_unique_word', 'count_punctuations',
            'count_words_upper', 'count_words_title'
        ]
        train_df = train_df.drop(columns=count_cols)
        test_df = test_df.drop(columns=count_cols)

        logger.info('Cleaning text...')
        train_df['comment_text_clear'] = clean_text(train_df['comment_text'],
                                                    tokenizer,
                                                    wrong_words_dict,
                                                    regexps,
                                                    autocorrect=False)
        test_df['comment_text_clear'] = clean_text(test_df['comment_text'],
                                                   tokenizer,
                                                   wrong_words_dict,
                                                   regexps,
                                                   autocorrect=False)
        if reg == 'w':
            # remove all punctuation
            train_df.to_csv(os.path.join(output_dir, 'train_clear_w.csv'),
                            index=False)
            test_df.to_csv(os.path.join(output_dir, 'test_clear_w.csv'),
                           index=False)
            train_df = pd.read_csv(
                os.path.join(output_dir, 'train_clear_w.csv'))
            test_df = pd.read_csv(os.path.join(output_dir, 'test_clear_w.csv'))
        elif reg == 's':
            # split by \S+, keep all punctuation
            train_df.to_csv(os.path.join(output_dir, 'train_clear.csv'),
                            index=False)
            test_df.to_csv(os.path.join(output_dir, 'test_clear.csv'),
                           index=False)
            train_df = pd.read_csv(os.path.join(output_dir, 'train_clear.csv'))
            test_df = pd.read_csv(os.path.join(output_dir, 'test_clear.csv'))

    if mode == 'preprocess':
        return

    if mode == 'processed':
        if reg == 'w':
            train_df = pd.read_csv(
                os.path.join(output_dir, 'train_clear_w.csv'))
            test_df = pd.read_csv(os.path.join(output_dir, 'test_clear_w.csv'))
        elif reg == 's':
            train_df = pd.read_csv(os.path.join(output_dir, 'train_clear.csv'))
            test_df = pd.read_csv(os.path.join(output_dir, 'test_clear.csv'))

    logger.info('Calc text length...')
    train_df.fillna('unknown', inplace=True)
    test_df.fillna('unknown', inplace=True)
    train_df['text_len'] = train_df['comment_text_clear'].apply(
        lambda words: len(words.split()))
    test_df['text_len'] = test_df['comment_text_clear'].apply(
        lambda words: len(words.split()))
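    # heuristic cutoff: mean + 3 * std of the training text lengths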
    max_seq_len = np.round(train_df['text_len'].mean() +
                           3 * train_df['text_len'].std()).astype(int)
    logger.debug('Max seq length = {}'.format(max_seq_len))

    # ====Prepare data to NN====
    logger.info('Converting texts to sequences...')
    max_words = 100000
    if char_level:
        max_seq_len = 1200

    train_df['comment_seq'], test_df[
        'comment_seq'], word_index = convert_text2seq(
            train_df['comment_text_clear'].tolist(),
            test_df['comment_text_clear'].tolist(),
            max_words,
            max_seq_len,
            embeds,
            lower=True,
            char_level=char_level,
            uniq=True,
            use_only_exists_words=True,
            position=padding)
    logger.debug('Dictionary size = {}'.format(len(word_index)))

    logger.info('Preparing embedding matrix...')
    if model != 'mvcnn':
        embedding_matrix, words_not_found = get_embedding_matrix(
            embed_dim, embeds, max_words, word_index)
        logger.debug('Embedding matrix shape = {}'.format(
            np.shape(embedding_matrix)))
        logger.debug('Number of null word embeddings = {}'.format(
            np.sum(np.sum(embedding_matrix, axis=1) == 0)))

    # ====Train/test split data====
    # train/val
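    # auxiliary dense inputs: the five engineered text statistics computed above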
    x_aux = np.matrix([
        train_df["word_unique_pct"].tolist(), train_df["punct_pct"].tolist(),
        train_df["mean_word_len"].tolist(),
        train_df["words_upper_pct"].tolist(),
        train_df["words_title_pct"].tolist()
    ],
                      dtype='float32').transpose((1, 0))
    x = np.array(train_df['comment_seq'].tolist())
    y = np.array(train_df[target_labels].values)
    x_train_nn, x_test_nn, x_aux_train_nn, x_aux_test_nn, y_train_nn, y_test_nn, train_idxs, test_idxs = \
        split_data(x, np.squeeze(np.asarray(x_aux)), y,
                   test_size=0.2, shuffle=True, random_state=2018)
    # test set
    test_df_seq = np.array(test_df['comment_seq'].tolist())
    test_aux = np.matrix([
        test_df["word_unique_pct"].tolist(), test_df["punct_pct"].tolist(),
        test_df["mean_word_len"].tolist(),
        test_df["words_upper_pct"].tolist(),
        test_df["words_title_pct"].tolist()
    ], dtype='float32').transpose((1, 0))
    test_df_seq_aux = np.squeeze(np.asarray(test_aux))
    y_nn = []
    logger.debug('X shape = {}'.format(np.shape(x_train_nn)))

    # ====Train models====
    params = Params(config)
    if model_label is None:
        logger.warn('Should choose a model to train')
        return

    if model_label == 'dense':
        model = dense(
            embedding_matrix,
            num_classes,
            max_seq_len,
            dense_dim=params.get('dense').get('dense_dim'),
            n_layers=params.get('dense').get('n_layers'),
            concat=params.get('dense').get('concat'),
            dropout_val=params.get('dense').get('dropout_val'),
            l2_weight_decay=params.get('dense').get('l2_weight_decay'),
            pool=params.get('dense').get('pool'),
            train_embeds=params.get('dense').get('train_embeds'),
            add_sigmoid=True,
            gpus=gpus)
    if model_label == 'cnn':
        model = cnn(embedding_matrix,
                    num_classes,
                    max_seq_len,
                    num_filters=params.get('cnn').get('num_filters'),
                    l2_weight_decay=params.get('cnn').get('l2_weight_decay'),
                    dropout_val=params.get('cnn').get('dropout_val'),
                    dense_dim=params.get('cnn').get('dense_dim'),
                    train_embeds=params.get('cnn').get('train_embeds'),
                    n_cnn_layers=params.get('cnn').get('n_cnn_layers'),
                    pool=params.get('cnn').get('pool'),
                    add_embeds=params.get('cnn').get('add_embeds'),
                    auxiliary=auxiliary,
                    add_sigmoid=True,
                    gpus=gpus)
    if model_label == 'cnn2d':
        model = cnn2d(
            embedding_matrix,
            num_classes,
            max_seq_len,
            num_filters=params.get('cnn2d').get('num_filters'),
            l2_weight_decay=params.get('cnn2d').get('l2_weight_decay'),
            dropout_val=params.get('cnn2d').get('dropout_val'),
            dense_dim=params.get('cnn2d').get('dense_dim'),
            train_embeds=params.get('cnn2d').get('train_embeds'),
            add_embeds=params.get('cnn2d').get('add_embeds'),
            auxiliary=auxiliary,
            add_sigmoid=True,
            gpus=gpus)

    if model_label == 'lstm':
        model = rnn(
            embedding_matrix,
            num_classes,
            max_seq_len,
            l2_weight_decay=params.get('lstm').get('l2_weight_decay'),
            rnn_dim=params.get('lstm').get('rnn_dim'),
            dropout_val=params.get('lstm').get('dropout_val'),
            dense_dim=params.get('lstm').get('dense_dim'),
            n_branches=params.get('lstm').get('n_branches'),
            n_rnn_layers=params.get('lstm').get('n_rnn_layers'),
            n_dense_layers=params.get('lstm').get('n_dense_layers'),
            train_embeds=params.get('lstm').get('train_embeds'),
            mask_zero=params.get('lstm').get('mask_zero'),
            kernel_regularizer=params.get('lstm').get('kernel_regularizer'),
            recurrent_regularizer=params.get('lstm').get(
                'recurrent_regularizer'),
            activity_regularizer=params.get('lstm').get(
                'activity_regularizer'),
            dropout=params.get('lstm').get('dropout'),
            recurrent_dropout=params.get('lstm').get('recurrent_dropout'),
            auxiliary=auxiliary,
            add_sigmoid=True,
            gpus=gpus,
            rnn_type='lstm')
    if model_label == 'gru':
        model = rnn(
            embedding_matrix,
            num_classes,
            max_seq_len,
            l2_weight_decay=params.get('gru').get('l2_weight_decay'),
            rnn_dim=params.get('gru').get('rnn_dim'),
            dropout_val=params.get('gru').get('dropout_val'),
            dense_dim=params.get('gru').get('dense_dim'),
            n_branches=params.get('gru').get('n_branches'),
            n_rnn_layers=params.get('gru').get('n_rnn_layers'),
            n_dense_layers=params.get('gru').get('n_dense_layers'),
            train_embeds=params.get('gru').get('train_embeds'),
            mask_zero=params.get('gru').get('mask_zero'),
            kernel_regularizer=params.get('gru').get('kernel_regularizer'),
            recurrent_regularizer=params.get('gru').get(
                'recurrent_regularizer'),
            activity_regularizer=params.get('gru').get('activity_regularizer'),
            dropout=params.get('gru').get('dropout'),
            recurrent_dropout=params.get('gru').get('recurrent_dropout'),
            auxiliary=auxiliary,
            add_sigmoid=True,
            gpus=gpus,
            rnn_type='gru')

    if model_label == 'charrnn':
        model = charrnn(
            len(word_index),
            num_classes,
            max_seq_len,
            rnn_dim=params.get('charrnn').get('rnn_dim'),
            dropout_val=params.get('charrnn').get('dropout_val'),
            auxiliary=auxiliary,
            dropout=params.get('charrnn').get('dropout'),
            recurrent_dropout=params.get('charrnn').get('recurrent_dropout'),
            add_sigmoid=True,
            gpus=gpus,
            rnn_type=rnn_type)
    if model_label == 'cnn2rnn':
        model = cnn2rnn(embedding_matrix,
                        num_classes,
                        max_seq_len,
                        rnn_type=rnn_type)
    if model_label == 'dpcnn':
        model = dpcnn(embedding_matrix,
                      num_classes,
                      max_seq_len,
                      num_filters=params.get('dpcnn').get('num_filters'),
                      dense_dim=params.get('dpcnn').get('dense_dim'),
                      add_sigmoid=True,
                      gpus=gpus)

    if model_label == 'rcnn':
        model = rcnn(
            embedding_matrix,
            num_classes,
            max_seq_len,
            rnn_dim=params.get('rcnn').get('rnn_dim'),
            dropout_val=params.get('rcnn').get('dropout_val'),
            dense_dim=params.get('rcnn').get('dense_dim'),
            train_embeds=params.get('rcnn').get('train_embeds'),
            auxiliary=auxiliary,
            dropout=params.get('rcnn').get('dropout'),
            recurrent_dropout=params.get('rcnn').get('recurrent_dropout'),
            add_sigmoid=True,
            gpus=gpus,
            rnn_type=rnn_type)
    if model_label == 'capsule':
        model = capsule(
            embedding_matrix,
            num_classes,
            max_seq_len,
            auxiliary=auxiliary,
            Num_capsule=params.get('capsule').get('Num_capsule'),
            Routings=params.get('capsule').get('Routing'),
            add_sigmoid=params.get('capsule').get('add_sigmoid'),
            mask_zero=params.get('capsule').get('mask_zero'),
            gpus=gpus,
            rnn_type='gru')  # lstm may diverge but gru works better

    if model == 'mvcnn':
        embeds_fname1 = "./data/crawl-300d-2M.vec"  # "./data/crawl-300d-2M.vec  word2vec-raw.txt
        embeds_fname2 = "./data/glove.840B.300d.txt"
        embeds_fname3 = "./data/GoogleNews-vectors-negative300.bin"
        embed_dim = 300
        embeds1 = Embeds(embeds_fname1, "fasttext", format='file')
        embeds2 = Embeds(embeds_fname2, "glove", format='file')
        embeds3 = Embeds(embeds_fname3, "word2vec", format='binary')
        embedding_matrix1, words_not_found1 = get_embedding_matrix(
            embed_dim, embeds1, max_words, word_index)
        embedding_matrix2, words_not_found2 = get_embedding_matrix(
            embed_dim, embeds2, max_words, word_index)
        #embedding_matrix3, words_not_found3 = get_embedding_matrix(embed_dim, embeds3, max_words, word_index)
        model = mvcnn(embedding_matrix1,
                      embedding_matrix2,
                      num_classes,
                      max_seq_len,
                      auxiliary=auxiliary,
                      gpus=gpus)

    # ====K-fold cross-validation split====
    logger.info('Run k-fold cross validation...')
    params = Params(config)
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=random_seed)
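    # oof_train holds out-of-fold predictions on the training set (one row per sample);
    # oof_test_skf collects per-fold test-set predictions, averaged after the loop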
    oof_train = np.zeros((x.shape[0], num_classes))
    oof_test_skf = []

    for i, (train_index, test_index) in enumerate(kf.split(x, y)):
        print("TRAIN:", train_index, "TEST:", test_index)
        x_train, x_aux_train, x_test, x_aux_test = x[train_index], x_aux[
            train_index], x[test_index], x_aux[test_index]
        y_train, y_test = y[train_index], y[test_index]
        logger.info('Start training {}-th fold'.format(i))
        if auxiliary:
            inputs = [x_train, x_aux_train]
            inputs_val = [x_test, x_aux_test]
            output = [test_df_seq, test_df_seq_aux]
        else:
            inputs = x_train
            inputs_val = x_test
            output = test_df_seq
        hist = train(
            x_train=inputs,  # [x_train, x_aux_train] when auxiliary input is allowed.
            y_train=y_train,
            x_val=inputs_val,  # [x_test, x_aux_test],
            y_val=y_test,
            model=model,
            batch_size=params.get(model_label).get('batch_size'),
            num_epochs=params.get(model_label).get('num_epochs'),
            learning_rate=params.get(model_label).get('learning_rate'),
            early_stopping_delta=params.get(model_label).get(
                'early_stopping_delta'),
            early_stopping_epochs=params.get(model_label).get(
                'early_stopping_epochs'),
            use_lr_strategy=params.get(model_label).get('use_lr_strategy'),
            lr_drop_koef=params.get(model_label).get('lr_drop_koef'),
            epochs_to_drop=params.get(model_label).get('epochs_to_drop'),
            model_checkpoint_dir=os.path.join('.', 'model_checkpoint', reg,
                                              model_label, embedding_type,
                                              padding, str(i)),
            logger=logger)

        model.load_weights(
            os.path.join('.', 'model_checkpoint', reg, model_label,
                         embedding_type, padding, str(i), 'weights.h5'))
        oof_train[test_index, :] = model.predict(
            inputs_val)  # model.predict([x_test, x_aux_test])
        proba = model.predict(
            output)  # model.predict([test_df_seq, test_df_seq_aux])
        oof_test_skf.append(proba)
        result = pd.read_csv("./data/sample_submission.csv")
        result[target_labels] = proba
        ithfold_path = "./cv/{}/{}/{}/{}/{}".format(reg, model_label,
                                                    embedding_type, padding, i)
        if not os.path.exists(ithfold_path):
            os.makedirs(ithfold_path)

        result.to_csv(os.path.join(ithfold_path, 'sub.csv'), index=False)
        # model.save(os.path.join(ithfold_path,'weights.h5'))

    # dump oof_test and oof_train for later stacking
    # oof_train:
    oof_train_path = "./cv/{}/{}/{}/{}/oof_train".format(
        reg, model_label, embedding_type, padding)
    if not os.path.exists(oof_train_path):
        os.makedirs(oof_train_path)

    np.savetxt(os.path.join(oof_train_path, "oof_train.csv"),
               oof_train,
               fmt='%.24f',
               delimiter=' ')
    # oof_test: stacking version
    oof_test = np.array(oof_test_skf).mean(axis=0)
    oof_test_path = "./cv/{}/{}/{}/{}/oof_test".format(reg, model_label,
                                                       embedding_type, padding)
    if not os.path.exists(oof_test_path):
        os.makedirs(oof_test_path)

    np.savetxt(os.path.join(oof_test_path, "oof_test.csv"),
               oof_test,
               fmt='%.24f',
               delimiter=' ')
    # oof_test: submission version
    result = pd.read_csv("./data/sample_submission.csv")
    result[target_labels] = oof_test
    oof_test_bag_path = "./cv/{}/{}/{}/{}/bagged".format(
        reg, model_label, embedding_type, padding)
    if not os.path.exists(oof_test_bag_path):
        os.makedirs(oof_test_bag_path)

    result.to_csv(os.path.join(oof_test_bag_path, "sub.csv"), index=False)
Example #26
0
File: test.py Project: koddev/men
import os
from multiprocessing import Pool

import align.detect_face as detect_face
import cv2
import numpy as np

# import tensorflow as tf
import tensorflow.compat.v1 as tf
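# run the legacy TF1 graph/session API under TensorFlow 2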
tf.disable_v2_behavior()

from lib.face_utils import judge_side_face
from lib.utils import Logger, mkdir
from project_root_dir import project_dir
from src.sort import Sort

logger = Logger()


def main():
    opencv()


def testMultiProcess():
    p = Pool(processes=2)
    sonuclar = p.map(detectExtract, ["1", "2"])


def opencv():
    video_name = os.path.join("videos", "istiklal2.mp4")
    cam = cv2.VideoCapture(video_name)
    frames = []
Example #27
0
def train(word_emb,
          vision_model,
          language_model,
          ent_loss_model,
          rel_loss_model,
          train_loader,
          val_loader,
          word_dict,
          ent_dict,
          pred_dict,
          n_epochs,
          val_freq,
          out_dir,
          cfg,
          grad_freq=0):

    os.makedirs(out_dir, exist_ok=True)
    params = list(vision_model.parameters()) + list(
        language_model.parameters())
    params = [param for param in params if param.requires_grad]
    named_params = list(vision_model.named_parameters()) + list(
        language_model.named_parameters())
    optimizer = torch.optim.Adam(params,
                                 lr=cfg.train.learning_rate,
                                 weight_decay=cfg.train.weight_decay)
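    # StepLR with step_size=1 multiplies the learning rate by gamma after every epoch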
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=1, gamma=cfg.train.learning_rate_decay)
    logger = Logger(os.path.join(out_dir, "log.txt"))
    tfb_logger = TFBLogger(out_dir)
    if grad_freq > 0: plt.ion()
    n_batches = len(train_loader)
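    # `step` counts processed training samples and is the x-axis for TensorBoard summaries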
    step = 0

    for epoch in range(n_epochs):

        # NOTE: stepping the scheduler at the top of the epoch follows the pre-1.1
        # PyTorch convention; newer PyTorch expects scheduler.step() after optimizer.step().
        scheduler.step()
        epoch_loss = 0.0

        if epoch % val_freq == 0:
            vision_model.train(False)
            language_model.train(False)
            ent_acc, rel_acc = validate(word_emb, vision_model, language_model,
                                        val_loader, word_dict, ent_dict,
                                        pred_dict,
                                        cfg.language_model.tokens_length,
                                        tfb_logger, step)
            logstr = "epoch %2d | ent acc(top20): %.3f | rel acc(top20): %.3f" % (
                epoch, ent_acc, rel_acc)
            logger.write("%-80s" % logstr)
            vision_model.train(True)
            language_model.train(True)

        tic_0 = time.time()

        for i, data in enumerate(train_loader):

            tic_1 = time.time()

            image_ids = data[0]
            if len(image_ids) < cfg.train.batch_size: continue
            images = data[1].cuda().float()
            sbj_boxes = data[2].cuda().float()
            obj_boxes = data[3].cuda().float()
            rel_boxes = data[4].cuda().float()
            sbj_tokens = data[5].cuda()
            obj_tokens = data[6].cuda()
            rel_tokens = data[7].cuda()
            sbj_seq_lens = data[8].cuda().long()
            obj_seq_lens = data[9].cuda().long()
            rel_seq_lens = data[10].cuda().long()

            tic_2 = time.time()

            optimizer.zero_grad()

            sbj_t_emb = language_model(word_emb(sbj_tokens), sbj_seq_lens)
            obj_t_emb = language_model(word_emb(obj_tokens), obj_seq_lens)
            rel_t_emb = language_model(word_emb(rel_tokens), rel_seq_lens)
            sbj_v_emb, obj_v_emb, rel_v_emb = vision_model(
                images, sbj_boxes, obj_boxes, rel_boxes)

            sbj_loss = ent_loss_model(sbj_v_emb, sbj_t_emb)
            obj_loss = ent_loss_model(obj_v_emb, obj_t_emb)
            rel_loss = rel_loss_model(rel_v_emb, rel_t_emb)

            loss = sbj_loss + obj_loss + rel_loss

            tic_3 = time.time()

            loss.backward()
            optimizer.step()

            tic_4 = time.time()

            if grad_freq > 0 and i % grad_freq == 0:
                for n, p in named_params:
                    if not "bias" in n:
                        name_path = n.replace(".", "/")
                        tfb_logger.histo_summary("grad/%s" % name_path,
                                                 p.grad.data.cpu().numpy(),
                                                 step)

            epoch_loss += loss.item() * train_loader.batch_size

            logstr = "epoch %2d batch %4d/%4d | loss %5.2f | %4dms | ^ %4dms | => %4dms" % \
                     (epoch+1, i+1, n_batches, loss.item(),
                      1000*(tic_4-tic_0), 1000*(tic_2-tic_0), 1000*(tic_4-tic_2))
            print("%-80s" % logstr, end="\r")
            tfb_logger.scalar_summary("loss/ent",
                                      sbj_loss.item() + obj_loss.item(), step)
            tfb_logger.scalar_summary("loss/rel", rel_loss.item(), step)
            tfb_logger.scalar_summary("loss/total", loss.item(), step)

            tic_0 = time.time()
            step += train_loader.batch_size

        epoch_loss /= n_batches * train_loader.batch_size

        logstr = "epoch %2d | train_loss: %.3f" % (epoch + 1, epoch_loss)
        logger.write("%-80s" % logstr)

        vision_model_path = os.path.join(out_dir,
                                         "vision_model_%d.pth" % (epoch + 1))
        torch.save(vision_model.state_dict(), vision_model_path)
        language_model_path = os.path.join(
            out_dir, "language_model_%d.pth" % (epoch + 1))
        torch.save(language_model.state_dict(), language_model_path)