Пример #1
0
def save_entry(sender):
    """Collect the journal entry from the sender's parent view and persist it.

    Reads the date and entry fields out of the superview, appends the entry
    to ``journal.jl`` under the module-level ``_path``, reports the outcome
    via ``save_status_alert``, and flags the UI as out of sync on success.
    """
    container = sender.superview
    entry_date = get_datetime(container)
    entry = get_info(container, entry_date)
    # NOTE(review): a commented-out check on entry['dateTime'] used to live
    # here, intended for postprandial alerts — still unimplemented.
    journal_file = _path + '/' + 'journal.jl'
    saved_ok = log.save(entry, journal_file)
    save_status_alert(saved_ok)
    if saved_ok:
        views[0]['label2'].text = 'Out of Sync'
Пример #2
0
def log():
    """Receive a Slack channel message and hand it off to the logger.

    Relies on the module-level Flask ``request`` object, whose form-encoded
    fields carry the message Slack POSTs.

    Returns:
        A JSON response with empty text on success; aborts with HTTP 500
        when ``logger.save`` reports failure.
    """
    if config.log:
        print('log()')
    if not logger.save(request):
        if config.log:
            print('log() failed')
        abort(500)  # raises, so control never falls through
    return jsonify({'text':''})
Пример #3
0
def main(args, logger):
    """Train or evaluate a VarmisuseModel end to end.

    Loads the model (saved file for test-only runs, checkpoint resume,
    pretrained weights, or from scratch), builds the train/dev data
    loaders, then either runs a single official validation pass
    (``args.only_test``) or the full train/validate loop with lr warmup,
    lr decay, periodic checkpointing, and early stopping.

    Args:
        args: parsed command-line namespace (file paths, hyperparameters,
            and flags such as ``only_test``, ``cuda``, ``parallel``).
        logger: project logger exposing ``print``, ``save``, and ``path``.
    """
    # --------------------------------------------------------------------------
    # MODEL
    logger.print('-' * 100)
    start_epoch = 1
    if args.only_test:
        if not os.path.isfile(args.model_file):
            raise IOError('No such file: %s' % args.model_file)
        model = VarmisuseModel.load(args.model_file)
    else:
        if args.checkpoint and os.path.isfile(args.model_file + '.checkpoint'):
            # Just resume training, no modifications.
            logger.print('Found a checkpoint...')
            checkpoint_file = args.model_file + '.checkpoint'
            model, start_epoch = VarmisuseModel.load_checkpoint(
                checkpoint_file, args.cuda)
        else:
            # Training starts fresh. But the model state is either pretrained or
            # newly (randomly) initialized.
            if args.pretrained:
                logger.print('Using pretrained model...')
                model = VarmisuseModel.load(args.pretrained, args)
            else:
                logger.print('Training model from scratch...')
                model = init_from_scratch(args, logger)

            # Set up optimizer
            model.init_optimizer()
            # log the parameter details
            logger.print(
                'Trainable #parameters [encoder-decoder] {} [total] {}'.format(
                    human_format(model.network.count_encoder_parameters() +
                                 model.network.count_decoder_parameters()),
                    human_format(model.network.count_parameters())))
            table = model.network.layer_wise_parameters()
            logger.print('Breakdown of the trainable paramters\n%s' % table)

    # Use the GPU?
    if args.cuda:
        model.cuda()

    if args.parallel:
        model.parallelize()

    # --------------------------------------------------------------------------
    # DATA ITERATORS
    # Two datasets: train and dev. If we sort by length it's faster.
    logger.print('-' * 100)
    logger.print('Make data loaders')

    dev_files = dict()
    dev_files['src'] = args.dev_src_file
    dev_files['src_tag'] = args.dev_src_tag_file
    dev_files['tgt'] = args.dev_tgt_file
    if not args.only_test:
        train_files = dict()
        train_files['src'] = args.train_src_file
        train_files['src_tag'] = args.train_src_tag_file
        train_files['tgt'] = args.train_tgt_file

        train_dataset = data_utils.VarmisuseDataset(model, args, train_files)
        if args.sort_by_len:
            train_sampler = data.SortedBatchSampler(train_dataset.lengths(),
                                                    args.batch_size,
                                                    shuffle=True)
        else:
            train_sampler = torch.utils.data.sampler.RandomSampler(
                train_dataset)

        # drop_last only under DataParallel, so each replica gets a full batch
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            sampler=train_sampler,
            num_workers=args.data_workers,
            collate_fn=data_utils.batchify_varmisuse,
            pin_memory=args.cuda,
            drop_last=args.parallel)

    dev_dataset = data_utils.VarmisuseDataset(model, args, dev_files)
    dev_sampler = torch.utils.data.sampler.SequentialSampler(dev_dataset)

    dev_loader = torch.utils.data.DataLoader(
        dev_dataset,
        batch_size=args.test_batch_size,
        sampler=dev_sampler,
        num_workers=args.data_workers,
        collate_fn=data_utils.batchify_varmisuse,
        pin_memory=args.cuda,
        drop_last=args.parallel)

    # -------------------------------------------------------------------------
    # PRINT CONFIG
    logger.print('-' * 100)
    logger.print('CONFIG:\n%s' %
                 json.dumps(vars(args), indent=4, sort_keys=True))

    # --------------------------------------------------------------------------
    # DO TEST

    if args.only_test:
        stats = {
            'timer': Timer(),
            'epoch': 100000,
            'best_valid': 0,
            'no_improvement': 0
        }
        validate_official(args, dev_loader, model, stats, logger, mode='test')
        logger.save(silent=True)

    # --------------------------------------------------------------------------
    # TRAIN/VALID LOOP
    else:
        logger.print('-' * 100)
        logger.print('Starting training...')
        stats = {
            'timer': Timer(),
            'epoch': start_epoch,
            'best_valid': 0,
            'no_improvement': 0
        }

        # Linear warmup: per-batch increment taking the lr from 0 up to
        # args.learning_rate over args.warmup_epochs epochs.
        if args.optimizer in ['sgd', 'adam'
                              ] and args.warmup_epochs >= start_epoch:
            logger.print(
                "Use warmup lrate for the %d epoch, from 0 up to %s." %
                (args.warmup_epochs, args.learning_rate))
            num_batches = len(train_loader.dataset) // args.batch_size
            warmup_factor = (args.learning_rate + 0.) / (num_batches *
                                                         args.warmup_epochs)
            stats['warmup_factor'] = warmup_factor

        for epoch in range(start_epoch, args.num_epochs + 1):
            stats['epoch'] = epoch
            # Multiplicative lr decay once warmup is over.
            # NOTE(review): decay starts at warmup_epochs + 2 here, while the
            # sibling main() in this file starts at warmup_epochs + 1 — confirm
            # which off-by-one is intended.
            if args.optimizer in ['sgd', 'adam'
                                  ] and epoch > args.warmup_epochs + 1:
                model.optimizer.param_groups[0]['lr'] = \
                    model.optimizer.param_groups[0]['lr'] * args.lr_decay

            train(args, train_loader, model, stats, logger)
            if epoch % args.print_fq == 0:
                # NOTE(review): best_model.cpt is overwritten every print_fq
                # epoch, before validation — i.e. it is the latest model, not
                # necessarily the best-scoring one. Confirm intended.
                model.save(logger.path + '/best_model.cpt')
                result = validate_official(args, dev_loader, model, stats,
                                           logger)
            logger.save(silent=True)
            if epoch % args.save_fq == 0:
                model.save(logger.path + '/model_epoch%d.cpt' % epoch)

            # Save best valid
            # NOTE: the short-circuit keeps `result` from being read on epochs
            # where no validation ran; those epochs still count toward
            # no_improvement and hence toward early stopping.
            if ((epoch % args.print_fq == 0) and \
                              (result[args.valid_metric] > stats['best_valid'])):
                logger.print('Best valid: %s = %.2f (epoch %d, %d updates)' %
                             (args.valid_metric, result[args.valid_metric],
                              stats['epoch'], model.updates))
                stats['best_valid'] = result[args.valid_metric]
                stats['no_improvement'] = 0
            else:
                stats['no_improvement'] += 1
                if stats['no_improvement'] >= args.early_stop:
                    break
Пример #4
0
def main(args, logger):
    """Train or evaluate a Code2NaturalLanguage model end to end.

    Loads the raw train/dev examples (with per-dataset weights), loads or
    initializes the model (saved file, checkpoint resume, pretrained, or
    from scratch), builds the data loaders, then either runs a single
    official test pass (``args.only_test``) or the full train/validate
    loop with lr warmup, lr decay, best-model saving, and early stopping.

    Args:
        args: parsed command-line namespace (file lists, hyperparameters,
            and flags such as ``only_test``, ``cuda``, ``parallel``).
        logger: project logger exposing ``print``, ``save``, and ``path``.
    """
    # --------------------------------------------------------------------------
    # DATA
    logger.print('-' * 100)
    logger.print('Load and process data files')

    train_exs = []
    if not args.only_test:
        # Per-language example counts, later normalized to dataset weights.
        args.dataset_weights = dict()
        for train_src, train_src_tag, train_tgt, train_rel_matrix, dataset_name in \
                zip(args.train_src_files, args.train_src_tag_files,
                    args.train_tgt_files, args.train_rel_matrix_files,\
                    args.dataset_name):
            train_files = dict()
            train_files['src'] = train_src
            train_files['src_tag'] = train_src_tag
            train_files['tgt'] = train_tgt
            train_files["rel_matrix"] = train_rel_matrix
            exs = util.load_data(args,
                                 train_files,
                                 max_examples=args.max_examples,
                                 dataset_name=dataset_name)
            lang_name = constants.DATA_LANG_MAP[dataset_name]
            args.dataset_weights[constants.LANG_ID_MAP[lang_name]] = len(exs)
            train_exs.extend(exs)

        logger.print('Num train examples = %d' % len(train_exs))
        args.num_train_examples = len(train_exs)
        # Normalize counts into fractions of the combined training set.
        for lang_id in args.dataset_weights.keys():
            weight = (1.0 * args.dataset_weights[lang_id]) / len(train_exs)
            args.dataset_weights[lang_id] = round(weight, 2)
        logger.print('Dataset weights = %s' % str(args.dataset_weights))

    dev_exs = []
    for dev_src, dev_src_tag, dev_tgt, dev_rel_matrix, dataset_name in \
            zip(args.dev_src_files, args.dev_src_tag_files,
                args.dev_tgt_files, args.dev_rel_matrix_files, args.dataset_name):
        dev_files = dict()
        dev_files['src'] = dev_src
        dev_files['src_tag'] = dev_src_tag
        dev_files['tgt'] = dev_tgt
        dev_files["rel_matrix"] = dev_rel_matrix
        exs = util.load_data(args,
                             dev_files,
                             max_examples=args.max_examples,
                             dataset_name=dataset_name,
                             test_split=True)
        dev_exs.extend(exs)
    logger.print('Num dev examples = %d' % len(dev_exs))

    # --------------------------------------------------------------------------
    # MODEL
    logger.print('-' * 100)
    start_epoch = 1
    if args.only_test:
        #if args.pretrained:
        #    model = Code2NaturalLanguage.load(args.pretrained)
        #else:
        if not os.path.isfile(args.model_file):
            raise IOError('No such file: %s' % args.model_file)
        model = Code2NaturalLanguage.load(args.model_file)
    else:
        if args.checkpoint and os.path.isfile(args.model_file + '.checkpoint'):
            # Just resume training, no modifications.
            logger.print('Found a checkpoint...')
            checkpoint_file = args.model_file + '.checkpoint'
            model, start_epoch = Code2NaturalLanguage.load_checkpoint(
                checkpoint_file, args.cuda)
        else:
            # Training starts fresh. But the model state is either pretrained or
            # newly (randomly) initialized.
            if args.pretrained:
                logger.print('Using pretrained model...')
                model = Code2NaturalLanguage.load(args.pretrained, args)
            else:
                logger.print('Training model from scratch...')
                model = init_from_scratch(args, train_exs, dev_exs, logger)

            # Set up optimizer
            model.init_optimizer()
            # log the parameter details
            logger.print(
                'Trainable #parameters [encoder-decoder] {} [total] {}'.format(
                    human_format(model.network.count_encoder_parameters() +
                                 model.network.count_decoder_parameters()),
                    human_format(model.network.count_parameters())))
            table = model.network.layer_wise_parameters()
            logger.print('Breakdown of the trainable paramters\n%s' % table)

    # Use the GPU?
    if args.cuda:
        model.cuda()

    if args.parallel:
        model.parallelize()

    # --------------------------------------------------------------------------
    # DATA ITERATORS
    # Two datasets: train and dev. If we sort by length it's faster.
    logger.print('-' * 100)
    logger.print('Make data loaders')

    if not args.only_test:
        train_dataset = data.CommentDataset(train_exs, model)
        if args.sort_by_len:
            train_sampler = data.SortedBatchSampler(train_dataset.lengths(),
                                                    args.batch_size,
                                                    shuffle=True)
        else:
            train_sampler = torch.utils.data.sampler.RandomSampler(
                train_dataset)

        # drop_last only under DataParallel, so each replica gets a full batch
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            sampler=train_sampler,
            num_workers=args.data_workers,
            collate_fn=vector.batchify,
            pin_memory=args.cuda,
            drop_last=args.parallel)

    dev_dataset = data.CommentDataset(dev_exs, model)
    dev_sampler = torch.utils.data.sampler.SequentialSampler(dev_dataset)

    dev_loader = torch.utils.data.DataLoader(dev_dataset,
                                             batch_size=args.test_batch_size,
                                             sampler=dev_sampler,
                                             num_workers=args.data_workers,
                                             collate_fn=vector.batchify,
                                             pin_memory=args.cuda,
                                             drop_last=args.parallel)

    # -------------------------------------------------------------------------
    # PRINT CONFIG
    logger.print('-' * 100)
    logger.print('CONFIG:\n%s' %
                 json.dumps(vars(args), indent=4, sort_keys=True))

    # --------------------------------------------------------------------------
    # DO TEST

    if args.only_test:
        stats = {
            'timer': Timer(),
            'epoch': 100000,
            'best_valid': 0,
            'no_improvement': 0
        }
        validate_official(args, dev_loader, model, stats, logger, mode='test')

    # --------------------------------------------------------------------------
    # TRAIN/VALID LOOP
    else:
        logger.print('-' * 100)
        logger.print('Starting training...')
        stats = {
            'timer': Timer(),
            'epoch': start_epoch,
            'best_valid': 0,
            'no_improvement': 0
        }

        # Linear warmup: per-batch increment taking the lr from 0 up to
        # args.learning_rate over args.warmup_epochs epochs.
        if args.optimizer in ['sgd', 'adam'
                              ] and args.warmup_epochs >= start_epoch:
            logger.print(
                "Use warmup lrate for the %d epoch, from 0 up to %s." %
                (args.warmup_epochs, args.learning_rate))
            num_batches = len(train_loader.dataset) // args.batch_size
            warmup_factor = (args.learning_rate + 0.) / (num_batches *
                                                         args.warmup_epochs)
            stats['warmup_factor'] = warmup_factor

        for epoch in range(start_epoch, args.num_epochs + 1):
            stats['epoch'] = epoch
            # Multiplicative lr decay once warmup is over.
            if args.optimizer in ['sgd', 'adam'
                                  ] and epoch > args.warmup_epochs:
                model.optimizer.param_groups[0]['lr'] = \
                    model.optimizer.param_groups[0]['lr'] * args.lr_decay

            train(args, train_loader, model, stats, logger)
            if epoch % args.print_fq == 0:
                result = validate_official(args, dev_loader, model, stats,
                                           logger)
            logger.save(silent=True)

            # Save best valid
            # NOTE: the short-circuit keeps `result` from being read on epochs
            # where no validation ran; those epochs still count toward
            # no_improvement and hence toward early stopping.
            if ((epoch % args.print_fq == 0) and \
                              (result[args.valid_metric] > stats['best_valid'])):
                logger.print('Best valid: %s = %.2f (epoch %d, %d updates)' %
                             (args.valid_metric, result[args.valid_metric],
                              stats['epoch'], model.updates))
                model.save(logger.path + '/best_model.cpt')
                stats['best_valid'] = result[args.valid_metric]
                stats['no_improvement'] = 0
            else:
                stats['no_improvement'] += 1
                if stats['no_improvement'] >= args.early_stop:
                    break
def main():
    """Run the active-learning training loop for the gpat patch classifier.

    Parses command-line options, loads the Camelyon train/test datasets
    (from the module-level ``dataset_path``), seeds an initial labeled
    pool, then repeatedly: trains a model from (re)initialized weights on
    the labeled pool, evaluates on the fixed test subset, and queries new
    samples to label (randomly or via committee-based active sampling)
    until the labeled pool reaches 10000 samples, at which point the
    process exits.
    """
    parser = argparse.ArgumentParser(description='gpat train')
    parser.add_argument("out")
    parser.add_argument('--resume', default=None)
    parser.add_argument('--log_dir', default='runs_active')
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--iterations',
                        default=10**5,
                        type=int,
                        help='number of iterations to learn')
    parser.add_argument('--interval',
                        default=100,
                        type=int,
                        help='number of iterations to evaluate')
    parser.add_argument('--batch_size',
                        '-b',
                        type=int,
                        default=64,
                        help='learning minibatch size')
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--loaderjob', type=int, default=8)
    parser.add_argument('--hed',
                        dest='hed',
                        action='store_true',
                        default=False)
    parser.add_argument('--from_tiff',
                        dest='from_tiff',
                        action='store_true',
                        default=False)
    parser.add_argument('--no-texture',
                        dest='texture',
                        action='store_false',
                        default=True)
    parser.add_argument('--cbp',
                        dest='cbp',
                        action='store_true',
                        default=False)
    parser.add_argument('--no-color_aug',
                        dest='color_aug',
                        action='store_false',
                        default=True)
    parser.add_argument('--model_test', default='', type=str)

    parser.add_argument('--arch',
                        default='googlenet',
                        choices=[
                            'texturecnn', 'resnet', 'googlenet', 'vgg', 'alex',
                            'trained', 'resume'
                        ])
    parser.add_argument('--opt', default='adam', choices=['adam', 'momentum'])
    parser.add_argument('--train_path', default='train_extracted_dataset.pkl')
    parser.add_argument('--test_path', default='test_extracted_dataset.pkl')

    parser.add_argument('--epoch_interval', default=20, type=int)

    parser.add_argument('--active_sample_size', type=int, default=100)
    parser.add_argument('--no-every_init',
                        dest='every_init',
                        action='store_false',
                        default=True)

    parser.add_argument('--random_sample', action='store_true', default=False)
    parser.add_argument('--fixed_ratio', action='store_true', default=False)
    parser.add_argument('--label_init',
                        choices=['random', 'clustering'],
                        default='clustering')
    parser.add_argument('--init_size', default=100, type=int)

    parser.add_argument('--uncertain', action='store_true', default=False)
    parser.add_argument('--uncertain_with_dropout',
                        action='store_true',
                        default=False)
    parser.add_argument('--uncertain_strategy',
                        choices=['entropy', 'least_confident', 'margin'],
                        default='margin')

    parser.add_argument('--clustering', action='store_true', default=False)
    parser.add_argument('--kmeans_cache',
                        default='initial_clustering_result.pkl')
    parser.add_argument('--initial_label_cache',
                        default='initial_label_cache.npy')

    parser.add_argument('--query_by_committee',
                        action='store_true',
                        default=False)
    parser.add_argument('--qbc_strategy',
                        choices=['vote', 'average_kl'],
                        default='average_kl')
    parser.add_argument('--committee_size', default=10, type=int)

    parser.add_argument('--aug_in_inference',
                        action='store_true',
                        default=False)

    args = parser.parse_args()

    device = args.gpu

    # log directory
    logger.init(args)

    # load data
    # NOTE(review): dataset_path is a module-level name not defined in this
    # function — confirm it is set before main() runs.
    train_dataset = np.load(os.path.join(dataset_path, args.train_path))
    test_dataset = np.load(os.path.join(dataset_path, args.test_path))
    num_class = 2
    image_size = 256
    crop_size = 224

    preprocess_type = args.arch if not args.hed else 'hed'
    # Evaluate on a fixed random subset of at most 10000 test samples.
    perm = np.random.permutation(len(test_dataset))[:10000]
    test_dataset = [test_dataset[idx] for idx in perm]
    test = CamelyonDatasetEx(test_dataset,
                             original_size=image_size,
                             crop_size=crop_size,
                             aug=False,
                             color_aug=False,
                             preprocess_type=preprocess_type)
    test_iter = iterators.MultiprocessIterator(test,
                                               args.batch_size,
                                               repeat=False,
                                               shuffle=False)

    # Precomputed compact-bilinear features used for active sampling.
    cbp_feat = np.load('train_cbp512_feat.npy')
    labeled_data, unlabeled_data, feat = initialize_labeled_dataset(
        args, train_dataset, cbp_feat)
    print('now {} labeled samples, {} unlabeled'.format(
        len(labeled_data), len(unlabeled_data)))

    # start training
    reporter = ProgresssReporter(args)
    # Each outer iteration: retrain from scratch on the current labeled pool,
    # evaluate, then query more samples to label.
    for iteration in range(100):

        # model construct
        if args.texture:
            model = BilinearCNN(base_cnn=args.arch,
                                pretrained_model='auto',
                                num_class=num_class,
                                texture_layer=None,
                                cbp=args.cbp,
                                cbp_size=4096)
        else:
            model = TrainableCNN(base_cnn=args.arch,
                                 pretrained_model='auto',
                                 num_class=num_class)

        # set optimizer
        optimizer = make_optimizer(model, args.opt, args.lr)

        # use gpu
        cuda.get_device_from_id(device).use()
        model.to_gpu()

        labeled_dataset = CamelyonDatasetEx(labeled_data,
                                            original_size=image_size,
                                            crop_size=crop_size,
                                            aug=True,
                                            color_aug=True,
                                            preprocess_type=preprocess_type)
        labeled_iter = iterators.MultiprocessIterator(labeled_dataset,
                                                      args.batch_size)

        # train phase
        count = 0
        train_loss = 0
        train_acc = 0
        # Train twice as long once the labeled pool is large.
        epoch_interval = args.epoch_interval if len(
            labeled_data[0]) < 10000 else args.epoch_interval * 2
        anneal_epoch = int(epoch_interval * 0.8)
        while labeled_iter.epoch < epoch_interval:
            # train with labeled dataset
            batch = labeled_iter.next()
            x, t = chainer.dataset.concat_examples(batch, device=device)
            optimizer.update(model, x, t)
            reporter(labeled_iter.epoch)

            # One-time lr anneal at 80% of the schedule.
            if labeled_iter.is_new_epoch and labeled_iter.epoch == anneal_epoch:
                optimizer.alpha *= 0.1

            # Accumulate loss/accuracy over the final epochs only.
            # NOTE(review): this uses args.epoch_interval, not the (possibly
            # doubled) local epoch_interval — confirm intended.
            if labeled_iter.epoch > args.epoch_interval - 5:
                count += len(batch)
                train_loss += model.loss.data * len(batch)
                train_acc += model.accuracy.data * len(batch)

                # if labeled_iter.is_new_epoch:
                #     train_loss_tmp = cuda.to_cpu(train_loss) / len(labeled_iter.dataset)
                #     loss_history.append(train_loss_tmp - np.sum(loss_history))

        reporter.reset()

        logger.plot('train_loss', cuda.to_cpu(train_loss) / count)
        logger.plot('train_accuracy', cuda.to_cpu(train_acc) / count)

        # test
        print('\ntest')
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            evaluate_ex(model, test_iter, device)

        # logger
        logger.flush()

        # Stop once the labeled pool has reached the budget.
        if len(labeled_data[0]) >= 10000:
            print('done')
            exit()  # terminates the whole process, not just this function

        # Score a random 10000-sample subset of the unlabeled pool.
        tmp_indices = np.random.permutation(len(unlabeled_data))[:10000]
        tmp_unlabeled_data = [unlabeled_data[idx] for idx in tmp_indices]
        tmp_cbp_feat = cbp_feat[tmp_indices]

        unlabeled_dataset = CamelyonDatasetEx(tmp_unlabeled_data,
                                              original_size=image_size,
                                              crop_size=crop_size,
                                              aug=args.aug_in_inference,
                                              color_aug=args.aug_in_inference,
                                              preprocess_type=preprocess_type)
        unlabeled_iter = iterators.MultiprocessIterator(unlabeled_dataset,
                                                        args.batch_size,
                                                        repeat=False,
                                                        shuffle=False)

        # Committee predictions: one softmax pass over the pool per member.
        preds = np.zeros((args.committee_size, len(tmp_unlabeled_data), 2))
        # feat = np.zeros((len(unlabeled_iter.dataset), 784))
        if args.random_sample:
            tmp_query_indices = np.random.permutation(
                len(tmp_unlabeled_data))[:args.active_sample_size]
        else:
            loop_num = args.committee_size
            for loop in range(loop_num):
                count = 0
                for batch in unlabeled_iter:
                    x, t = chainer.dataset.concat_examples(batch,
                                                           device=device)
                    with chainer.no_backprop_mode():
                        y = F.softmax(model.forward(x))
                    preds[loop, count:count + len(batch)] = cuda.to_cpu(y.data)
                    count += len(batch)
                    # if loop == 0:
                    #     feat[i * batch_size: (i + 1) * batch_size] = cuda.to_cpu(x)
                unlabeled_iter.reset()
            tmp_query_indices = active_annotation(preds,
                                                  tmp_cbp_feat,
                                                  opt=args)

        # active sampling
        print('active sampling: ', end='')

        # Periodically snapshot the model and the samples it queried.
        if iteration % 10 == 0:
            logger.save(model,
                        [tmp_unlabeled_data[idx] for idx in tmp_query_indices])

        # Map subset-local indices back to pool indices, then move the queried
        # samples from the unlabeled pool into the labeled pool.
        query_indices = tmp_indices[tmp_query_indices]
        labeled_data, unlabeled_data, cbp_feat = query_dataset(
            labeled_data, unlabeled_data, cbp_feat, query_indices)
        print('now {} labeled samples, {} unlabeled'.format(
            len(labeled_data), len(unlabeled_data)))
Пример #6
0
def save_entry(sender):
    """Collect the entry from the sender's parent view, log it, and report.

    Reads the date and entry fields from the superview, hands them to
    ``log.save``, and surfaces the outcome via ``save_status_alert``.
    """
    container = sender.superview
    when = get_datetime(container)
    entry = get_info(container, when)
    result = log.save(entry)
    save_status_alert(result)