Example #1
def main(argv):
    del argv
    
    tf.logging.set_verbosity(tf.logging.ERROR)

    hparams = _default_hparams()
    
    if FLAGS.mode == 'train':
        utils.train_and_eval(
            model_dir=FLAGS.model_dir,
            model_fn=model_fn,
            input_fn=create_input_fn,
            hparams=hparams,
            steps=FLAGS.steps,
            batch_size=FLAGS.batch_size,
        )
    elif FLAGS.mode == 'predict':
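        # cv2, matplotlib, and the crop helper are imported lazily so the training path does not need them.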
        import cv2
        from matplotlib import pyplot as plt
        from gen_tfrecords import central_crop
        with tf.Session() as sess:
            unet = utils.UNet(FLAGS.model_dir, sess)
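            # vw and vh are assumed to be module-level crop dimensions defined elsewhere in this script.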
            im = central_crop(cv2.imread(FLAGS.image), vw, vh) / 255.0
            t = time()
            mask = unet.run(im)
            print("Inference took %f ms" % (1000*(time()-t)))
            image = .3 * im + .7 * np.squeeze(mask)[...,np.newaxis]
            plt.imshow(image)
            plt.show()
    else:
        raise ValueError("Unknown mode: %s" % FLAGS.mode)
Example #2
def main(argv):
    del argv
    f = open("eval_output.txt", "a+")
    hparams = _default_hparams()

    if FLAGS.predict:
        predict(FLAGS.input, hparams, path=FLAGS.path)
    elif FLAGS.test:
        print("Testing")

        output = utils.eval(model_dir=FLAGS.model_dir,
                            model_fn=model_fn,
                            input_fn=create_test_input_fn,
                            hparams=hparams,
                            steps=FLAGS.steps,
                            batch_size=FLAGS.batch_size,
                            save_checkpoints_secs=600,
                            eval_throttle_secs=1800,
                            eval_steps=5,
                            sync_replicas=FLAGS.sync_replicas,
                            task="test",
                            path=FLAGS.path)
        print("Angular median: ", np.median(output['angular_array']))
        print("Angular loss: ", output['angular_loss'])
        f.close()
    elif FLAGS.eval:
        print("Evaluating")
        output = utils.eval(model_dir=FLAGS.model_dir,
                            model_fn=model_fn,
                            input_fn=create_test_input_fn,
                            hparams=hparams,
                            steps=FLAGS.steps,
                            batch_size=FLAGS.batch_size,
                            save_checkpoints_secs=600,
                            eval_throttle_secs=1800,
                            eval_steps=5,
                            sync_replicas=FLAGS.sync_replicas,
                            task="eval",
                            path=FLAGS.path)
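        # The angular outputs are in radians; convert to degrees before reporting the median and plotting the histogram.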
        array_degree = output['angular_array'] * 180 / math.pi
        print("Angular median: ", np.median(array_degree))
        print("Angular loss: ", output['angular_loss'] * 180 / math.pi)
        plt.hist(array_degree, bins=50, range=(0, 180))
        plt.show()
        f.close()
    else:
        utils.train_and_eval(
            model_dir=FLAGS.model_dir,
            model_fn=model_fn,
            input_fn=create_input_fn,
            hparams=hparams,
            steps=FLAGS.steps,
            batch_size=FLAGS.batch_size,
            save_checkpoints_secs=600,
            eval_throttle_secs=1800,
            eval_steps=5,
            sync_replicas=FLAGS.sync_replicas,
        )
Example #3
def main():
    x, y = load_dataset('data/amazon_reviews_multilingual_JP_v1_00.tsv',
                        n=5000)

    print('Tokenization')
    x = [clean_html(text, strip=True) for text in x]
    x = [' '.join(tokenize(text)) for text in x]
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                         y,
                                                         test_size=0.2,
                                                         random_state=42)
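    # Compare binary presence, raw counts, TF-IDF, and uni+bi-gram TF-IDF vectorizers with the same train_and_eval pipeline.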

    print('Binary')
    vectorizer = CountVectorizer(binary=True)
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    print('Count')
    vectorizer = CountVectorizer(binary=False)
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    print('TF-IDF')
    vectorizer = TfidfVectorizer()
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    print('Bigram')
    vectorizer = TfidfVectorizer(ngram_range=(1, 2))
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)
Example #4
def main(argv):
    del argv
    
    tf.logging.set_verbosity(tf.logging.ERROR)
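    # Cap this process at roughly one third of the GPU's memory (per_process_gpu_memory_fraction=0.333).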
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    if FLAGS.mode == 'train':
        hparams = _default_hparams()

        utils.train_and_eval(
            model_dir=FLAGS.model_dir,
            model_fn=model_fn,
            input_fn=create_input_fn,
            hparams=hparams,
            steps=FLAGS.steps,
            batch_size=FLAGS.batch_size,
        )

    elif FLAGS.mode == 'save_desc':
        import test_net_save_desc
        print('### model save_desc')
        test_net_save_desc.save_desc(FLAGS.model_dir, FLAGS.data_dir,
                      FLAGS.n_include, FLAGS.title)

    elif FLAGS.mode == 'pr':
        import test_net

        test_net.plot(FLAGS.model_dir, FLAGS.data_dir,
                FLAGS.n_include, FLAGS.title, netvlad_feat=FLAGS.netvlad_feat,
                include_calc=FLAGS.include_calc)
    
    elif FLAGS.mode == 'best':
        import test_net

        test_net.find_best_checkpoint(FLAGS.model_dir, FLAGS.data_dir,
                FLAGS.n_include)
    
    elif FLAGS.mode == 'ex':

        utils.show_example(FLAGS.image_fl, FLAGS.model_dir)

    else:
        raise ValueError("Unrecognized mode: %s" % FLAGS.mode)
Example #5
def main():
    x, y = load_dataset('data/amazon_reviews_multilingual_JP_v1_00.tsv',
                        n=1000)

    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42)
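    # Each run below changes a single preprocessing option so the scores stay directly comparable.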

    print('Tokenization only.')
    train_and_eval(x_train, y_train, x_test, y_test, tokenize=tokenize)

    print('Clean html.')
    train_and_eval(x_train,
                   y_train,
                   x_test,
                   y_test,
                   tokenize=tokenize,
                   preprocessor=clean_html)

    print('Normalize number.')
    train_and_eval(x_train,
                   y_train,
                   x_test,
                   y_test,
                   tokenize=tokenize,
                   preprocessor=normalize_number)

    print('Base form.')
    train_and_eval(x_train,
                   y_train,
                   x_test,
                   y_test,
                   tokenize=tokenize_base_form)

    print('Lower text.')
    train_and_eval(x_train,
                   y_train,
                   x_test,
                   y_test,
                   tokenize=tokenize,
                   lowercase=True)
Example #6
def main(argv):
  del argv

  hparams = _default_hparams()

  if FLAGS.predict:
    predict(FLAGS.input, hparams)
  else:
    utils.train_and_eval(
        model_dir=FLAGS.model_dir,
        model_fn=model_fn,
        input_fn=create_input_fn,
        hparams=hparams,
        steps=FLAGS.steps,
        batch_size=FLAGS.batch_size,
        save_checkpoints_secs=600,
        eval_throttle_secs=1800,
        eval_steps=5,
        sync_replicas=FLAGS.sync_replicas,
    )
Example #7
def main(argv):
    del argv

    if args.test:
        test()

    else:
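        # Train in four stages (100k, 200k, 300k, 400k steps), halving the learning rate before each stage and testing after it.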
        for steps in range(1, 5):
            args.steps = steps * 100000
            args.lr /= 2
            hparams = _default_hparams()
            print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                  " steps:", args.steps, " lr=", args.lr)
            utils.train_and_eval(model_dir=args.model_dir,
                                 model_fn=model_fn,
                                 create_input_fn=create_input_fn,
                                 create_test_input_fn=create_test_input_fn,
                                 hparams=hparams,
                                 steps=args.steps,
                                 batch_size=args.batch_size,
                                 sync_replicas=args.sync_replicas)
            test()
Example #8
def main(argv):
    del argv

    tf.logging.set_verbosity(tf.logging.ERROR)
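    # FLAGS.mode selects training ('train'), test-set plotting ('pr'), best-checkpoint search ('best'), or a single-example visualization ('ex').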

    if FLAGS.mode == 'train':
        hparams = _default_hparams()

        utils.train_and_eval(
            model_dir=FLAGS.model_dir,
            model_fn=model_fn,
            input_fn=create_input_fn,
            hparams=hparams,
            steps=FLAGS.steps,
            batch_size=FLAGS.batch_size,
        )
    elif FLAGS.mode == 'pr':
        import test_net

        test_net.plot(FLAGS.model_dir,
                      FLAGS.data_dir,
                      FLAGS.n_include,
                      FLAGS.title,
                      netvlad_feat=FLAGS.netvlad_feat,
                      include_calc=FLAGS.include_calc)

    elif FLAGS.mode == 'best':
        import test_net

        test_net.find_best_checkpoint(FLAGS.model_dir, FLAGS.data_dir,
                                      FLAGS.n_include)

    elif FLAGS.mode == 'ex':

        utils.show_example(FLAGS.image_fl, FLAGS.model_dir)

    else:
        raise ValueError("Unrecognized mode: %s" % FLAGS.mode)
Example #9
def main(argv):

    del argv
    f = open("eval_output.txt", "a+")
    hparams = _default_hparams()

    if FLAGS.predict:
        predict(FLAGS.input, hparams)
    elif FLAGS.eval:
        print("Running Inference")
        output = utils.eval(
            model_dir=FLAGS.model_dir,
            model_fn=model_fn,
            input_fn=create_input_fn,
            hparams=hparams,
            steps=FLAGS.steps,
            batch_size=FLAGS.batch_size,
            save_checkpoints_secs=600,
            eval_throttle_secs=1800,
            eval_steps=5,
            sync_replicas=FLAGS.sync_replicas,
        )
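        # Append the raw metrics dict to eval_output.txt for later inspection.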
        f.write(str(output))
        f.close()
    else:
        utils.train_and_eval(
            model_dir=FLAGS.model_dir,
            model_fn=model_fn,
            input_fn=create_input_fn,
            hparams=hparams,
            steps=FLAGS.steps,
            batch_size=FLAGS.batch_size,
            save_checkpoints_secs=600,
            eval_throttle_secs=1800,
            eval_steps=5,
            sync_replicas=FLAGS.sync_replicas,
        )
Example #10
## -------------
## Train:3largebio; Test: anatomy  (retrain 0,1)

print("----------------\nTrain:3largebio; Test: anatomy  (retrain 0,1)")

cross_tuples = [
    ([Xy_bins_lb1, Xy_bins_lb2, Xy_bins_lb3], [Xy_bins_an], "1"),
]

results_path = os.path.join("data", "largebio_anatomy_paper.pkl")

if not os.path.isfile(results_path):
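    # No cached results yet: train, evaluate, and pickle the results for reuse.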
    df_results = u.train_and_eval(cross_tuples,
                                  classifiers,
                                  classifier_kwargs,
                                  undersample=True,
                                  save='data/largebio_anatomy_paper.pkl')
else:
    pickle_off = open("data/largebio_anatomy_paper.pkl", "rb")
    df_results = pickle.load(pickle_off)

## --------------
## Train LB with lb/conference features
print('----------------\nTrain LB with lb/conference features')

conf_lb_features = [
    'measure_aml', 'measure_dome', 'measure_logmap', 'measure_logmaplt',
    'measure_wiktionary'
]
Example #11
            optimizer = optim.SGD(filter(lambda x: x.requires_grad,
                                         model.parameters()),
                                  lr=config.base_lr[0],
                                  momentum=0.9)
            scheduler = CyclicLR(optimizer=optimizer,
                                 base_lr=config.base_lr[0],
                                 max_lr=config.max_lr[0],
                                 step_size=config.step_size,
                                 mode='triangular2')
        elif config.model_name == 'resnet50':
            model = Resnet50(config.num_classes)
            optimizer = optim.SGD(filter(lambda x: x.requires_grad,
                                         model.parameters()),
                                  lr=config.base_lr[0],
                                  momentum=0.9)
            scheduler = CyclicLR(optimizer=optimizer,
                                 base_lr=config.base_lr[0],
                                 max_lr=config.max_lr[0],
                                 step_size=config.step_size,
                                 mode='triangular2')
        else:
            raise RuntimeError(
                'model name must be one of [resnet50, se_resnext50, densenet201]'
            )

        model = nn.DataParallel(model, device_ids=config.gpu_ids)
        model = model.cuda()

        train_and_eval(model, scheduler, optimizer, criterion, loader_train,
                       loader_valid, config)
Example #12
    }
}, {
    "param_grid": {
        'C': [0.1, 0.5, 1, 10],
        'tol': [1e-2, 1e-3, 1e-4]
    }
}, {
    "param_grid": {
        'base_estimator': [LogisticRegression()],
        'n_estimators': [50, 100, 150, 200]
    }
}]

df_results = u.train_and_eval(cross_tuples,
                              classifiers,
                              classifier_kwargs,
                              undersample=True,
                              save='data/conference_paper.pkl')

print(df_results.loc[:, df_results.columns != 'training_df'])

lb_measures = [
    'measure_agm', 'measure_aml', 'measure_dome', 'measure_fcamap',
    'measure_logmap', 'measure_logmapbio', 'measure_logmaplt',
    'measure_pomap++', 'measure_wiktionary'
]

conf_lb_columns = np.array(lb_measures)[np.isin(
    np.array(lb_measures), df_data_bins.columns)].tolist()

print(conf_lb_columns)
    "param_grid": {
        'C': [0.1, 0.5, 1, 10],
        'tol': [1e-2, 1e-3, 1e-4]
    }
}, {
    "param_grid": {
        'base_estimator': [LogisticRegression()],
        'n_estimators': [50, 100, 150, 200]
    }
}]

cross_tuples = [
    ([Xy_bins_an, Xy_bins_lb1, Xy_bins_lb2, Xy_bins_lb3], [Xy_bins_an], "i1"),
    ([Xy_bins_an, Xy_bins_lb1, Xy_bins_lb2,
      Xy_bins_lb3], [Xy_bins_lb1, Xy_bins_lb2, Xy_bins_lb3], "i2"),
    ([Xy_bins_an], [Xy_bins_lb1, Xy_bins_lb2, Xy_bins_lb3], "ii"),
    ([Xy_bins_lb1], [Xy_bins_lb2, Xy_bins_lb3], "iii1"),
    ([Xy_bins_lb2], [Xy_bins_lb1, Xy_bins_lb3], "iii2"),
    ([Xy_bins_lb3], [Xy_bins_lb1, Xy_bins_lb2], "iii3"),
    ([Xy_bins_lb2, Xy_bins_lb3], [Xy_bins_lb1], "iv1"),
    ([Xy_bins_lb1, Xy_bins_lb3], [Xy_bins_lb2], "iv2"),
    ([Xy_bins_lb1, Xy_bins_lb2], [Xy_bins_lb3], "iv3"),
]

df_results = u.train_and_eval(cross_tuples,
                              classifiers,
                              classifier_kwargs,
                              undersample=True,
                              save='data/largebio_paper.pkl')

print(df_results.loc[:, df_results.columns != 'training_df'])
Example #14
def main():
    logger = logging.getLogger(__name__)

    x, y = load_dataset("data/amazon_reviews_multilingual_JP_v1_00.tsv",
                        n=1000)

    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=44)

    logger.debug("●○ Tokenization only. ○●")
    train_and_eval(x_train, y_train, x_test, y_test, tokenize=tokenize)

    logger.debug("●○ Clean html. ○●")
    train_and_eval(x_train,
                   y_train,
                   x_test,
                   y_test,
                   tokenize=tokenize,
                   preprocessor=clean_html)

    logger.debug("●○ Normalize number. ○●")
    train_and_eval(x_train,
                   y_train,
                   x_test,
                   y_test,
                   tokenize=tokenize,
                   preprocessor=normalize_number)

    logger.debug("●○ Base form. ○●")
    train_and_eval(x_train,
                   y_train,
                   x_test,
                   y_test,
                   tokenize=tokenize_base_form)

    logger.debug("●○ Lower text. ○●")
    train_and_eval(x_train,
                   y_train,
                   x_test,
                   y_test,
                   tokenize=tokenize_base_form,
                   lowercase=True)

    logger.debug("●○ Use MeCab; tokenize only. ○●")  # not written in text

    import MeCab
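    # Assumes the mecab-ipadic-NEologd dictionary is installed at the default Debian/Ubuntu path.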
    path_neologd = '/usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ipadic-neologd'
    t_mecab = MeCab.Tagger("-Owakati -d {}".format(path_neologd))

    def tokenize_by_mecab(text):
        return list(t_mecab.parse(text).strip().split(" "))

    train_and_eval(x_train,
                   y_train,
                   x_test,
                   y_test,
                   tokenize=tokenize_by_mecab)
Example #15
def main():
    logger = logging.getLogger(__name__)

    x, y = load_dataset("data/amazon_reviews_multilingual_JP_v1_00.tsv",
                        n=5000)

    logger.debug("●○ Tokenization ○●")
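    # Strip HTML and whitespace-join the tokens once up front so every vectorizer sees identical pre-tokenized text.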
    x = [clean_html(text, strip=True) for text in x]
    x = [" ".join(tokenize(text)) for text in x]
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=44)

    logger.debug("●○ Binary ○●")
    vectorizer = CountVectorizer(binary=True)
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    logger.debug("●○ Count ○●")
    vectorizer = CountVectorizer(binary=False)
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    logger.debug("●○ TF-IDF; Uni-gram ○●")
    vectorizer = TfidfVectorizer(ngram_range=(1, 1))
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    logger.debug("●○ TF-IDF; Bi-gram ○●")
    vectorizer = TfidfVectorizer(ngram_range=(2, 2))
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    logger.debug("●○ TF-IDF; Uni- and Bi-grams ○●")
    vectorizer = TfidfVectorizer(ngram_range=(1, 2))
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    logger.debug("●○ TF-IDF; Uni-, Bi-, and Tri-grams ○●")
    vectorizer = TfidfVectorizer(ngram_range=(1, 3))
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    logger.debug("●○ Use MeCab; TF-IDF; Uni-gram ○●")  # not written in text

    import MeCab
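    # Assumes mecab-ipadic-NEologd is installed at the default Debian/Ubuntu path.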
    path_neologd = '/usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ipadic-neologd'
    t_mecab = MeCab.Tagger("-Owakati -d {}".format(path_neologd))

    def tokenize_by_mecab(text):
        return list(t_mecab.parse(text).strip().split(" "))

    x, y = load_dataset("data/amazon_reviews_multilingual_JP_v1_00.tsv",
                        n=5000)
    x = [clean_html(text, strip=True) for text in x]
    x = [" ".join(tokenize_by_mecab(text)) for text in x]
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=44)
    vectorizer = TfidfVectorizer(ngram_range=(1, 1))
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    logger.debug(
        "●○ Use MeCab; TF-IDF; Uni- and Bi-grams ○●")  # not written in text
    vectorizer = TfidfVectorizer(ngram_range=(1, 2))
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)

    logger.debug("●○ Use MeCab; TF-IDF; Uni-, Bi-, and Tri-grams ○●"
                 )  # not written in text
    vectorizer = TfidfVectorizer(ngram_range=(1, 3))
    train_and_eval(x_train, y_train, x_test, y_test, vectorizer)