예제 #1
0
    def test(self, args):
        """Translate every test image and write an HTML comparison index.

        Input images are globbed from ``./datasets/<self.dataset_dir>/testA_b``
        (direction ``AtoB``) or ``./datasets/<self.dataset_dir>/testB``
        (direction ``BtoA``). Each image is fed through the session and the
        result is saved under ``<args.test_dir>/<args.dataset_dir>/``. An
        ``<args.which_direction>_index.html`` table listing name, input and
        output is written into ``args.test_dir``.

        Args:
            args: options object; this method reads ``which_direction``,
                ``test_dir``, ``dataset_dir`` and ``fine_size``.

        Raises:
            Exception: if ``args.which_direction`` is neither ``'AtoB'`` nor
                ``'BtoA'``.
        """
        if args.which_direction == 'AtoB':
            sample_files = glob('./datasets/{}/*.*'.format(self.dataset_dir +
                                                           '/testA_b'))
        elif args.which_direction == 'BtoA':
            sample_files = glob('./datasets/{}/*.*'.format(self.dataset_dir +
                                                           '/testB'))
        else:
            raise Exception('--which_direction must be AtoB or BtoA')

        # Pick the (output tensor, input placeholder) pair for the direction.
        if args.which_direction == 'AtoB':
            out_var, in_var = self.testB, self.test_A
        else:
            out_var, in_var = self.testA, self.test_B

        # The output directory does not depend on the sample, so create it
        # once up front instead of re-checking on every iteration.
        test_path = os.path.join(args.test_dir, args.dataset_dir)
        if not os.path.exists(test_path):
            os.makedirs(test_path)

        # write html for visual comparison
        index_path = os.path.join(
            args.test_dir, '{0}_index.html'.format(args.which_direction))
        # `with` guarantees the index is flushed and closed even if loading,
        # inference or saving raises part-way through the loop.
        with open(index_path, "w") as index:
            index.write("<html><body><table><tr>")
            index.write("<th>name</th><th>input</th><th>output</th></tr>")

            for sample_file in sample_files:
                print('Processing image: ' + sample_file)
                sample_image = [load_test_data(sample_file, args.fine_size)]
                sample_image = np.array(sample_image).astype(np.float32)
                # Append a trailing axis of size 1, then keep only the first
                # `input_c_dim` entries of the 4th axis.
                # NOTE(review): assumes load_test_data returns a 2-D image so
                # the batch becomes 4-D here -- confirm against the loader.
                new_shape = list(sample_image.shape) + [1]
                sample_image = np.reshape(sample_image, newshape=new_shape)
                sample_image = sample_image[:, :, :, :self.input_c_dim]
                image_path = os.path.join(
                    test_path,
                    '{0}_{1}'.format(args.which_direction,
                                     os.path.basename(sample_file)))
                fake_img = self.sess.run(out_var,
                                         feed_dict={in_var: sample_image})
                save_images(fake_img, [1, 1], image_path)

                # Every data row needs its own opening <tr>; the header row's
                # <tr> was already closed above.
                index.write("<tr>")
                index.write("<td>%s</td>" % os.path.basename(image_path))
                # Relative paths are prefixed with '..' in the image links;
                # absolute paths are used unchanged.
                index.write("<td><img src='%s'></td>" %
                            (sample_file if os.path.isabs(sample_file) else
                             ('..' + os.path.sep + sample_file)))
                index.write("<td><img src='%s'></td>" %
                            (image_path if os.path.isabs(image_path) else
                             ('..' + os.path.sep + image_path)))
                index.write("</tr>")

            index.write("</table></body></html>")
예제 #2
0
def pred(_log, _config):
    """Evaluate the saved per-epoch weights of a trained model on a test year.

    For each usable ``.h5`` file in the experiment's weight directory the
    model is rebuilt from the dump, predictions are produced for the test
    queries, the run is scored with ``eval_run`` and written with
    ``print_run``. Epochs that already have a ``.run`` file in the output
    directory are not re-predicted; their metrics are parsed back out of the
    file name. Finally the per-epoch metrics are passed to ``plot_curve``.

    Args:
        _log: logger-like object; ``info`` and ``error`` are called on it.
        _config: mapping of experiment parameters. Keys read here:
            ``modelfn``, ``seed``, ``parentdir``, ``train_years``,
            ``expname``, ``test_year``, ``winlen``, ``qproximity``,
            ``maxqlen``, ``xfilters``, ``distill`` and ``epochs``. The whole
            mapping is also handed to ``load_test_data`` and ``plot_curve``.

    Raises:
        AssertionError: if the weight or model-detail directory is missing.
        SoftFailure: if the weight directory is empty, contains no usable
            epoch weights, or the newest epoch found is more than three
            epochs short of ``p['epochs']``.
    """
    p = _config

    modelname = file2name[p['modelfn']]
    mod_model = importlib.import_module('models.%s' % p['modelfn'])
    model_cls = getattr(mod_model, modelname)
    # Only forward the parameters the model class declares (plus 'modelfn').
    model_params = {k: v for k, v in p.items() if k in model_cls.params or k == 'modelfn'}
    model = model_cls(model_params, rnd_seed=p['seed'])
    expid = model.params_to_string(model_params)

    outdir_plot=trunc_dir('%s/train_%s/%s/predict_per_epoch/test_%s' % (p['parentdir'], p['train_years'],
                                                              p['expname'], p['test_year']))
    outdir_run=trunc_dir('%s/%s'%(outdir_plot, expid))
    tmp_dir=trunc_dir(os.path.join(outdir_run,'tmp'))
    weight_dir=trunc_dir('%s/train_%s/%s/model_weight/%s' % (p['parentdir'], p['train_years'],p['expname'], expid))
    detail_outdir=trunc_dir('%s/train_%s/%s/model_detail/' % (p['parentdir'], p['train_years'], p['expname']))

    assert os.path.isdir(weight_dir), "weight_dir " + weight_dir + " does not exist. Make sure you trained the model."
    # The local is named detail_outdir; the previous `detail_dir` reference
    # raised NameError before the assertion could ever be evaluated.
    assert os.path.isdir(detail_outdir), "detail_dir " + detail_outdir + " does not exist. Make sure you trained the model."

    if len(os.listdir(weight_dir)) < 1:
        raise SoftFailure('weight dir empty')

    # Create each output directory independently so tmp_dir is also created
    # when outdir_run already exists from an earlier run.
    for needed_dir in (outdir_run, tmp_dir):
        try:
            if not os.path.isdir(needed_dir):
                os.makedirs(needed_dir)
        except OSError:
            # Best effort, as before. NOTE(review): presumably tolerates a
            # concurrent run creating the same directory -- confirm.
            pass
    _log.info('Processing {0}'.format(outdir_run))
    ###################
    label2tlabel={4:2,3:2,2:2,1:1,0:0,-2:0}
    topk4eval=20
    NGRAM_NFILTER, N_GRAMS = get_ngram_nfilter(p['winlen'], p['qproximity'], p['maxqlen'], p['xfilters'])

    _log.info('process {0} and output to {1}'.format(weight_dir, outdir_run))
    _log.info('{0} {1} {2} {3} {4}'.format(p['distill'], 'NGRAM_NFILTER', NGRAM_NFILTER, 'N_GRAMS', N_GRAMS))

    # prepare test data: keep only the queries that have relevance judgments
    qids = get_train_qids(p['test_year'])
    qrelf = get_qrelf(qrelfdir, p['test_year'])
    qid_cwid_label = read_qrel(qrelf, qids, include_spam=False)
    test_qids =[qid for qid in qids if qid in qid_cwid_label]
    _log.info('%s test_num %d '%(p['test_year'], len(test_qids)))

    # weight file name -> (epoch, loss, n_batch, n_samples)
    f_ndcg=dict()
    f_epochs = set()
    # sort weights by time and only use the first weights for each epoch
    # (in case there are duplicate weights from a failed/re-run train)
    for f in sorted(os.listdir(weight_dir),
                    key=lambda x: os.path.getctime(os.path.join(weight_dir, x))):
        if f.split('.')[-1] != 'h5':
            continue
        # The part before the first '.' holds four '_'-separated integers.
        cols = f.split('.')[0].split('_')
        if len(cols) == 4:
            nb_epoch, loss, n_batch, n_samples = int(cols[0]), int(cols[1]), int(cols[2]), int(cols[3])
            if nb_epoch <= p['epochs'] and nb_epoch not in f_epochs:
                f_epochs.add(nb_epoch)
                f_ndcg[f]=(nb_epoch, loss, n_batch, n_samples)


    # epoch -> (epoch, err, ndcg, mapv, loss), recovered from existing
    # '<epoch>_<ndcg>_<map>_<err>_<loss>.run' file names.
    finished_epochs = {}
    for fn in sorted(os.listdir(outdir_run),
                     key=lambda x: os.path.getctime(os.path.join(outdir_run, x))):
        if fn.endswith(".run"):
            fields = fn[:-4].split("_") # trim .run
            assert len(fields) == 5

            epoch, loss = int(fields[0]), int(fields[4])
            ndcg, mapv, err = float(fields[1]), float(fields[2]), float(fields[3])

            #assert epoch not in finished_epochs
            if epoch in finished_epochs:
                _log.error("TODO two weights exist for same epoch")
            # The most recently created .run file for an epoch wins.
            finished_epochs[epoch] = (epoch, err, ndcg, mapv, loss)

    _log.info('skipping finished epochs: {0}'.format(finished_epochs))

    def model_pred(NGRAM_NFILTER, weight_file, test_data, test_docids, test_qids):
        """Dump the model plot, load `weight_file` and predict the test data."""
        dump_modelplot(model.build(), detail_outdir + 'predplot_' + expid)
        model_predict = model.build_from_dump(weight_file)
        qid_cwid_pred = pred_label(model_predict, test_data, test_docids, test_qids)
        return qid_cwid_pred

    test_doc_vec, test_docids, test_qids=load_test_data(qids, rawdoc_mat_dir, qid_cwid_label, N_GRAMS, p)
    epoch_err_ndcg_loss=list()
    _log.info('start {0} {1} {2}'.format(expid, p['train_years'], p['test_year']))
    # Process weight files in ascending epoch order.
    for f in sorted(f_ndcg, key=lambda x:f_ndcg[x][0]):
        nb_epoch, loss, n_batch, n_samples = f_ndcg[f]
        if nb_epoch in finished_epochs:
            # Reuse the metrics parsed from the existing .run file.
            epoch_err_ndcg_loss.append(finished_epochs[nb_epoch])
            continue
        weight_file = os.path.join(weight_dir, f)
        qid_cwid_pred = model_pred(NGRAM_NFILTER, weight_file, test_doc_vec, test_docids, test_qids)
        ndcg20, err20, mapv = eval_run(_log, qid_cwid_pred, expid, perlf, treceval, tmp_dir, topk4eval, qrelf)
        loss = int(loss)
        out_name = '%d_%0.4f_%0.4f_%0.4f_%d.run' % (nb_epoch, ndcg20, mapv, err20, loss)
        epoch_err_ndcg_loss.append((nb_epoch, err20, ndcg20, mapv, loss))
        print_run(qid_cwid_pred, outdir_run, out_name, expid)
        _log.info('finished {0}'.format(f))
    _log.info('finish {0} {1} {2}'.format(expid, p['train_years'], p['test_year']))

    plot_curve(epoch_err_ndcg_loss, outdir_plot, expid, p)

    # max() of an empty set raises ValueError; report the situation with the
    # same SoftFailure type used for the "not enough epochs yet" case.
    if not f_epochs:
        raise SoftFailure("prediction finished, but no epoch weights were found in %s" % weight_dir)
    if max(f_epochs) < p['epochs'] - 3:
        raise SoftFailure("prediction finished, but not all epochs are available yet. last epoch found: %s" % max(f_epochs))
예제 #3
0
from utils.utils import load_train_data, load_test_data, clean_tweets, build_word_dict, load_glove_embeddings
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

# Level A: fit TF-IDF features on the cleaned training tweets and train a
# linear-kernel SVM on them.
vectorizer = TfidfVectorizer()
x_a, y_a, y_b = load_train_data('../data/OLIDv1.0')
cleaned_train_a = clean_tweets(x_a)
X_train_a = vectorizer.fit_transform(cleaned_train_a)

# Alternative classifier kept for experimentation:
# clf = RandomForestClassifier(n_estimators=100, max_depth=5,random_state=0)
clf = SVC(gamma='auto', random_state=1, kernel='linear')
clf.fit(X_train_a, y_a)

# Load the level-A test split and reuse the vocabulary fitted above.
raw_test_a, y_test_a = load_test_data(
    '../data/OLIDv1.0/testset-levela.tsv',
    '../data/OLIDv1.0/labels-levela.csv',
)
X_test_a = vectorizer.transform(clean_tweets(raw_test_a))
pred = clf.predict(X_test_a)
print(classification_report(y_test_a, pred))

# Fraction of predictions that equal the gold label. Labels are compared by
# index so the lookup semantics of y_test_a stay exactly as before.
bt = len(pred)
top = sum(1 for i in range(bt) if pred[i] == y_test_a[i])
print(float(top / bt))

# Task B
new_y_b = []
new_x_b = []
for i in range(len(x_a)):
예제 #4
0
def test():
    """Check ``Calculator.sumData`` on the test data for two constructor inputs.

    A calculator built from ``["+*"]`` must produce ``tA1`` and one built
    from ``["+", "*"]`` must produce ``tA2``. The test data is loaded afresh
    for each check.

    Returns:
        The string ``"Pass!"`` when both assertions hold.

    Raises:
        AssertionError: if either result differs from its expected value.
    """
    combined_calc = Calculator(["+*"])
    combined_result = combined_calc.sumData(utils.load_test_data(tD))
    assert combined_result == tA1

    split_calc = Calculator(["+", "*"])
    split_result = split_calc.sumData(utils.load_test_data(tD))
    assert split_result == tA2

    return "Pass!"
예제 #5
0
# Compile the model, announcing the run id, GPU setup and batch size first.
gpu_desc = str(args.gpus) + ' gpus' if args.gpus > 1 else args.gpuids
banner = ('\n\n\tGPU ' + gpu_desc +
          '\t\tBatch size [ ' + str(args.bs) + ' ] ' + ' \n\n')
print('\n\n\n', 'Compiling model..', runID, banner)
model.compile(loss=depth_loss_function, optimizer=optimizer)

print('Ready for training!\n')

# Callbacks: both the 'nyu' and 'unreal' datasets use the NYU callback
# factory; any other dataset trains without callbacks.
callbacks = []
if args.data in ('nyu', 'unreal'):
    # The test data is only loaded when --full was requested.
    full_test_set = load_test_data() if args.full else None
    callbacks = get_nyu_callbacks(model, basemodel, train_generator,
                                  test_generator, full_test_set, runPath)

# Start training
fit_options = {
    'callbacks': callbacks,
    'validation_data': test_generator,
    'epochs': args.epochs,
    'shuffle': True,
}
model.fit_generator(train_generator, **fit_options)

# Save the final trained model: