Python load_test 예제들, utils.load_test Python 예제들

예제 #1

0

파일 보기

파일: droneML.py 프로젝트: anumanu/uav-degradation-identifivation

def compute_dtw(data, dataarray, w):
    click.echo('--- Compute DTW ---')

    timeseries, timeseries_label = load_labelled(dataarray)
    timeserie_1 = load_test(data)

    click.echo('  - data        : %s ' % data)
    click.echo('  - dataarray   : %s ' % dataarray)
    click.echo('  - w           : %d ' % w)

    click.echo('\nRunning...')

    unsorted_dtws = get_distances(timeserie_1[0], data_array=timeseries, max_warping_window=w)

    # Save plots
    dtw_plots(unsorted_dtws)
    click.echo('Done. Plots have been saved.')

    click.echo('Choose a maximum number for labelling good and bad data based on DTW values.')
    click.echo('Check the plots to take better decsision.')
    click.echo('  Example: If value for Good is 150, all data with DTW 0-150 will be labelled "Good".')
    # Enter limit for 'Good'
    good_value = raw_input(' > Enter a value for "Good" (Ex: 150) : ')
    # Enter limit for 'Bad'
    bad_value = raw_input(' > Enter a value for "Bad" (Ex: 350) : ')
    # Print and save results to CSV
    fileName = raw_input(' > Enter a file name (add .csv at the end) : ')

    label_dtws(unsorted_dtws, int(good_value), int(bad_value), fileName)

    click.echo('\nDone.')

예제 #2

0

파일 보기

파일: droneML.py 프로젝트: anumanu/uav-degradation-identifivation

def predict(k, w, train, test):
    click.echo('--- Predicting a label ---')
    #click.echo('Predicting with k=%d and w=%d.' % (k,w))

    train_data, train_label = load_labelled(train)
    test_data = load_test(test)

    click.echo('  - k     : %d ' % k)
    click.echo('  - w     : %d ' % w)
    click.echo('  - train : %s ' % train)
    click.echo('  - test  : %s ' % test)

    click.echo('\nRunning...')


    model = KnnDtw(k_neighbours = k, max_warping_window = w)
    model.fit(train_data, train_label)
    
    predicted_label, probability = model.predict(test_data)
    
    click.echo('\nPredicted label : %s ' % str(predicted_label))
    click.echo('\nDone.')

예제 #3

0

파일 보기

파일: grid.py 프로젝트: glouppe/kaggle-higgs

print "Best average score =", best[0]
print "Average threshold =", threshold
print "Best params =", params

print "Save fold predictions for stacking..."

decisions = best[5]
for i, d in enumerate(decisions):
    np.save("stack/%s-fold%d.npy" % (prefix, i), decisions[i])


# Retrain on the training set
print "Retrain on the full training set..."

clf = Classifier(**params)
w = rescale(w)
w = rebalance(y, w)

try:
    clf.fit(X, y, sample_weight=w)
except:
    clf.fit(X, y)

print "Save test predictions for stacking..."

X_test, _, _, ids = load_test()
#X_test = tf.transform(X_test.astype(np.float32))
d = clf.predict_proba(X_test)[:, 0]
d = d.flatten()
np.save("stack/%s-test.npy" % prefix, d)

예제 #4

0

파일 보기

파일: 332_aggregate-ins-1.py 프로젝트: Libardo1/Home-Credit-Default-Risk-3

utils.start(__file__)
#==============================================================================
PREF = 'f332_'

KEY = 'SK_ID_CURR'

ins_start = 0  # 1~277
ins_end = 1  # 1~277

os.system(f'rm ../feature/t*_{PREF}*')
# =============================================================================
#
# =============================================================================

train = utils.load_train([KEY])
test = utils.load_test([KEY])


# =============================================================================
#
# =============================================================================
def aggregate(args):
    path, pref = args

    df = utils.read_pickles(path)
    df = df[df['NUM_INSTALMENT_NUMBER'].between(ins_start, ins_end)]
    del df['SK_ID_PREV']

    df_agg = df.groupby(KEY).agg({**utils_agg.ins_num_aggregations})
    df_agg.columns = pd.Index(
        [e[0] + "_" + e[1] for e in df_agg.columns.tolist()])

예제 #5

0

파일 보기

파일: 012_only_train.py 프로젝트: zhq200902/Santander-Customer-Transaction-Prediction

        li = only_target1(c)
        feature[f'{PREF}_{c}'] = (df[c].isin(li)) * 1

    feature[f'{PREF}_sum'] = feature.sum(1)

    feature.iloc[:200000].to_pickle(f'../data/train_{PREF}.pkl')
    feature.iloc[200000:].reset_index(
        drop=True).to_pickle(f'../data/test_{PREF}.pkl')

    return


# =============================================================================
# main
# =============================================================================
if __name__ == "__main__":
    utils.start(__file__)

    tr = utils.load_train().drop(['ID_code', 'target'], axis=1)
    y_train = utils.load_target()['target']
    te = utils.load_test().drop(['ID_code'], axis=1)

    tr0 = tr[y_train == 0]
    tr1 = tr[y_train == 1]

    trte = pd.concat([tr, te], ignore_index=True)[tr.columns]

    fe(trte)

    utils.end(__file__)

예제 #6

0

파일 보기

        # as any of the known words - so the default idf is the max of 
        # known idf's
        max_idf = max(tfidf.idf_)
        self.word2weight = defaultdict(
            lambda: max_idf,
            [(w, tfidf.idf_[i]) for w, i in tfidf.vocabulary_.items()])

        return self

    def transform(self, X):
        return np.array([
                np.mean([self.word2vec[w] * self.word2weight[w]
                         for w in words if w in self.word2vec] or
                        [np.zeros(self.dim)], axis=0)
                for words in X
            ])

etree_w2v = Pipeline([
    ("word2vec vectorizer", MeanEmbeddingVectorizer(w2v)),
    ("extra trees", ExtraTreesClassifier(n_estimators=200))])
etree_w2v_tfidf = Pipeline([
    ("word2vec vectorizer", TfidfEmbeddingVectorizer(w2v)),
    ("extra trees", ExtraTreesClassifier(n_estimators=200))])

res = utils.load_train()
etree_w2v.fit(res[0], res[1])

test = utils.load_test()
preds = etree_w2v.predict(test[0])
print(metrics.classification_report(test[1], preds))
print(metrics.confusion_matrix(test[1], preds))

예제 #7

0

파일 보기

loop = 1

param = {
    'max_depth': 15,
    'eta': 0.1,
    'colsample_bytree': 0.6,
    'subsample': 0.5,
    'silent': 1,
    #         'scale_pos_weight':1.707, # neg/pos
    'eval_metric': 'auc',
    'objective': 'binary:logistic'
}

train = utils.load_train(file_in=file_in)
test = utils.load_test(file_in=file_in)


#==============================================================================
# logloss NO sampling
#==============================================================================
def get_valid_col(col):
    return [
        c for c in col if c.count(',') > 0 or c.count('[') > 0
        or c.count(']') > 0 or c.count('>') > 0
    ]


col = ['qid1', 'qid2', 'question1', 'question2', 'is_duplicate']
y_train = train.is_duplicate
train_sub = train[['id', 'is_duplicate']]

예제 #8

0

파일 보기

파일: algorithm1.py 프로젝트: VeLKerr/lazy-learning-fca

def classify(train, examples):
    cv_res = {
     "PP": 0,
     "PN": 0,
     "NP": 0,
     "NN": 0,
     "contradictory": 0,
    }
    plus = train["plus"]
    minus = train["minus"]
    l = len(examples)
    i = 0
    for elem in examples:
        i += 1
        print "%i/%i" % (i, l)
        result = check_hypothesis(plus, minus, elem)
        cv_res[result] += 1
    return cv_res


if __name__ == "__main__":

    index = int(sys.argv[1])

    train = utils.load_train(index)
    test = utils.load_test(index)

    res = classify(train, test)
    print res
    print utils.summary(res)

예제 #9

0

파일 보기

파일: 999_predict.py 프로젝트: KazukiOnodera/Quora

    models.append(model)
    model.save_model('../model/xgb{}.model'.format(i))

train_col = dtrain.feature_names
del train, dtrain, y_train
gc.collect()

imp = ex.getImp(models)
imp.to_csv('../output/imp-{}.csv'.format(date), index=0)

#==============================================================================
# test
#==============================================================================

test1 = utils.load_test(file_in, file_remove)

col = ['test_id', 'question1', 'question2']
sub = test1[col]
test1.drop(col, axis=1, inplace=1)

if is_mirror:
    print('q1_to_q2!')
    test2 = utils.q1_to_q2(test1)

dtest1 = xgb.DMatrix(test1[train_col])
dtest2 = xgb.DMatrix(test2[train_col])
del test1, test2
gc.collect()

sub['is_duplicate'] = 0

예제 #10

0

파일 보기

파일: extract.py 프로젝트: haydnKing/4th-year-project

def get_ppr10():
    p = utils.load_test()
    ppr10 = simple_extract(p)
    return ppr10[0]

예제 #11

0

파일 보기

파일: train.py 프로젝트: XrosLiang/LectureBank

def gae_for(args, iter='0.txt'):
    # print("Using {} dataset".format(args.ds))
    # adj_cd, features = load_data(args.ds)

    'Load features!'
    if args.ds.startswith('tf'):
        if args.labels == 'y':

            adj_cd, adj_dd, features, tags_nodes = my_load_data_tfidf_semi(
                args.wmd)
        else:
            adj_cd, adj_dd, features = my_load_data_tfidf(args.wmd)
    else:
        #
        if args.labels == 'y':
            adj_cd, adj_dd, features, tags_nodes = my_load_data_p2v_semi(
                args.wmd)
        else:
            adj_cd, adj_dd, features = my_load_data_p2v(args.wmd)
            # adj_cd, adj_dd, features = my_load_data_p2v()

    'Load test adjacency matrix'
    adj_test = load_test()
    # adj_test = load_test_10_percent(iter)

    n_nodes, feat_dim = features.shape
    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig_cd = adj_cd

    'do again for adj_dd'
    adj_orig_dd = adj_dd

    adj_train_cd, train_edges, val_edges, val_edges_false = mask_train_edges(
        adj_cd)

    'do again for adj_dd'
    adj_train_dd, train_edges_dd, _, _ = mask_train_edges(adj_dd)  #

    adj_cd = adj_train_cd
    adj_dd = adj_train_dd

    test_edges, test_edges_false = make_test_edges(adj_train_cd, adj_test)

    # Some preprocessing: calculate norm
    adj_norm_cd = preprocess_graph(adj_cd)
    'For loss function: add diag values'
    adj_label_cd = adj_train_cd + sp.eye(adj_train_cd.shape[0])
    adj_label_cd = torch.FloatTensor(adj_label_cd.toarray())
    pos_weight_cd = float(adj_cd.shape[0] * adj_cd.shape[0] -
                          adj_cd.sum()) / adj_cd.sum()
    norm_cd = adj_cd.shape[0] * adj_cd.shape[0] / float(
        (adj_cd.shape[0] * adj_cd.shape[0] - adj_cd.sum()) * 2)

    'do it again for adj_dd'
    adj_norm_dd = preprocess_graph(adj_dd)
    adj_label_dd = adj_train_dd + sp.eye(adj_train_dd.shape[0])
    adj_label_dd = torch.FloatTensor(adj_label_dd.toarray())
    pos_weight_dd = float(adj_dd.shape[0] * adj_dd.shape[0] -
                          adj_dd.sum()) / adj_dd.sum()
    norm_dd = adj_dd.shape[0] * adj_dd.shape[0] / float(
        (adj_dd.shape[0] * adj_dd.shape[0] - adj_dd.sum()) * 2)

    if args.labels == 'y':
        model = GCNModelVAE_Semi(feat_dim, args.hidden1, args.hidden2,
                                 args.dropout, args.class_dim)
    else:
        model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    print('Now start training...')
    hidden_emb = None
    for epoch in range(args.epochs):
        t = time.time()
        model.train()
        optimizer.zero_grad()

        # import pdb;pdb.set_trace

        if args.labels == 'y':
            recovered, mu, logvar, pred_nodes = model(
                features, [adj_norm_cd, adj_norm_dd])
            loss = loss_function_relation_semi(preds=recovered,
                                               labels=(adj_label_cd,
                                                       adj_label_dd),
                                               mu=mu,
                                               logvar=logvar,
                                               n_nodes=n_nodes,
                                               norm=(norm_cd, norm_dd),
                                               pos_weight=(pos_weight_cd,
                                                           pos_weight_dd),
                                               pred_nodes=pred_nodes,
                                               tags_nodes=tags_nodes)
        else:
            recovered, mu, logvar = model(features, [adj_norm_cd, adj_norm_dd])
            loss = loss_function_relation(preds=recovered,
                                          labels=(adj_label_cd, adj_label_dd),
                                          mu=mu,
                                          logvar=logvar,
                                          n_nodes=n_nodes,
                                          norm=(norm_cd, norm_dd),
                                          pos_weight=(pos_weight_cd,
                                                      pos_weight_dd))

        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        hidden_emb = mu.data.numpy()

        acc_curr, p, r, f1, map_curr, roc_curr = my_eval(
            hidden_emb, (adj_orig_cd, adj_orig_dd), val_edges, val_edges_false)

        print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
              "{:.5f}".format(cur_loss), "val_ap=", "{:.5f}".format(map_curr),
              "val_ac=", "{:.5f}".format(acc_curr), "time=",
              "{:.5f}".format(time.time() - t))

    print("Optimization Finished!")

    acc_score, p, r, f1, map_score, roc_score = my_eval_test(
        hidden_emb, (adj_orig_cd, adj_orig_dd), test_edges, test_edges_false)
    # print('Test ROC score: ' + str(roc_score))
    # print('Test AP score: ' + str(ap_score))
    # print ("Test accuracy ", "{:.5f}".format(acc_score))
    # print ('P {:.5f}, R {:.5f}, F1 {:.5f}'.format(p,r,f1))
    print('Acc, P, R, F1, MAP, AUC')
    print('{:5f},{:5f},{:5f},{:5f},{:5f},{:5f}'.format(acc_score, p, r, f1,
                                                       map_score, roc_score))

    return acc_score, p, r, f1, map_score, roc_score

예제 #12

0

파일 보기

파일: 000.py 프로젝트: Libardo1/Home-Credit-Default-Risk-3

"""

import pandas as pd
import os
import utils
utils.start(__file__)
# =============================================================================

folders = ['../feature_prev', '../feature_prev_unused']

for fol in folders:
    os.system(f'rm -rf {fol}')
    os.system(f'mkdir {fol}')

train = utils.load_train(['SK_ID_CURR', 'TARGET'])
test = utils.load_test(['SK_ID_CURR'])

prev = utils.read_pickles('../data/previous_application')

prev_train = pd.merge(prev, train, on='SK_ID_CURR', how='inner')
prev_test = pd.merge(prev, test, on='SK_ID_CURR', how='inner')

utils.to_pickles(prev_train, '../data/prev_train', utils.SPLIT_SIZE)
utils.to_pickles(prev_test, '../data/prev_test', utils.SPLIT_SIZE)

utils.to_pickles(prev_train[['TARGET']], '../data/prev_label',
                 utils.SPLIT_SIZE)
"""

prev_train = utils.read_pickles('../data/prev_train')
prev_test  = utils.read_pickles('../data/prev_test')

예제 #13

0

파일 보기

파일: test_voting.py 프로젝트: sebastianden/alpaca

from alpaca import Alpaca
from utils import load_test, split_df, TimeSeriesResampler, confusion_matrix
import time
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.pipeline import Pipeline
import numpy as np
import pandas as pd

if __name__ == '__main__':

    X, y = load_test()
    # Length of timeseries for resampler and cnn
    sz = 230
    # Number of channels for cnn
    num_channels = X.shape[-1]
    # Number of classes for cnn
    num_classes = np.unique(y).shape[0]
    classes = np.array(["0", "1", "2", "3", "4", "?"])

    repetitions = 1

    results = []
    outliers = np.empty((0, 230 * 3 + 5))

    for r in range(repetitions):
        print("Repetition #", r)

        X, y = shuffle(X, y, random_state=r)
        # Turn y to numpy array
        y = np.array(y)

예제 #14

0

파일 보기

def predict():
    model = RL(istrained=True, name='rl_noun')
    te_X, _ = load_test(size='_t')
    te_X = norm4d_per_sample(te_X)
    return model.predict(te_X)

예제 #15

0

파일 보기

파일: q3_knn.py 프로젝트: tauster/Handwritten-Digit-Classifier

import numpy as np
from run_knn import run_knn
import matplotlib.pyplot as plt
"""
CSC 2515 - Assignment 1
Tausif Sharif

Notes:
	- Runs the run_knn.py functions from here
	- Will show and save relevant plots
"""

trainInputs, trainTargets = load_train()
smallInputs, smallTargets = load_train_small()
validInputs, validTargets = load_valid()
testInputs, testTargets = load_test()

kList = [1, 3, 5, 7, 9]
classRates = range(0, len(kList))
classRatesT = range(0, len(kList))
listCount = 0

for k in kList:
    correctCount = 0
    validLables = run_knn(k, trainInputs, trainTargets, validInputs)
    for i in xrange(len(validLables)):
        if validLables[i] == validTargets[i]:
            correctCount += 1
    classRates[listCount] = (correctCount / float(len(validLables)))
    listCount += 1

예제 #16

0

파일 보기

파일: main_tf_gd.py 프로젝트: Sasha-P/Whats-cooking

import utils as utl
import tensorflow as tf
import numpy as np


cuisine_list, ingredients_list, xs, ys = utl.load_train('vector')
ts, ids = utl.load_test(ingredients_list)

cuisine_count = len(cuisine_list)
ingredients_count = len(ingredients_list)

x = tf.placeholder(tf.float32, [None, ingredients_count])
W = tf.Variable(tf.zeros([ingredients_count, cuisine_count]))
b = tf.Variable(tf.zeros([cuisine_count]))

y = tf.nn.softmax(tf.matmul(x, W) + b)

y_ = tf.placeholder(tf.float32, [None, cuisine_count])

t = tf.placeholder(tf.float32, [None, ingredients_count])

p = tf.nn.softmax(tf.matmul(t, W) + b)

cross_entropy = -tf.reduce_sum(y_*tf.log(y))
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(cross_entropy)
# train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

init = tf.initialize_all_variables()

sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
sess.run(init)

예제 #17

0

파일 보기

파일: build_baseline_features.py 프로젝트: parkerzf/kaggle-expedia

	'date_time_dow', 'date_time_hour', 'date_time_month', 'srch_ci_dow', 'srch_ci_month', \
	'srch_co_dow', 'srch_co_month', 'booking_window', 'length_of_stay']], \
	site_name_encoding, posa_continent_encoding, user_location_country_encoding, user_location_region_encoding, \
	channel_encoding, srch_destination_type_id_encoding, hotel_continent_encoding, hotel_country_encoding], axis=1)

train_is_booking_features.to_csv(utils.processed_data_path +
	'_'.join(['train_is_booking_baseline', 'year', utils.train_year]) + '.csv', 
	header=True, index=False)

del train_is_booking

#############################################################
####################   test dataset      ####################
#############################################################

test  = utils.load_test('baseline')

print 'generate test time features...'
time_features_enricher(test)

print 'generate test one hot encoding features...'
site_name_encoding, posa_continent_encoding, user_location_country_encoding, user_location_region_encoding, \
	channel_encoding, srch_destination_type_id_encoding, hotel_continent_encoding, hotel_country_encoding = \
	gen_all_top_one_hot_encoding_columns(test)

print 'fill test na features...'
fill_na_features(test)

print 'concat all test baseline features...'
test_features = pd.concat([test[['date_time', 'orig_destination_distance', \
	'is_mobile', 'is_package', 'srch_adults_cnt', 'srch_children_cnt', 'srch_rm_cnt', \

예제 #18

0

파일 보기

def predict():
    model = PlainCNN(istrained=True)
    te_X, _ = load_test(size='_t')
    te_X = norm4d_per_sample(te_X)
    return model.predict(te_X)

예제 #19

0

파일 보기

파일: 002_LabelEncoding.py 프로젝트: Libardo1/Home-Credit-Default-Risk-3

    'NAME_TYPE_SUITE',
    'NAME_INCOME_TYPE',
    'NAME_EDUCATION_TYPE',
    'NAME_FAMILY_STATUS',
    'NAME_HOUSING_TYPE',
    'OCCUPATION_TYPE',
    'WEEKDAY_APPR_PROCESS_START',
    'ORGANIZATION_TYPE',
    'FONDKAPREMONT_MODE',
    'HOUSETYPE_MODE',
    'WALLSMATERIAL_MODE',
    #                         'EMERGENCYSTATE_MODE'
]

train = utils.load_train(categorical_features)
test = utils.load_test(categorical_features)

le = LabelEncoder()
for c in categorical_features:
    train[c].fillna('na dayo', inplace=True)
    test[c].fillna('na dayo', inplace=True)
    le.fit(train[c].append(test[c]))
    train[c] = le.transform(train[c])
    test[c] = le.transform(test[c])

utils.to_feature(train.add_prefix(PREF), '../feature/train')
utils.to_feature(test.add_prefix(PREF), '../feature/test')

#==============================================================================
utils.end(__file__)

예제 #20

0

파일 보기

파일: 110_ID.py 프로젝트: Libardo1/Home-Credit-Default-Risk-3

import pandas as pd
import os
import utils
utils.start(__file__)
#==============================================================================
PREF = 'f110_'

KEY = 'SK_ID_CURR'

os.system(f'rm ../feature/t*_{PREF}*')

# =============================================================================
# load
# =============================================================================
train = utils.load_train(['SK_ID_CURR']).set_index('SK_ID_CURR')
test = utils.load_test(['SK_ID_CURR']).set_index('SK_ID_CURR')
prev = utils.read_pickles('../data/previous_application',
                          ['SK_ID_CURR', 'SK_ID_PREV'])

# =============================================================================
# prev
# =============================================================================
gr = prev.groupby('SK_ID_CURR')

train['SK_ID_PREV_min'] = gr.SK_ID_PREV.min()
train['SK_ID_PREV_mean'] = gr.SK_ID_PREV.mean()
train['SK_ID_PREV_max'] = gr.SK_ID_PREV.max()
train['SK_ID_PREV_median'] = gr.SK_ID_PREV.median()
train['SK_ID_PREV_std'] = gr.SK_ID_PREV.std()
train['SK_ID_PREV_std-d-mean'] = train['SK_ID_PREV_std'] / train[
    'SK_ID_PREV_mean']

예제 #21

0

파일 보기

    def _conditional_change(dictionary, target, value):
        dictionary[target] = test.get(value, dictionary[target])

    def _conditional_remove(dictionary, key):
        if key in test:
            if not test[key]:
                dictionary.pop(key)

    _conditional_change(settings["input"], "shape", "shape")
    _conditional_change(settings["input"], "type", "input_type")
    _conditional_change(settings["normalize"], "means", "means")
    _conditional_change(settings["normalize"], "stddevs", "stddevs")
    _conditional_change(settings["return"]["result"], "operations",
                        "operations")
    _conditional_change(settings["return"]["result"], "arguments", "arguments")
    _conditional_change(settings["return"]["result"], "type", "return_type")
    _conditional_change(settings["return"]["result"], "item", "item")

    # Remove if no output or result should be returned
    _conditional_remove(settings, "normalize")
    _conditional_remove(settings["return"], "output")
    _conditional_remove(settings["return"], "result")


if __name__ == "__main__":
    args = utils.parse_args()
    test = utils.load_test(args)
    settings = load_settings()
    imput_arguments(settings, test)
    save(settings)

예제 #22

0

파일 보기

파일: test.py 프로젝트: zhaozhiyong19890102/GRU4Rec_TensorFlow

"""
Created on Feb 27 2017
Author: Weiping Song
"""
import sys
import numpy as np
import argparse
import tensorflow as tf

from model import GRU4Rec
from utils import load_test

unfold_max = 20
cut_off = 20

test_x, test_y, n_items = load_test(unfold_max)


class Args():
    is_training = False
    layers = 1
    rnn_size = 100
    n_epochs = 10
    batch_size = 50
    keep_prob = 1
    learning_rate = 0.002
    decay = 0.98
    decay_steps = 1e3 * 5
    sigma = 0.0005
    init_as_normal = False
    grad_cap = 0

예제 #23

0

파일 보기

파일: test_knn.py 프로젝트: lwoiceshyn/Machine-Learning-Assignments

# Leo Woiceshyn, Student Number 998082159, for CSC2515 Assignment 1

import numpy as np
import utils as ut
import matplotlib.pyplot as plt
from run_knn import run_knn

#Load data
train_data, train_labels = ut.load_train()
valid_data, valid_labels = ut.load_valid()
test_data, test_labels = ut.load_test()

#Create empty arrays for accuracy values
validation_accuracies = []
test_accuracies = []

#List for k
k_values = [1,3,5,7,9]

#Validation Set
for k in k_values:

    correct_predictions = 0
    total_predictions = 0
    predicted_valid_labels = run_knn(k, train_data, train_labels, valid_data)

    #Iterate through the predicted labels and compare them to the true labels to determine validation accuracy
    for index, value in enumerate(predicted_valid_labels):
        if predicted_valid_labels[index] == valid_labels[index]:
            correct_predictions += 1
            total_predictions += 1

예제 #24

0

파일 보기

	if FLAGS.is_train:
		train_data, valid_data = load_data(FLAGS.train_data)

		model = Model(FLAGS)
		model.print_parameters()
		if tf.train.get_checkpoint_state(FLAGS.train_dir):
			model.saver.restore(sess, tf.train.latest_checkpoint(FLAGS.train_dir))
		else:
			sess.run(tf.global_variables_initializer())
		epoch = 0
		pre_loss = 1000000.0;
		while epoch < FLAGS.epoch:
			#train data
			start_time = time.time()
			train_acc, train_loss = train(sess, model, train_data, FLAGS.batch_size, trainable = True)
			epoch_time = time.time() - start_time
			lr = model.learning_rate.eval()
			print("epoch %d time: %.4f seconds, learning_rate: %.6f\n train loss: %.6f, train accuracy: %.6f" % (epoch, epoch_time, lr, train_loss, train_acc))

			valid_acc, valid_loss = train(sess, model, valid_data, FLAGS.batch_size, trainable = False)
			print("valid loss: %.6f, valid accuracy: %.6f" % (valid_loss, valid_acc))
			if valid_loss < pre_loss:
				pre_loss = valid_loss
				model.saver.save(sess, '%s/ckp' % FLAGS.train_dir, global_step = epoch)
				sess.run(model.learning_rate_decay_op)
			epoch += 1
	else:
		model = Model(FLAGS)
		model.saver.restore(sess, tf.train.latest_checkpoint(FLAGS.train_dir))
		test_data = load_test(FLAGS.test_data)
		get_test_label(sess, model, test_data, )

예제 #25

0

파일 보기

#                         'CODE_GENDER',
#                         'FLAG_OWN_CAR',
#                         'FLAG_OWN_REALTY',
                         'NAME_TYPE_SUITE',
                         'NAME_INCOME_TYPE',
                         'NAME_EDUCATION_TYPE',
                         'NAME_FAMILY_STATUS',
                         'NAME_HOUSING_TYPE',
                         'OCCUPATION_TYPE',
                         'WEEKDAY_APPR_PROCESS_START',
                         'ORGANIZATION_TYPE',
                         'FONDKAPREMONT_MODE',
                         'HOUSETYPE_MODE',
                         'WALLSMATERIAL_MODE',
#                         'EMERGENCYSTATE_MODE'
                         ]

# =============================================================================
# 
# =============================================================================
train = utils.load_train().drop(['SK_ID_CURR', 'TARGET']+categorical_features, axis=1)
test  = utils.load_test().drop(['SK_ID_CURR']+categorical_features, axis=1)


utils.to_feature(train.add_prefix(PREF), '../feature/train')
utils.to_feature(test.add_prefix(PREF),  '../feature/test')

#==============================================================================
utils.end(__file__)

예제 #26

0

파일 보기

import os
import utils

utils.start(__file__)
#==============================================================================

PREF = 'f705_'

KEY = 'SK_ID_CURR'

os.system(f'rm ../feature/t*_{PREF}*')
# =============================================================================
# load
# =============================================================================
train = utils.load_train([KEY]).set_index(KEY)
test = utils.load_test([KEY]).set_index(KEY)

prev_train = pd.read_feather('../data/prev_train_imputation_f705.f')
prev_test = pd.read_feather('../data/prev_test_imputation_f705.f')

# =============================================================================
# feature
# =============================================================================
# train
gr = prev_train.groupby(KEY)
train['prev_y_min'] = gr.y_pred.min()
train['prev_y_mean'] = gr.y_pred.mean()
train['prev_y_max'] = gr.y_pred.max()
train['prev_y_var'] = gr.y_pred.var()
train['prev_y_median'] = gr.y_pred.median()
train['prev_y_q25'] = gr.y_pred.quantile(.25)

예제 #27

0

파일 보기

파일: main_sklearn_svm.py 프로젝트: Sasha-P/Whats-cooking

from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
import utils as utl

cuisine_list, ingredients_list, x, y = utl.load_train('number')
classifier = OneVsRestClassifier(LinearSVC(random_state=0)).fit(x, y)
p = classifier.predict(x)

precision = 0
for i in range(len(y)):
    if y[i] == p[i]:
        precision += 1
accuracy = (1.0 * precision) / len(y)

print('Training Set Accuracy:', accuracy)

t, ids = utl.load_test(ingredients_list)
p = classifier.predict(t)
utl.save_result('sk_svm', cuisine_list, p, ids, 'number')

예제 #28

0

파일 보기

import numpy as np
import pandas as pd
import sys
import os
from sklearn.externals import joblib

scriptpath = os.path.dirname(os.path.realpath(sys.argv[0])) + '/../'
sys.path.append(os.path.abspath(scriptpath))
import utils

test  = utils.load_test('group_by')

def predict_group_by_model(group_by_field):
	"""
	Use group by model to predict the top 5 hotel clusters for the test data
	:param group_by_field: group by field to get the related group by model 
	:return: the dataframe with the submission format according to the model
	"""
	group_by_model = joblib.load(utils.model_path + 
		'_'.join(['top', str(utils.k), 'cw', str(utils.click_weight), 'group', group_by_field, 'year', utils.train_year]) + '.pkl')
	merged_df = test.merge(group_by_model, how='left',left_on= group_by_field, right_index=True)

	merged_df.reset_index(inplace = True)
	result = merged_df[['index', 'hotel_cluster']]
	result.columns =  ['id', 'hotel_cluster']
	return result


print 'predict with orig_destination_distance model...'
result = predict_group_by_model('orig_destination_distance')
print 'predict with srch_destination_id model...'

예제 #29

0

파일 보기

def svm(training_file, development_file, test_file, counts):
    twords, tlabels_true = hs.load_file(training_file)
    dwords, dlabels_true = hs.load_file(development_file)
    test_words = utils.load_test(test_file)

    ## Length
    tlength_feature = hs.length_feature(twords)
    tlength_normalized, tl_mean, tl_std = utils.normalize(tlength_feature)
    dlength_feature = hs.length_feature(dwords)
    dlength_normalized = utils.normalize_with_params(dlength_feature, tl_mean,
                                                     tl_std)

    ## Frequency
    tfrequency_feature = hs.frequency_feature(twords, counts)
    tfrequency_normalized, tf_mean, tf_std = utils.normalize(
        tfrequency_feature)
    dfrequency_feature = hs.frequency_feature(dwords, counts)
    dfrequency_normalized = utils.normalize_with_params(
        dfrequency_feature, tf_mean, tf_std)

    ## Syllables
    tsyllables_feature = features.syllables_feature(twords)
    tsyllables_normalized, tsy_mean, tsy_std = utils.normalize(
        tsyllables_feature)
    dsyllables_feature = features.syllables_feature(dwords)
    dsyllables_normalized = utils.normalize_with_params(
        dsyllables_feature, tsy_mean, tsy_std)

    ## Vowels
    tvowels_feature = features.vowels_feature(twords)
    tvowels_normalized, tv_mean, tv_std = utils.normalize(tvowels_feature)
    dvowels_feature = features.vowels_feature(dwords)
    dvowels_normalized = utils.normalize_with_params(dvowels_feature, tv_mean,
                                                     tv_std)

    ## Consonants
    tconsonant_feature = features.vowels_feature(twords)
    tconsonant_normalized, tc_mean, tc_std = utils.normalize(
        tconsonant_feature)
    dconsonant_feature = features.vowels_feature(dwords)
    dconsonant_normalized = utils.normalize_with_params(
        dconsonant_feature, tc_mean, tc_std)

    ## Senses
    tsenses_feature = features.senses_feature(twords)
    tsenses_normalized, tse_mean, tse_std = utils.normalize(tsenses_feature)
    dsenses_feature = features.senses_feature(dwords)
    dsenses_normalized = utils.normalize_with_params(dsenses_feature, tse_mean,
                                                     tse_std)

    ## Hypernyms
    thypernyms_feature = features.hypernyms_feature(twords)
    thypernyms_normalized, th_mean, th_std = utils.normalize(
        thypernyms_feature)
    dhypernyms_feature = features.hypernyms_feature(dwords)
    dhypernyms_normalized = utils.normalize_with_params(
        dhypernyms_feature, th_mean, th_std)

    x_train = np.column_stack((tlength_normalized, tfrequency_normalized,
                               tsyllables_normalized, tsenses_normalized))
    y = tlabels_true

    x_dev = np.column_stack((dlength_normalized, dfrequency_normalized,
                             dsyllables_normalized, dsenses_normalized))

    clf = SVC(C=48,
              cache_size=200,
              class_weight=None,
              coef0=0.0,
              decision_function_shape='ovr',
              degree=3,
              gamma='scale',
              kernel='rbf',
              max_iter=-1,
              probability=False,
              random_state=None,
              shrinking=True,
              tol=0.001,
              verbose=False)
    clf.fit(x_train, y)
    y_pred = clf.predict(x_dev)

    daccuracy = hs.get_accuracy(y_pred, dlabels_true)
    dprecision = hs.get_precision(y_pred, dlabels_true)
    drecall = hs.get_recall(y_pred, dlabels_true)
    dfscore = hs.get_fscore(y_pred, dlabels_true)

    # Test Set
    # test_length_feature = hs.length_feature(test_words)
    # test_frequency_feature = hs.frequency_feature(test_words, counts)
    # test_syllables_feature = features.syllables_feature(test_words)
    # test_senses_feature = features.senses_feature(test_words)
    #
    # test_length_normalized = utils.normalize_with_params(test_length_feature, tl_mean, tl_std)
    # test_frequency_normalized = utils.normalize_with_params(test_frequency_feature, tf_mean, tf_std)
    # test_syllables_normalized = utils.normalize_with_params(test_syllables_feature, tsy_mean, tsy_std)
    # test_senses_normalized = utils.normalize_with_params(test_senses_feature, tse_mean, tse_std)
    #
    # x_test = np.column_stack((test_length_normalized, test_frequency_normalized,
    #                          test_syllables_normalized, test_senses_normalized))
    # y_pred_test = clf.predict(x_test)
    #
    # f = open('test_labels.txt', 'w')
    # for item in y_pred_test:
    #     print(item, file=f)
    # f.close()

    # training_performance = (tprecision, trecall, tfscore)
    development_performance = (daccuracy, dprecision, drecall, dfscore)
    return development_performance

예제 #30

0

파일 보기

def random_forest(training_file, development_file, test_file, counts):
    twords, tlabels_true = hs.load_file(training_file)
    dwords, dlabels_true = hs.load_file(development_file)
    test_words = utils.load_test(test_file)

    ## Length
    tlength_feature = hs.length_feature(twords)
    tlength_normalized, tl_mean, tl_std = utils.normalize(tlength_feature)
    dlength_feature = hs.length_feature(dwords)
    dlength_normalized = utils.normalize_with_params(dlength_feature, tl_mean,
                                                     tl_std)

    ## Frequency
    tfrequency_feature = hs.frequency_feature(twords, counts)
    tfrequency_normalized, tf_mean, tf_std = utils.normalize(
        tfrequency_feature)
    dfrequency_feature = hs.frequency_feature(dwords, counts)
    dfrequency_normalized = utils.normalize_with_params(
        dfrequency_feature, tf_mean, tf_std)

    ## Syllables
    tsyllables_feature = features.syllables_feature(twords)
    tsyllables_normalized, tsy_mean, tsy_std = utils.normalize(
        tsyllables_feature)
    dsyllables_feature = features.syllables_feature(dwords)
    dsyllables_normalized = utils.normalize_with_params(
        dsyllables_feature, tsy_mean, tsy_std)

    ## Vowels
    tvowels_feature = features.vowels_feature(twords)
    tvowels_normalized, tv_mean, tv_std = utils.normalize(tvowels_feature)
    dvowels_feature = features.vowels_feature(dwords)
    dvowels_normalized = utils.normalize_with_params(dvowels_feature, tv_mean,
                                                     tv_std)

    ## Consonants
    tconsonant_feature = features.vowels_feature(twords)
    tconsonant_normalized, tc_mean, tc_std = utils.normalize(
        tconsonant_feature)
    dconsonant_feature = features.vowels_feature(dwords)
    dconsonant_normalized = utils.normalize_with_params(
        dconsonant_feature, tc_mean, tc_std)

    ## Senses
    tsenses_feature = features.senses_feature(twords)
    tsenses_normalized, tse_mean, tse_std = utils.normalize(tsenses_feature)
    dsenses_feature = features.senses_feature(dwords)
    dsenses_normalized = utils.normalize_with_params(dsenses_feature, tse_mean,
                                                     tse_std)

    ## Hypernyms
    thypernyms_feature = features.hypernyms_feature(twords)
    thypernyms_normalized, th_mean, th_std = utils.normalize(
        thypernyms_feature)
    dhypernyms_feature = features.hypernyms_feature(dwords)
    dhypernyms_normalized = utils.normalize_with_params(
        dhypernyms_feature, th_mean, th_std)

    x_train = np.column_stack(
        (tlength_normalized, tfrequency_normalized, tsyllables_normalized,
         tsenses_normalized, thypernyms_normalized))
    y = tlabels_true

    x_dev = np.column_stack(
        (dlength_normalized, dfrequency_normalized, dsyllables_normalized,
         dsenses_normalized, dhypernyms_normalized))

    clf = RandomForestClassifier(bootstrap=True,
                                 class_weight=None,
                                 criterion='gini',
                                 max_depth=7,
                                 max_features=3,
                                 max_leaf_nodes=None,
                                 min_impurity_decrease=0.0,
                                 min_impurity_split=None,
                                 min_samples_leaf=8,
                                 min_samples_split=50,
                                 min_weight_fraction_leaf=0.0,
                                 n_estimators=70,
                                 n_jobs=None,
                                 oob_score=False,
                                 random_state=0,
                                 verbose=0,
                                 warm_start=False)

    clf.fit(x_train, y)
    y_pred = clf.predict(x_dev)

    daccuracy = hs.get_accuracy(y_pred, dlabels_true)
    dprecision = hs.get_precision(y_pred, dlabels_true)
    drecall = hs.get_recall(y_pred, dlabels_true)
    dfscore = hs.get_fscore(y_pred, dlabels_true)

    # Test Set
    test_length_feature = hs.length_feature(test_words)
    test_frequency_feature = hs.frequency_feature(test_words, counts)
    test_syllables_feature = features.syllables_feature(test_words)
    test_vowels_feature = features.vowels_feature(test_words)
    test_consonants_feature = features.consonants_feature(test_words)
    test_senses_feature = features.senses_feature(test_words)
    test_hypernyms_feature = features.hypernyms_feature(test_words)

    test_length_normalized = utils.normalize_with_params(
        test_length_feature, tl_mean, tl_std)
    test_frequency_normalized = utils.normalize_with_params(
        test_frequency_feature, tf_mean, tf_std)
    test_syllables_normalized = utils.normalize_with_params(
        test_syllables_feature, tsy_mean, tsy_std)
    test_vowels_normalized = utils.normalize_with_params(
        test_vowels_feature, tv_mean, tv_std)
    test_consonants_normalized = utils.normalize_with_params(
        test_consonants_feature, tc_mean, tc_std)
    test_senses_normalized = utils.normalize_with_params(
        test_senses_feature, tse_mean, tse_std)
    test_hypernyms_normalized = utils.normalize_with_params(
        test_hypernyms_feature, th_mean, th_std)

    x_test = np.column_stack(
        (test_length_normalized, test_frequency_normalized,
         test_syllables_normalized, test_senses_normalized,
         test_hypernyms_normalized))
    y_pred_test = clf.predict(x_test)

    f = open('test_labels.txt', 'w')
    for item in y_pred_test:
        print(item, file=f)
    f.close()

    # training_performance = (tprecision, trecall, tfscore)
    development_performance = (daccuracy, dprecision, drecall, dfscore)
    return development_performance

예제 #31

0

파일 보기

파일: extract.py 프로젝트: haydnKing/4th-year-project

def get_ppr10():
	p = utils.load_test()
	ppr10 = simple_extract(p)
	return ppr10[0]

예제 #32

0

파일 보기

파일: main.py 프로젝트: colinsongf/Semantic_Matching

def evaluate_nonepisode(data, config, model, loss_fn, eval):

    x_te, y_te, te_len, te_mask, text_te = utils.load_test(data, eval)
    x_te, y_te, te_len, te_mask, text_te = utils.shuffle_data(
        x_te, y_te, te_len, te_mask, text_te)
    y_te_ind = utils.create_index(y_te)

    reverse_dict = data['reverse_dict']
    num_class = np.unique(y_te)

    num_test_query = config['num_query_per_class'] * num_class.shape[0]
    x_support, y_support, x_len_support, support_m, support_text = utils.load_support(
        data, False)
    y_support_ind = utils.create_index(y_support)
    test_batch = int(math.ceil(x_te.shape[0] / float(num_test_query)))

    total_prediction = np.array([], dtype=np.int64)
    total_y_test = np.array([], dtype=np.int64)
    cum_loss = 0.0
    kl_loss = torch.nn.KLDivLoss(reduction='batchmean').to(config['device'])

    with torch.no_grad():
        for batch in range(test_batch):
            support_feature, support_class, support_len, support_ind, support_mask = utils.init_support_query(
                config['num_samples_per_class'], x_te.shape[1],
                num_class.shape[0])
            query_feature, query_class, query_len, query_ind, query_mask = utils.init_support_query(
                config['num_query_per_class'], x_te.shape[1],
                num_class.shape[0])

            begin_index = batch * (num_test_query)
            end_index = min((batch + 1) * num_test_query, x_te.shape[0])
            query_feature = x_te[begin_index:end_index]
            query_len = te_len[begin_index:end_index]
            query_class = y_te[begin_index:end_index]
            query_mask = te_mask[begin_index:end_index]
            query_text = text_te[begin_index:end_index]

            support_idx = 0
            num_class = np.unique(y_support)
            for counter in range(num_class.shape[0]):
                class_index = np.where(y_support == num_class[counter])[0]
                old_support_idx = support_idx
                support_idx = support_idx + config['num_samples_per_class']
                support_feature[old_support_idx:support_idx] = x_support[
                    class_index]
                support_class[old_support_idx:support_idx] = y_support[
                    class_index]
                support_len[old_support_idx:support_idx] = x_len_support[
                    class_index]
                support_mask[old_support_idx:support_idx] = support_m[
                    class_index]
                support_text[old_support_idx:support_idx] = support_text[
                    class_index]
            cs = np.unique(query_class)
            #Obtain indexes
            q_ind_key = {}
            s_ind_key = {}
            for i in range(len(cs)):
                q_index = np.where(query_class == cs[i])[0]
                s_index = np.where(support_class == cs[i])[0]
                q_ind_key[cs[i]] = q_index
                s_ind_key[cs[i]] = s_index

        # Changet values
            for i in range(len(cs)):
                query_class[q_ind_key[cs[i]]] = i
                support_class[s_ind_key[cs[i]]] = i

            support_ind = utils.create_index(support_class)
            query_ind = utils.create_index(query_class)

            support_feature, support_id, support_ind, support_len, support_mask = convert_to_tensor(
                support_feature, support_class, support_ind, support_len,
                support_mask, config['device'])
            query_feature, query_id, query_ind, query_len, query_mask = convert_to_tensor(
                query_feature, query_class, query_ind, query_len, query_mask,
                config['device'])
            prediction, _, support_attn, query_attn, _, _ = model.forward(
                support_feature, support_len, support_mask, query_feature,
                query_len, query_mask)

            pred = np.argmax(prediction.cpu().detach().numpy(), 1)
            total_prediction = np.concatenate((total_prediction, pred))
            total_y_test = np.concatenate((total_y_test, query_class))
    acc = accuracy_score(total_y_test, total_prediction)

    cnf = confusion_matrix(total_y_test, total_prediction)
    print("Confusion matrix:")
    print(cnf)
    return acc

예제 #33

0

파일 보기

파일: stack.py 프로젝트: glouppe/kaggle-higgs

    X_fold = np.hstack((X_fold, X_pred))

    all_X.append(X_fold)
    all_y.append(y_fold)
    all_w.append(w_fold)

X = np.vstack(all_X)
y = np.concatenate(all_y)
w = np.concatenate(all_w)

clf = Classifier(**params)
w = rescale(w)
w = rebalance(y, w)

try:
    clf.fit(X, y, sample_weight=w)
except:
    clf.fit(X, y)


# And make a submussion
print "Making submission..."
X_test, _, _, _ = load_test()
X_pred = load_predictions("stack/*-test.npy")
X_test = np.hstack((X_test, X_pred))

make_submission(clf, threshold, "output-stacking.csv", X_test=X_test)

import IPython; IPython.embed()

예제 #34

0

파일 보기

# main
# =============================================================================
if __name__ == "__main__":
    utils.start(__file__)

    # train
    tr = utils.load_train(['object_id'])

    df = pd.read_pickle(
        '../FROM_MYTEAM/LCfit_feature_allSN_i_train_v3_20181215.pkl.gz')
    df = pd.merge(tr, df, on='object_id', how='left')
    df.reset_index(drop=True, inplace=True)
    get_feature(df)

    del df['object_id']
    df.add_prefix(PREF + '_').to_pickle(f'../data/train_{PREF}.pkl')

    # test
    te = utils.load_test(['object_id'])
    df = pd.read_pickle(
        '../FROM_MYTEAM/LCfit_feature_allSN_i_test_v3_20181215.pkl.gz')
    df = pd.merge(te, df, on='object_id', how='left')
    df.reset_index(drop=True, inplace=True)
    get_feature(df)

    del df['object_id']
    df = df.add_prefix(PREF + '_')
    utils.to_pkl_gzip(df, f'../data/test_{PREF}.pkl')

    utils.end(__file__)

예제 #35

0

파일 보기

파일: l2_layer_strpool.py 프로젝트: CounterZone/COMP-540-2016

import lasagne
import utils
from lasagne.layers import *
from nolearn.lasagne import NeuralNet
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn import cross_validation
from nolearn.lasagne import TrainSplit
from nolearn.lasagne import objective
from lasagne.nonlinearities import softmax
from lasagne.updates import momentum



X, y=utils.load_train()
X_test=utils.load_test()
X=X.reshape([X.shape[0],3,32,32])
y=np.array(y,dtype="int32")
X_test=X_test.reshape([X_test.shape[0],3,32,32])

layers = [
    # layer dealing with the input data
    (InputLayer, {'shape': (None, 3, 32, 32)}),

    # first stage of our convolutional layers

    # second stage of our convolutional layers
    (Conv2DLayer, {'pad':2,'num_filters': 32, 'filter_size': 5,'W':lasagne.init.Normal(std=0.01)}),
    (ParametricRectifierLayer, {'alpha':lasagne.init.Constant(0)}),
    (Pool2DLayer, {'pool_size': 2,'stride':2,'mode':'max'}),

예제 #36

0

파일 보기

파일: submission.py 프로젝트: FlorianMuellerklein/backseat_driver

'''
X = T.tensor4('X')
Y = T.ivector('y')

# set up theano functions to generate output by feeding data through network, any test outputs should be deterministic
output_layer = ResNet_FullPre(X, n=5)
output_test = lasagne.layers.get_output(output_layer, deterministic=True)

# set up training and prediction functions
predict_proba = theano.function(inputs=[X], outputs=output_test)

'''
Load data and make predictions
'''
# load data
X_test, X_test_id = load_test(cache=True)

nn_count = 1
for ensb in range(19):
    # load network weights
    f = gzip.open('data/weights/resnet32_fullpre_' + str(nn_count) + '.pklz', 'rb')
    all_params = pickle.load(f)
    f.close()
    helper.set_all_param_values(output_layer, all_params)

    '''
    # make regular predictions
    predictions = []
    for j in range((X_test.shape[0] + BATCHSIZE - 1) // BATCHSIZE):
        sl = slice(j * BATCHSIZE, (j + 1) * BATCHSIZE)
        X_batch = X_test[sl]

예제 #37

0

파일 보기

파일: build_group_by_features.py 프로젝트: parkerzf/kaggle-expedia

result = utils.fill_all_top_5(train_is_booking, result, 'hotel_market', 'train')
print 'hotel clusters to ranking features...'
new_result = result.apply(lambda row: hotel_clusters_to_ranking_features(row), axis=1)
new_result.columns = ['_'.join(['hotel_cluster', str(hotel_cluster_id), 'rank']) for hotel_cluster_id in range(100)]
new_result = pd.concat([train_is_booking['date_time'], new_result], axis=1)

new_result.to_csv(utils.processed_data_path +
	'_'.join(['train_is_booking_group_by', 'top', str(utils.k), 'cw', str(utils.click_weight), 'year', utils.train_year]) +
	'.csv', header=True, index=False)

del train_is_booking

#############################################################
####################   test dataset      ####################
#############################################################
test  = utils.load_test('group_by')

print 'generate top k hotel clusters with orig_destination_distance model...'
result = gen_top_k_hotel_cluster(test, 'orig_destination_distance')
print 'generate top k hotel clusters with srch_destination_id model...'
result = utils.fill_all_top_5(test, result, 'srch_destination_id')
print 'generate top k hotel clusters with user_id model...'
result = utils.fill_all_top_5(test, result, 'user_id')
print 'generate top k hotel clusters with hotel_market model...'
result = utils.fill_all_top_5(test, result, 'hotel_market')
print 'hotel clusters to ranking features...'
new_result = result.apply(lambda row: hotel_clusters_to_ranking_features(row), axis=1)
new_result.columns = ['_'.join(['hotel_cluster', str(hotel_cluster_id), 'rank']) for hotel_cluster_id in range(100)]

new_result.to_csv(utils.processed_data_path +
	'_'.join(['test_groupb_by', 'top', str(utils.k), 'cw', str(utils.click_weight), 'year', utils.train_year]) +

예제 #38

0

파일 보기

파일: test.py 프로젝트: Likarian/VDSR-keras

from keras import metrics
import tensorflow as tf
import numpy as np
from utils import load_test, PSNR
import scipy.misc
import argparse

parser = argparse.ArgumentParser(description='Test function')
parser.add_argument('--test', metavar='test', type=str, help='test directory')
parser.add_argument('--network',
                    metavar='network',
                    type=str,
                    help='network weight')
args = parser.parse_args()

val_in, val_out = load_test(directory=args.test)
model = load_model(args.network)

prediction = model.predict(val_in, batch_size=1, verbose=1)

Result = PSNR(val_out, prediction)
sess = tf.Session()
RR = sess.run(Result)

print(RR)

for img_count in range(prediction.shape[0]):
    img_in = val_in[img_count, :, :, :]
    img_out = prediction[img_count, :, :, :]
    img_gt = val_out[img_count, :, :, :]
    scipy.misc.imsave('./Result/LR' + '{0:03d}'.format(img_count) + '.png',