def train_and_get_result(_df, _dft, store_item_nbrs, model, total_features):
    df = _df.copy()
    df_t = _dft.copy()
    RES = []
    total = 0
    for sno, ino in store_item_nbrs:
        # Store 35 is skipped entirely
        if sno == 35:
            continue
        res = pd.DataFrame()

        # Training rows for this (store, item) pair
        df1 = df[(df.store_nbr == sno) & (df.item_nbr == ino)]
        X_train, y_train = ut.get_train_data(df1)
        X_train = X_train.drop(['store_nbr', 'item_nbr'], axis=1)
        y_train = y_train[X_train.index.values]

        # Test rows for the same pair; keep the identifying columns for the output
        df2 = df_t[(df_t.store_nbr == sno) & (df_t.item_nbr == ino)]
        X_predict = ut.get_test_data(df2)
        res['date'] = X_predict['date']
        res['store_nbr'] = X_predict['store_nbr']
        res['item_nbr'] = X_predict['item_nbr']
        X_predict = X_predict.drop(['date', 'store_nbr', 'item_nbr'], axis=1)

        # Restrict both sets to the feature list selected for this pair
        X_train = X_train[total_features[total].tolist()]
        X_predict = X_predict[total_features[total].tolist()]

        # Fit the model and clip predictions at zero
        regr = ut.get_regression_model(model, len(X_train.values))
        regr.fit(ut.get_processed_X(X_train.values), y_train.values)
        res['log1p'] = np.maximum(regr.predict(ut.get_processed_X(X_predict.values)), 0.)

        RES.append(res)
        total += 1
    result = pd.concat(RES)
    return result
def predict(self):
    # Start every prediction at the global average rating
    test_df = util.get_test_data()
    test_df['rating'] = constants.AVG_RATING
    test_df = test_df.drop('userId', axis=1)

    # Use the per-movie average where one is available
    for index, row in test_df.iterrows():
        if row['movieId'] in self.df.index:
            test_df.at[index, 'rating'] = self.df.loc[row['movieId']]

    test_df = test_df.drop('movieId', axis=1)
    test_df.to_csv('movieaveragepredictor.csv', index=False)
def main(n_aggregation, dim_feature, n_epochs, batch_size, eps, outputfile):
    W = np.random.normal(0, 0.4, [dim_feature, dim_feature])
    A = np.random.normal(0, 0.4, dim_feature)
    b = np.array([0.])
    model = GraphNeuralNetwork(W, A, b, n_aggregation=n_aggregation)
    optimizer = Adam(model)

    # Training
    train_data = util.get_train_data('../../datasets')
    print('train_size: %d' % len(train_data))
    for epoch in range(n_epochs):
        train_loss = util.AverageMeter()
        train_acc = util.AverageMeter()
        for graphs, labels in util.get_shuffled_batches(train_data, batch_size):
            grads_flat = 0
            for graph, label in zip(graphs, labels):
                x = np.zeros([len(graph), dim_feature])
                x[:, 0] = 1
                grads_flat += calc_grads(model, graph, x, label,
                                         bce_with_logit, eps) / batch_size
                outputs = model(graph, x)
                train_loss.update(bce_with_logit(outputs, label), 1)
                train_acc.update((sigmoid(outputs) > 0.5) == label, 1)
            optimizer.update(grads_flat)
        print('epoch: %d, train_loss: %f, train_acc: %f'
              % (epoch, train_loss.avg, train_acc.avg))

    # Prediction
    test_data = util.get_test_data('../../datasets')
    with open(outputfile, 'w') as o:
        for graph in test_data:
            x = np.zeros([len(graph), dim_feature])
            x[:, 0] = 1
            logit = model(graph, x)
            pred = sigmoid(logit) > 0.5
            o.write(str(int(pred[0])) + '\n')
def generate_sub(name_black, name, name_white=None):
    print 'Creating submission for %s' % name

    # For some reason if the models are created in a different script and
    # then passed as parameters to this function, then Tensorflow breaks.
    #
    # Therefore, just pass the model names and load them from json in this function.
    from keras.models import model_from_json
    with open(join('models', name_black, 'model.json')) as f:
        json = f.read()
    model_black = model_from_json(json)
    model_black.load_weights(join('models', name_black, 'model.h5'))

    # Only load the second model when a name is given; keep None otherwise so
    # the check in the prediction loop below works.
    model_white = None
    if name_white is not None:
        with open(join('models', name_white, 'model.json')) as f:
            json = f.read()
        model_white = model_from_json(json)
        model_white.load_weights(join('models', name_white, 'model.h5'))

    def load_imgs(d):
        ids = os.listdir(d)
        imgs = [imread(join(d, id, 'images', id + '.png')) for id in ids]
        # Some test images are already 2d (grayscale) so don't convert them
        for i, x in enumerate(imgs):
            if len(x.shape) == 3:
                imgs[i] = cvtColor(x, bgr2gray)
        # Keep the sizes so that after passing through the unet, they can be
        # reshaped into their original size
        shapes = [x.shape for x in imgs]
        return imgs, ids, shapes

    #X, ids, sizes = load_imgs('test_data1')
    from util import get_test_data
    X, ids = get_test_data(just_X=True, ret_ids=True)
    sizes = [x.shape for x in X]
    #X_tmp, ids_tmp, size_tmp = load_imgs('test_data1')
    #X.extend(X_tmp)
    #sizes.extend(size_tmp)
    #ids.extend(ids_tmp)

    # Reshape each image to either 512, 256, or 128, whichever is closest.
    #
    # Do this because the unet uses concatenate layers and if a lot of
    # differently sized images are passed through the convolution layers,
    # their dimension changes and this breaks Keras.
    s = [512, 256, 128]
    for i in range(len(X)):
        for size in s:
            if X[i].shape[0] >= size or X[i].shape[1] >= size:
                new_shape = (size, size)
                break
        X[i] = imresize(X[i], new_shape)

    rle_ids = []
    rles = []
    z_ids = []
    for i, x in enumerate(X):
        if i % 100 == 0:
            print '%d / %d' % (i, len(X))
        batch = x.reshape(1, x.shape[0], x.shape[1], 1)
        # Route bright images to the white-background model when it exists
        if model_white is not None and np.mean(x) >= 127.5:
            p = model_white.predict(batch)[0, :, :, 0]
        else:
            p = model_black.predict(batch)[0, :, :, 0]
        p = imresize(p, sizes[i])
        """
        import matplotlib.pyplot as plt
        _, axs = plt.subplots(1, 2)
        axs[0].imshow(imresize(x, sizes[i]), 'gray')
        axs[1].imshow(p, 'gray')
        plt.show()
        """
        labels = label(p > 0.5)
        x_rles = list(labels_to_rles(labels))
        """
        if len(x_rles) == 0:
            import matplotlib.pyplot as plt
            _, axs = plt.subplots(1, 2)
            axs[0].imshow(imresize(x, sizes[i]), 'gray')
            axs[1].imshow(p, 'gray')
            plt.show()
            exit()
        """
        rles.extend(x_rles)
        rle_ids.extend([ids[i]] * len(x_rles))
        # Emit a dummy RLE for images with no detected nuclei
        if len(x_rles) == 0:
            rles.extend([[0, 0]])
            rle_ids.extend([ids[i]])
            z_ids.append(ids[i])

    sub = pd.DataFrame()
    sub['ImageId'] = rle_ids
    sub['EncodedPixels'] = pd.Series(rles).apply(
        lambda x: ' '.join(str(y) for y in x))
    sub.to_csv(join('subs', name + '.csv'), index=False)
def batch_grad():
    # Get data for the train and test sets
    X, Y = get_normalized_data()
    #XTrain = X[:-1000, :]
    #YTrain = Y[:-1000]
    #YTrain_ind = y2indicator(YTrain)
    #XTest = X[-1000:, :]
    #YTest = Y[-1000:]
    #YTest_ind = y2indicator(YTest)
    Y_ind = y2indicator(Y)
    batchSz = 500

    # Initialize random weights
    N, D = X.shape
    K = len(set(Y))
    M = 300
    W1 = np.random.randn(D, M)
    b1 = np.random.randn(M)
    W2 = np.random.randn(M, K)
    b2 = np.random.randn(K)

    learning_rate = 0.001
    reg = 0.01
    cache_w2 = 0
    cache_b2 = 0
    cache_w1 = 0
    cache_b1 = 0
    decay_rate = 0.999
    eps = 10e-10
    no_batches = int(N / batchSz)
    print("No of batches: ", no_batches)

    for i in range(300):
        for n in range(no_batches):
            # Get current batch
            XBatch = X[n * batchSz:(n * batchSz + batchSz), :]
            YBatch_ind = Y_ind[n * batchSz:(n * batchSz + batchSz), :]

            # Forward prop
            pY, Z = forward_relu(XBatch, W1, b1, W2, b2)

            # Backprop with per-parameter RMSProp caches
            gW2 = derivative_w2(pY, YBatch_ind, Z) + reg * W2
            cache_w2 = decay_rate * cache_w2 + (1 - decay_rate) * gW2 * gW2
            W2 += learning_rate * gW2 / (np.sqrt(cache_w2) + eps)

            gb2 = derivative_b2(pY, YBatch_ind) + reg * b2
            cache_b2 = decay_rate * cache_b2 + (1 - decay_rate) * gb2 * gb2
            b2 += learning_rate * gb2 / (np.sqrt(cache_b2) + eps)

            gW1 = derivative_w1(pY, YBatch_ind, W2, Z, XBatch) + reg * W1
            cache_w1 = decay_rate * cache_w1 + (1 - decay_rate) * gW1 * gW1
            W1 += learning_rate * gW1 / (np.sqrt(cache_w1) + eps)

            gb1 = derivative_b1(pY, YBatch_ind, W2, Z) + reg * b1
            cache_b1 = decay_rate * cache_b1 + (1 - decay_rate) * gb1 * gb1
            b1 += learning_rate * gb1 / (np.sqrt(cache_b1) + eps)

            if n % 100 == 0:
                #Forward prop
                #pY, Z = forward_relu(XBatch, W1, b1, W2, b2)
                YBatch = Y[n * batchSz:n * batchSz + batchSz]
                P = np.argmax(pY, axis=1)
                er = error_rate(P, YBatch)
                c = cost(YBatch_ind, pY)
                print("Loop: ", i, n, "Error rate: ", er, "Cost: ", c)

    pY, Z = forward_relu(X, W1, b1, W2, b2)
    p = np.argmax(pY, axis=1)
    print("Final training error rate: ", error_rate(p, Y))

    # Predict on the test set and write a Kaggle-style submission file
    XTest = get_test_data()
    pY, ZTest = forward_relu(XTest, W1, b1, W2, b2)
    YTest = np.argmax(pY, axis=1)
    f = open("test_rms.csv", "w")
    f.write("ImageId,Label\n")
    n = YTest.shape[0]
    for i in range(n):
        f.write(str(i + 1) + "," + str(YTest[i]) + "\n")
    f.close()
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# author: wangtaihe
# datetime: 2020/12/3 11:12
# software: PyCharm
import json

import pytest

import util
import Config as config
from GetSession import DmpLogin

cases, list_params = util.get_test_data("data/test_report.yml")


class TestReport:

    @pytest.mark.parametrize("case,data,expected", list(list_params), ids=cases)
    def test_save_settingdata(self, case, data, expected):
        test = DmpLogin()
        setting_data = test.getSettingData()
        try:
            setting = json.loads(setting_data)
            data['id'] = setting['data']['basicInfoMap']['id']
            post = test.post_api("/gzapi/save", json=data)
            util.info(post)
        except Exception as e:
            util.info(e)
            post = test.post_api("/gzapi/save", json=data)
            util.info(post)
        P = np.zeros((N, K))
        # For each class, use its stored mean/covariance
        for c, g in self.gaussians.items():
            mean, var = g['mean'], g['var']
            log_prior = np.log(self.priors[c])
            # Calculate the log of the probability density function, all at once
            P[:, c] = mvn.logpdf(X, mean=mean, cov=var) + log_prior
        return np.argmax(P, axis=1)


if __name__ == '__main__':
    # Get train and test data
    X, Y = util.get_data(40000)
    Ntrain = len(Y) // 2
    Xtest, Ytest = util.get_test_data(40000)
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    # Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    model = NaiveBayers()

    t0 = datetime.now()
    model.fit(Xtrain, Ytrain)
    print("Training time: ", (datetime.now() - t0))

    t0 = datetime.now()
    print("Training accuracy: ", model.score(Xtrain, Ytrain))
    print("Time to compute train accuracy: ", (datetime.now() - t0),
          "Train size: ", len(Ytrain))

    t0 = datetime.now()
x_test['Sales_true'] = y_test
x_test['Sales_pred'] = pred
x_test['Country'] = util.country[x_test.index].copy()
x_test['Date'] = util.date[x_test.index].copy()
util.convert_from_usd(x_test, columns=['Sales_true', 'Sales_pred'])
y_test = x_test['Sales_true']
pred = x_test['Sales_pred']
x_test.drop(['Sales_true', 'Sales_pred', 'Country', 'Date'], axis=1, inplace=True)
print(r2_score(y_test, pred))
print(util.SMAPE(y_test, pred))

train_data = util.get_train_data()
x = train_data.drop('Sales', axis=1)
y = train_data['Sales']
testx, test_merge = util.get_test_data()
x_train, x_test, y_train, y_test = util.get_train_test_data(train_data, test_size=0.20)

model1 = RandomForestRegressor(n_estimators=500, max_depth=25)
#model1.fit(x_train, y_train)
model1.fit(x, y)

model2 = GradientBoostingRegressor(n_estimators=300, max_depth=15,
                                   max_features=0.9, min_impurity_decrease=0.5)
#model2.fit(x_train, y_train)
model2.fit(x, y)
def main():
    # Get train and test data
    XTrain, YTrain = get_train_data()
    YTrain_ind = y2indicator(YTrain)
    XTrain = reshape(XTrain)
    XTest, YTest = get_test_data()
    YTest_ind = y2indicator(YTest)
    XTest = reshape(XTest)

    N, K = YTrain_ind.shape
    lr = np.float32(0.001)
    mu = np.float32(0.99)
    reg = np.float32(0.01)
    poolsz = (2, 2)
    M = 100
    batch_sz = 500
    no_batches = int(N / batch_sz)

    # Initial random weights (filter shape: height, width, in channels, out channels)
    W1_shape = (5, 5, 3, 20)
    W1_init = init_filter(W1_shape, poolsz)
    b1_init = np.zeros([W1_shape[3]])
    W2_shape = (5, 5, 20, 50)   # input channels must match W1's 20 output channels
    W2_init = init_filter(W2_shape, poolsz)
    b2_init = np.zeros([W2_shape[3]])
    W3_init = np.random.randn(W2_shape[3] * 8 * 8, M) / np.sqrt(W2_shape[3] * 8 * 8 + M)
    b3_init = np.zeros([M])
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros([K])

    # Tensorflow variables
    X = tf.placeholder(name='X', dtype='float32', shape=(batch_sz, 32, 32, 3))
    Y = tf.placeholder(name='Y', dtype='float32', shape=(batch_sz, K))
    W1 = tf.Variable(W1_init.astype(np.float32), name='W1')
    b1 = tf.Variable(b1_init.astype(np.float32), name='b1')
    W2 = tf.Variable(W2_init.astype(np.float32), name='W2')
    b2 = tf.Variable(b2_init.astype(np.float32), name='b2')
    W3 = tf.Variable(W3_init.astype(np.float32), name='W3')
    b3 = tf.Variable(b3_init.astype(np.float32), name='b3')
    W4 = tf.Variable(W4_init.astype(np.float32), name='W4')
    b4 = tf.Variable(b4_init.astype(np.float32), name='b4')

    # Forward prop
    Z1 = convpool(X, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z2_shape = Z2.get_shape().as_list()
    Z2_flat = tf.reshape(Z2, [Z2_shape[0], np.prod(Z2_shape[1:])])
    Z3 = tf.nn.relu(tf.matmul(Z2_flat, W3) + b3)
    pY = tf.matmul(Z3, W4) + b4

    # Cost and prediction
    cost = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(logits=pY, labels=Y))

    # Train function
    train = tf.train.RMSPropOptimizer(lr, decay=0.99, momentum=mu).minimize(cost)

    # Get prediction
    pred = tf.argmax(pY, axis=1)

    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        for i in range(100):
            for n in range(no_batches):
                # Get current batches
                XBatch = XTrain[n * batch_sz:(n * batch_sz + batch_sz), :]
                YBatch_ind = YTrain_ind[n * batch_sz:(n * batch_sz + batch_sz), :]

                # One training step
                session.run(train, feed_dict={X: XBatch, Y: YBatch_ind})

                if n % 200 == 0:
                    YBatch = YTrain[n * batch_sz:(n * batch_sz + batch_sz)]
                    c = session.run(cost, feed_dict={X: XBatch, Y: YBatch_ind})
                    P = session.run(pred, feed_dict={X: XBatch})
                    er = error_rate(P, YBatch)
                    print("Iteration: ", i, "Cost: ", c, "Error rate: ", er)
def main():
    # Get train and test data
    XTrain, YTrain = get_train_data()
    YTrain_ind = y2indicator(YTrain)
    XTrain = reshape(XTrain)
    XTest, YTest = get_test_data()
    YTest_ind = y2indicator(YTest)
    XTest = reshape(XTest)

    N, K = YTrain_ind.shape
    M = 100
    lr = np.float32(0.000001)
    reg = np.float32(0.01)
    mu = np.float32(0.99)
    poolsize = (2, 2)
    batch_sz = 500
    no_batches = int(N / batch_sz)

    # Initial random weight values (Theano filter shape: out channels, in channels, height, width)
    W1_shape = (20, 3, 5, 5)
    W1_init = init_filter(W1_shape, poolsize)
    b1_init = np.zeros([W1_shape[0]])
    W2_shape = (50, 20, 5, 5)
    W2_init = init_filter(W2_shape, poolsize)
    b2_init = np.zeros([W2_shape[0]])
    W3_init = np.random.randn(W2_shape[0] * 5 * 5, M) / np.sqrt(W2_shape[0] * 5 * 5 + M)
    b3_init = np.zeros([M])
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros([K])

    # Create theano variables
    X = T.tensor4('X', dtype='float32')  # inputs
    Y = T.matrix('Y')
    W1 = theano.shared(W1_init.astype(np.float32), 'W1')  # weights
    b1 = theano.shared(b1_init.astype(np.float32), 'b1')
    W2 = theano.shared(W2_init.astype(np.float32), 'W2')
    b2 = theano.shared(b2_init.astype(np.float32), 'b2')
    W3 = theano.shared(W3_init.astype(np.float32), 'W3')
    b3 = theano.shared(b3_init.astype(np.float32), 'b3')
    W4 = theano.shared(W4_init.astype(np.float32), 'W4')
    b4 = theano.shared(b4_init.astype(np.float32), 'b4')

    # Momentum variables
    dW1 = theano.shared(np.zeros(W1_init.shape, dtype=np.float32))
    db1 = theano.shared(np.zeros(b1_init.shape, dtype=np.float32))
    dW2 = theano.shared(np.zeros(W2_init.shape, dtype=np.float32))
    db2 = theano.shared(np.zeros(b2_init.shape, dtype=np.float32))
    dW3 = theano.shared(np.zeros(W3_init.shape, dtype=np.float32))
    db3 = theano.shared(np.zeros(b3_init.shape, dtype=np.float32))
    dW4 = theano.shared(np.zeros(W4_init.shape, dtype=np.float32))
    db4 = theano.shared(np.zeros(b4_init.shape, dtype=np.float32))

    # Forward prop equations
    Z1 = convpool(X, W1, b1)  # 2 conv-pool layers
    Z2 = convpool(Z1, W2, b2)
    Z3 = relu(Z2.flatten(ndim=2).dot(W3) + b3)  # fully connected layer
    P = T.nnet.softmax(Z3.dot(W4) + b4)

    # Cost and prediction equations
    params = (W1, b1, W2, b2, W3, b3, W4, b4)
    reg_cost = reg * np.sum([(param * param).sum() for param in params])
    cost = (Y * T.log(P)).sum() + reg_cost
    pred = T.argmax(P, axis=1)

    # Update weights
    W1_update = W1 + mu * dW1 + lr * T.grad(cost, W1)
    b1_update = b1 + mu * db1 + lr * T.grad(cost, b1)
    W2_update = W2 + mu * dW2 + lr * T.grad(cost, W2)
    b2_update = b2 + mu * db2 + lr * T.grad(cost, b2)
    W3_update = W3 + mu * dW3 + lr * T.grad(cost, W3)
    b3_update = b3 + mu * db3 + lr * T.grad(cost, b3)
    W4_update = W4 + mu * dW4 + lr * T.grad(cost, W4)
    b4_update = b4 + mu * db4 + lr * T.grad(cost, b4)

    # Gradient updates for momentum
    dW1_update = mu * dW1 + lr * T.grad(cost, W1)
    db1_update = mu * db1 + lr * T.grad(cost, b1)
    dW2_update = mu * dW2 + lr * T.grad(cost, W2)
    db2_update = mu * db2 + lr * T.grad(cost, b2)
    dW3_update = mu * dW3 + lr * T.grad(cost, W3)
    db3_update = mu * db3 + lr * T.grad(cost, b3)
    dW4_update = mu * dW4 + lr * T.grad(cost, W4)
    db4_update = mu * db4 + lr * T.grad(cost, b4)

    # Train function
    train = theano.function(
        inputs=[X, Y],
        updates=[
            (W1, W1_update), (b1, b1_update),
            (W2, W2_update), (b2, b2_update),
            (W3, W3_update), (b3, b3_update),
            (W4, W4_update), (b4, b4_update),
            (dW1, dW1_update), (db1, db1_update),
            (dW2, dW2_update), (db2, db2_update),
            (dW3, dW3_update), (db3, db3_update),
            (dW4, dW4_update), (db4, db4_update),
        ])

    # Cost and prediction function
    get_res = theano.function(
        inputs=[X, Y],
        outputs=[cost, pred])

    # Run batch gradient descent
    costs = []
    for i in range(400):
        for n in range(no_batches):
            # Get current batches
            XBatch = XTrain[n * batch_sz:(n * batch_sz + batch_sz), :]
            YBatch_ind = YTrain_ind[n * batch_sz:(n * batch_sz + batch_sz), :]

            # One training step
            train(XBatch, YBatch_ind)

            if n % 200 == 0:
                #YBatch = YTrain[n*batch_sz:(n*batch_sz + batch_sz)]
                c, P = get_res(XTest, YTest_ind)
                er = error_rate(P, YTest)
                print("Iteration: ", i, "Cost: ", c, "Error rate: ", er)
import cv2
import util
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier

# Load the train and test splits
train_imgs, train_labels = util.get_train_data()
test_imgs, test_labels = util.get_test_data()

# Fit a random forest on the flattened images and evaluate on the test set
random_forest_model = RandomForestClassifier()
random_forest_model.fit(train_imgs, train_labels)
random_forest_results = random_forest_model.predict(test_imgs)
util.evaluate(random_forest_results, test_labels)

# Visualize predictions for a few shuffled 28x28 test images
test_imgs = test_imgs.reshape(-1, 28, 28)
np.random.shuffle(test_imgs)
for i in range(10):
    util.visualize(test_imgs[i], random_forest_model)