def train_and_get_result(_df, _dft, store_item_nbrs, model, total_features):
    df = _df.copy()
    df_t = _dft.copy()
    RES = []
    total = 0
    for sno, ino in store_item_nbrs:
        # Store 35 is skipped entirely
        if sno == 35:
            continue
        res = pd.DataFrame()

        # Training rows for this (store, item) pair
        df1 = df[(df.store_nbr == sno) & (df.item_nbr == ino)]
        X_train, y_train = ut.get_train_data(df1)
        X_train = X_train.drop(['store_nbr', 'item_nbr'], axis=1)
        y_train = y_train[X_train.index.values]

        # Test rows for the same pair; keep the identifying columns for the output
        df2 = df_t[(df_t.store_nbr == sno) & (df_t.item_nbr == ino)]
        X_predict = ut.get_test_data(df2)
        res['date'] = X_predict['date']
        res['store_nbr'] = X_predict['store_nbr']
        res['item_nbr'] = X_predict['item_nbr']
        X_predict = X_predict.drop(['date', 'store_nbr', 'item_nbr'], axis=1)

        # Restrict both sets to the feature list selected for this pair
        X_train = X_train[total_features[total].tolist()]
        X_predict = X_predict[total_features[total].tolist()]

        # Fit the model and clip predictions at zero
        regr = ut.get_regression_model(model, len(X_train.values))
        regr.fit(ut.get_processed_X(X_train.values), y_train.values)
        res['log1p'] = np.maximum(regr.predict(ut.get_processed_X(X_predict.values)), 0.)

        RES.append(res)
        total += 1
    result = pd.concat(RES)
    return result
def predict(self):
    # Start every prediction at the global average rating
    test_df = util.get_test_data()
    test_df['rating'] = constants.AVG_RATING
    test_df = test_df.drop('userId', axis=1)

    # Use the per-movie average where one is available
    for index, row in test_df.iterrows():
        if row['movieId'] in self.df.index:
            test_df.at[index, 'rating'] = self.df.loc[row['movieId']]

    test_df = test_df.drop('movieId', axis=1)
    test_df.to_csv('movieaveragepredictor.csv', index=False)
def main(n_aggregation, dim_feature, n_epochs, batch_size, eps, outputfile):
    W = np.random.normal(0, 0.4, [dim_feature, dim_feature])
    A = np.random.normal(0, 0.4, dim_feature)
    b = np.array([0.])
    model = GraphNeuralNetwork(W, A, b, n_aggregation=n_aggregation)
    optimizer = Adam(model)

    # Training
    train_data = util.get_train_data('../../datasets')
    print('train_size: %d' % len(train_data))
    for epoch in range(n_epochs):
        train_loss = util.AverageMeter()
        train_acc = util.AverageMeter()
        for graphs, labels in util.get_shuffled_batches(train_data, batch_size):
            grads_flat = 0
            for graph, label in zip(graphs, labels):
                x = np.zeros([len(graph), dim_feature])
                x[:, 0] = 1
                grads_flat += calc_grads(model, graph, x, label,
                                         bce_with_logit, eps) / batch_size
                outputs = model(graph, x)
                train_loss.update(bce_with_logit(outputs, label), 1)
                train_acc.update((sigmoid(outputs) > 0.5) == label, 1)
            optimizer.update(grads_flat)
        print('epoch: %d, train_loss: %f, train_acc: %f'
              % (epoch, train_loss.avg, train_acc.avg))

    # Prediction
    test_data = util.get_test_data('../../datasets')
    with open(outputfile, 'w') as o:
        for graph in test_data:
            x = np.zeros([len(graph), dim_feature])
            x[:, 0] = 1
            logit = model(graph, x)
            pred = sigmoid(logit) > 0.5
            o.write(str(int(pred[0])) + '\n')
def generate_sub(name_black, name, name_white=None):
    print 'Creating submission for %s' % name

    # For some reason if the models are created in a different script and
    # then passed as parameters to this function, then Tensorflow breaks.
    #
    # Therefore, just pass the model names and load them from json in this function.
    from keras.models import model_from_json
    with open(join('models', name_black, 'model.json')) as f:
        json = f.read()
    model_black = model_from_json(json)
    model_black.load_weights(join('models', name_black, 'model.h5'))

    # Only load the second model when a name is given; keep None otherwise so
    # the check in the prediction loop below works.
    model_white = None
    if name_white is not None:
        with open(join('models', name_white, 'model.json')) as f:
            json = f.read()
        model_white = model_from_json(json)
        model_white.load_weights(join('models', name_white, 'model.h5'))

    def load_imgs(d):
        ids = os.listdir(d)
        imgs = [imread(join(d, id, 'images', id + '.png')) for id in ids]
        # Some test images are already 2d (grayscale) so don't convert them
        for i, x in enumerate(imgs):
            if len(x.shape) == 3:
                imgs[i] = cvtColor(x, bgr2gray)
        # Keep the sizes so that after passing through the unet, they can be
        # reshaped into their original size
        shapes = [x.shape for x in imgs]
        return imgs, ids, shapes

    #X, ids, sizes = load_imgs('test_data1')
    from util import get_test_data
    X, ids = get_test_data(just_X=True, ret_ids=True)
    sizes = [x.shape for x in X]
    #X_tmp, ids_tmp, size_tmp = load_imgs('test_data1')
    #X.extend(X_tmp)
    #sizes.extend(size_tmp)
    #ids.extend(ids_tmp)

    # Reshape each image to either 512, 256, or 128, whichever is closest.
    #
    # Do this because the unet uses concatenate layers and if a lot of
    # differently sized images are passed through the convolution layers,
    # their dimension changes and this breaks Keras.
    s = [512, 256, 128]
    for i in range(len(X)):
        for size in s:
            if X[i].shape[0] >= size or X[i].shape[1] >= size:
                new_shape = (size, size)
                break
        X[i] = imresize(X[i], new_shape)

    rle_ids = []
    rles = []
    z_ids = []
    for i, x in enumerate(X):
        if i % 100 == 0:
            print '%d / %d' % (i, len(X))
        batch = x.reshape(1, x.shape[0], x.shape[1], 1)
        # Route bright images to the white-background model when it exists
        if model_white is not None and np.mean(x) >= 127.5:
            p = model_white.predict(batch)[0, :, :, 0]
        else:
            p = model_black.predict(batch)[0, :, :, 0]
        p = imresize(p, sizes[i])
        """
        import matplotlib.pyplot as plt
        _, axs = plt.subplots(1, 2)
        axs[0].imshow(imresize(x, sizes[i]), 'gray')
        axs[1].imshow(p, 'gray')
        plt.show()
        """
        labels = label(p > 0.5)
        x_rles = list(labels_to_rles(labels))
        """
        if len(x_rles) == 0:
            import matplotlib.pyplot as plt
            _, axs = plt.subplots(1, 2)
            axs[0].imshow(imresize(x, sizes[i]), 'gray')
            axs[1].imshow(p, 'gray')
            plt.show()
            exit()
        """
        rles.extend(x_rles)
        rle_ids.extend([ids[i]] * len(x_rles))
        # Emit a dummy RLE for images with no detected nuclei
        if len(x_rles) == 0:
            rles.extend([[0, 0]])
            rle_ids.extend([ids[i]])
            z_ids.append(ids[i])

    sub = pd.DataFrame()
    sub['ImageId'] = rle_ids
    sub['EncodedPixels'] = pd.Series(rles).apply(
        lambda x: ' '.join(str(y) for y in x))
    sub.to_csv(join('subs', name + '.csv'), index=False)
def batch_grad():
    # Get data for the train and test sets
    X, Y = get_normalized_data()
    #XTrain = X[:-1000, :]
    #YTrain = Y[:-1000]
    #YTrain_ind = y2indicator(YTrain)
    #XTest = X[-1000:, :]
    #YTest = Y[-1000:]
    #YTest_ind = y2indicator(YTest)
    Y_ind = y2indicator(Y)
    batchSz = 500

    # Initialize random weights
    N, D = X.shape
    K = len(set(Y))
    M = 300
    W1 = np.random.randn(D, M)
    b1 = np.random.randn(M)
    W2 = np.random.randn(M, K)
    b2 = np.random.randn(K)

    learning_rate = 0.001
    reg = 0.01
    cache_w2 = 0
    cache_b2 = 0
    cache_w1 = 0
    cache_b1 = 0
    decay_rate = 0.999
    eps = 10e-10
    no_batches = int(N / batchSz)
    print("No of batches: ", no_batches)

    for i in range(300):
        for n in range(no_batches):
            # Get current batch
            XBatch = X[n * batchSz:(n * batchSz + batchSz), :]
            YBatch_ind = Y_ind[n * batchSz:(n * batchSz + batchSz), :]

            # Forward prop
            pY, Z = forward_relu(XBatch, W1, b1, W2, b2)

            # Backprop with per-parameter RMSProp caches
            gW2 = derivative_w2(pY, YBatch_ind, Z) + reg * W2
            cache_w2 = decay_rate * cache_w2 + (1 - decay_rate) * gW2 * gW2
            W2 += learning_rate * gW2 / (np.sqrt(cache_w2) + eps)

            gb2 = derivative_b2(pY, YBatch_ind) + reg * b2
            cache_b2 = decay_rate * cache_b2 + (1 - decay_rate) * gb2 * gb2
            b2 += learning_rate * gb2 / (np.sqrt(cache_b2) + eps)

            gW1 = derivative_w1(pY, YBatch_ind, W2, Z, XBatch) + reg * W1
            cache_w1 = decay_rate * cache_w1 + (1 - decay_rate) * gW1 * gW1
            W1 += learning_rate * gW1 / (np.sqrt(cache_w1) + eps)

            gb1 = derivative_b1(pY, YBatch_ind, W2, Z) + reg * b1
            cache_b1 = decay_rate * cache_b1 + (1 - decay_rate) * gb1 * gb1
            b1 += learning_rate * gb1 / (np.sqrt(cache_b1) + eps)

            if n % 100 == 0:
                #Forward prop
                #pY, Z = forward_relu(XBatch, W1, b1, W2, b2)
                YBatch = Y[n * batchSz:n * batchSz + batchSz]
                P = np.argmax(pY, axis=1)
                er = error_rate(P, YBatch)
                c = cost(YBatch_ind, pY)
                print("Loop: ", i, n, "Error rate: ", er, "Cost: ", c)

    pY, Z = forward_relu(X, W1, b1, W2, b2)
    p = np.argmax(pY, axis=1)
    print("Final training error rate: ", error_rate(p, Y))

    # Predict on the test set and write a Kaggle-style submission file
    XTest = get_test_data()
    pY, ZTest = forward_relu(XTest, W1, b1, W2, b2)
    YTest = np.argmax(pY, axis=1)
    f = open("test_rms.csv", "w")
    f.write("ImageId,Label\n")
    n = YTest.shape[0]
    for i in range(n):
        f.write(str(i + 1) + "," + str(YTest[i]) + "\n")
    f.close()
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# author: wangtaihe
# datetime: 2020/12/3 11:12
# software: PyCharm
import json

import pytest

import util
import Config as config
from GetSession import DmpLogin

cases, list_params = util.get_test_data("data/test_report.yml")


class TestReport:

    @pytest.mark.parametrize("case,data,expected", list(list_params), ids=cases)
    def test_save_settingdata(self, case, data, expected):
        test = DmpLogin()
        setting_data = test.getSettingData()
        try:
            setting = json.loads(setting_data)
            data['id'] = setting['data']['basicInfoMap']['id']
            post = test.post_api("/gzapi/save", json=data)
            util.info(post)
        except Exception as e:
            util.info(e)
            post = test.post_api("/gzapi/save", json=data)
            util.info(post)
        P = np.zeros((N, K))
        # For each class, use its stored mean/covariance
        for c, g in self.gaussians.items():
            mean, var = g['mean'], g['var']
            log_prior = np.log(self.priors[c])
            # Calculate the log of the probability density function, all at once
            P[:, c] = mvn.logpdf(X, mean=mean, cov=var) + log_prior
        return np.argmax(P, axis=1)


if __name__ == '__main__':
    # Get train and test data
    X, Y = util.get_data(40000)
    Ntrain = len(Y) // 2
    Xtest, Ytest = util.get_test_data(40000)
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    # Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    model = NaiveBayers()

    t0 = datetime.now()
    model.fit(Xtrain, Ytrain)
    print("Training time: ", (datetime.now() - t0))

    t0 = datetime.now()
    print("Training accuracy: ", model.score(Xtrain, Ytrain))
    print("Time to compute train accuracy: ", (datetime.now() - t0),
          "Train size: ", len(Ytrain))

    t0 = datetime.now()
x_test['Sales_true'] = y_test
x_test['Sales_pred'] = pred
x_test['Country'] = util.country[x_test.index].copy()
x_test['Date'] = util.date[x_test.index].copy()
util.convert_from_usd(x_test, columns=['Sales_true', 'Sales_pred'])
y_test = x_test['Sales_true']
pred = x_test['Sales_pred']
x_test.drop(['Sales_true', 'Sales_pred', 'Country', 'Date'], axis=1, inplace=True)
print(r2_score(y_test, pred))
print(util.SMAPE(y_test, pred))

train_data = util.get_train_data()
x = train_data.drop('Sales', axis=1)
y = train_data['Sales']
testx, test_merge = util.get_test_data()
x_train, x_test, y_train, y_test = util.get_train_test_data(train_data, test_size=0.20)

model1 = RandomForestRegressor(n_estimators=500, max_depth=25)
#model1.fit(x_train, y_train)
model1.fit(x, y)

model2 = GradientBoostingRegressor(n_estimators=300, max_depth=15,
                                   max_features=0.9, min_impurity_decrease=0.5)
#model2.fit(x_train, y_train)
model2.fit(x, y)
def main():
    # Get train and test data
    XTrain, YTrain = get_train_data()
    YTrain_ind = y2indicator(YTrain)
    XTrain = reshape(XTrain)
    XTest, YTest = get_test_data()
    YTest_ind = y2indicator(YTest)
    XTest = reshape(XTest)

    N, K = YTrain_ind.shape
    lr = np.float32(0.001)
    mu = np.float32(0.99)
    reg = np.float32(0.01)
    poolsz = (2, 2)
    M = 100
    batch_sz = 500
    no_batches = int(N / batch_sz)

    # Initial random weights (filter shape: height, width, in channels, out channels)
    W1_shape = (5, 5, 3, 20)
    W1_init = init_filter(W1_shape, poolsz)
    b1_init = np.zeros([W1_shape[3]])
    W2_shape = (5, 5, 20, 50)   # input channels must match W1's 20 output channels
    W2_init = init_filter(W2_shape, poolsz)
    b2_init = np.zeros([W2_shape[3]])
    W3_init = np.random.randn(W2_shape[3] * 8 * 8, M) / np.sqrt(W2_shape[3] * 8 * 8 + M)
    b3_init = np.zeros([M])
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros([K])

    # Tensorflow variables
    X = tf.placeholder(name='X', dtype='float32', shape=(batch_sz, 32, 32, 3))
    Y = tf.placeholder(name='Y', dtype='float32', shape=(batch_sz, K))
    W1 = tf.Variable(W1_init.astype(np.float32), name='W1')
    b1 = tf.Variable(b1_init.astype(np.float32), name='b1')
    W2 = tf.Variable(W2_init.astype(np.float32), name='W2')
    b2 = tf.Variable(b2_init.astype(np.float32), name='b2')
    W3 = tf.Variable(W3_init.astype(np.float32), name='W3')
    b3 = tf.Variable(b3_init.astype(np.float32), name='b3')
    W4 = tf.Variable(W4_init.astype(np.float32), name='W4')
    b4 = tf.Variable(b4_init.astype(np.float32), name='b4')

    # Forward prop
    Z1 = convpool(X, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z2_shape = Z2.get_shape().as_list()
    Z2_flat = tf.reshape(Z2, [Z2_shape[0], np.prod(Z2_shape[1:])])
    Z3 = tf.nn.relu(tf.matmul(Z2_flat, W3) + b3)
    pY = tf.matmul(Z3, W4) + b4

    # Cost and prediction
    cost = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(logits=pY, labels=Y))

    # Train function
    train = tf.train.RMSPropOptimizer(lr, decay=0.99, momentum=mu).minimize(cost)

    # Get prediction
    pred = tf.argmax(pY, axis=1)

    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        for i in range(100):
            for n in range(no_batches):
                # Get current batches
                XBatch = XTrain[n * batch_sz:(n * batch_sz + batch_sz), :]
                YBatch_ind = YTrain_ind[n * batch_sz:(n * batch_sz + batch_sz), :]

                # One training step
                session.run(train, feed_dict={X: XBatch, Y: YBatch_ind})

                if n % 200 == 0:
                    YBatch = YTrain[n * batch_sz:(n * batch_sz + batch_sz)]
                    c = session.run(cost, feed_dict={X: XBatch, Y: YBatch_ind})
                    P = session.run(pred, feed_dict={X: XBatch})
                    er = error_rate(P, YBatch)
                    print("Iteration: ", i, "Cost: ", c, "Error rate: ", er)
def main():
    # Get train and test data
    XTrain, YTrain = get_train_data()
    YTrain_ind = y2indicator(YTrain)
    XTrain = reshape(XTrain)
    XTest, YTest = get_test_data()
    YTest_ind = y2indicator(YTest)
    XTest = reshape(XTest)

    N, K = YTrain_ind.shape
    M = 100
    lr = np.float32(0.000001)
    reg = np.float32(0.01)
    mu = np.float32(0.99)
    poolsize = (2, 2)
    batch_sz = 500
    no_batches = int(N / batch_sz)

    # Initial random weight values (Theano filter shape: out channels, in channels, height, width)
    W1_shape = (20, 3, 5, 5)
    W1_init = init_filter(W1_shape, poolsize)
    b1_init = np.zeros([W1_shape[0]])
    W2_shape = (50, 20, 5, 5)
    W2_init = init_filter(W2_shape, poolsize)
    b2_init = np.zeros([W2_shape[0]])
    W3_init = np.random.randn(W2_shape[0] * 5 * 5, M) / np.sqrt(W2_shape[0] * 5 * 5 + M)
    b3_init = np.zeros([M])
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros([K])

    # Create theano variables
    X = T.tensor4('X', dtype='float32')  # inputs
    Y = T.matrix('Y')
    W1 = theano.shared(W1_init.astype(np.float32), 'W1')  # weights
    b1 = theano.shared(b1_init.astype(np.float32), 'b1')
    W2 = theano.shared(W2_init.astype(np.float32), 'W2')
    b2 = theano.shared(b2_init.astype(np.float32), 'b2')
    W3 = theano.shared(W3_init.astype(np.float32), 'W3')
    b3 = theano.shared(b3_init.astype(np.float32), 'b3')
    W4 = theano.shared(W4_init.astype(np.float32), 'W4')
    b4 = theano.shared(b4_init.astype(np.float32), 'b4')

    # Momentum variables
    dW1 = theano.shared(np.zeros(W1_init.shape, dtype=np.float32))
    db1 = theano.shared(np.zeros(b1_init.shape, dtype=np.float32))
    dW2 = theano.shared(np.zeros(W2_init.shape, dtype=np.float32))
    db2 = theano.shared(np.zeros(b2_init.shape, dtype=np.float32))
    dW3 = theano.shared(np.zeros(W3_init.shape, dtype=np.float32))
    db3 = theano.shared(np.zeros(b3_init.shape, dtype=np.float32))
    dW4 = theano.shared(np.zeros(W4_init.shape, dtype=np.float32))
    db4 = theano.shared(np.zeros(b4_init.shape, dtype=np.float32))

    # Forward prop equations
    Z1 = convpool(X, W1, b1)  # 2 conv-pool layers
    Z2 = convpool(Z1, W2, b2)
    Z3 = relu(Z2.flatten(ndim=2).dot(W3) + b3)  # fully connected layer
    P = T.nnet.softmax(Z3.dot(W4) + b4)

    # Cost and prediction equations
    params = (W1, b1, W2, b2, W3, b3, W4, b4)
    reg_cost = reg * np.sum([(param * param).sum() for param in params])
    cost = (Y * T.log(P)).sum() + reg_cost
    pred = T.argmax(P, axis=1)

    # Update weights
    W1_update = W1 + mu * dW1 + lr * T.grad(cost, W1)
    b1_update = b1 + mu * db1 + lr * T.grad(cost, b1)
    W2_update = W2 + mu * dW2 + lr * T.grad(cost, W2)
    b2_update = b2 + mu * db2 + lr * T.grad(cost, b2)
    W3_update = W3 + mu * dW3 + lr * T.grad(cost, W3)
    b3_update = b3 + mu * db3 + lr * T.grad(cost, b3)
    W4_update = W4 + mu * dW4 + lr * T.grad(cost, W4)
    b4_update = b4 + mu * db4 + lr * T.grad(cost, b4)

    # Gradient updates for momentum
    dW1_update = mu * dW1 + lr * T.grad(cost, W1)
    db1_update = mu * db1 + lr * T.grad(cost, b1)
    dW2_update = mu * dW2 + lr * T.grad(cost, W2)
    db2_update = mu * db2 + lr * T.grad(cost, b2)
    dW3_update = mu * dW3 + lr * T.grad(cost, W3)
    db3_update = mu * db3 + lr * T.grad(cost, b3)
    dW4_update = mu * dW4 + lr * T.grad(cost, W4)
    db4_update = mu * db4 + lr * T.grad(cost, b4)

    # Train function
    train = theano.function(
        inputs=[X, Y],
        updates=[
            (W1, W1_update), (b1, b1_update),
            (W2, W2_update), (b2, b2_update),
            (W3, W3_update), (b3, b3_update),
            (W4, W4_update), (b4, b4_update),
            (dW1, dW1_update), (db1, db1_update),
            (dW2, dW2_update), (db2, db2_update),
            (dW3, dW3_update), (db3, db3_update),
            (dW4, dW4_update), (db4, db4_update),
        ])

    # Cost and prediction function
    get_res = theano.function(
        inputs=[X, Y],
        outputs=[cost, pred])

    # Run batch gradient descent
    costs = []
    for i in range(400):
        for n in range(no_batches):
            # Get current batches
            XBatch = XTrain[n * batch_sz:(n * batch_sz + batch_sz), :]
            YBatch_ind = YTrain_ind[n * batch_sz:(n * batch_sz + batch_sz), :]

            # One training step
            train(XBatch, YBatch_ind)

            if n % 200 == 0:
                #YBatch = YTrain[n*batch_sz:(n*batch_sz + batch_sz)]
                c, P = get_res(XTest, YTest_ind)
                er = error_rate(P, YTest)
                print("Iteration: ", i, "Cost: ", c, "Error rate: ", er)
import cv2
import util
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier

# Load the train and test splits
train_imgs, train_labels = util.get_train_data()
test_imgs, test_labels = util.get_test_data()

# Fit a random forest on the flattened images and evaluate on the test set
random_forest_model = RandomForestClassifier()
random_forest_model.fit(train_imgs, train_labels)
random_forest_results = random_forest_model.predict(test_imgs)
util.evaluate(random_forest_results, test_labels)

# Visualize predictions for a few shuffled 28x28 test images
test_imgs = test_imgs.reshape(-1, 28, 28)
np.random.shuffle(test_imgs)
for i in range(10):
    util.visualize(test_imgs[i], random_forest_model)