def checkModel(modelToUse, columns):
    '''
    Sanity check: fit a model and plot its predictions on the data it
    was fitted on.

    The training frame of a fresh kagglegym environment is reduced to
    ``timestamp``, ``y`` and ``columns``, averaged per timestamp, and
    ``y`` is replaced by its cumulative sum. The object returned by
    ``fitModel(modelToUse, train, columns)`` then predicts on that same
    frame, and the prediction is drawn against the (cumulative) target
    on a new matplotlib figure.

    Parameters
    ----------
    modelToUse
        Passed through unchanged to ``fitModel``.
    columns : list
        Feature column names. They are concatenated to
        ``['timestamp', 'y']``, so this must be a list.

    Returns
    -------
    None
        The only outputs are the printed progress messages and the
        figure.
    '''
    env = kagglegym.make()
    observation = env.reset()
    train = observation.train

    # Just to make things easier to visualize
    # and also to speed things up ...
    # -----------------------------------------
    train = train[['timestamp', 'y'] + columns]
    train = train.groupby('timestamp').aggregate(np.mean)
    train.y = np.cumsum(train.y)  # easier to visualize

    print('fitting a model')
    model = fitModel(modelToUse, train, columns)

    print('predict the same data')
    # We already select required columns
    yHat = model.predict(train)

    plt.figure()
    plt.plot(yHat, color='black', lw=2, label='predicted')
    plt.plot(train.y, '.', mec='None', mfc='orange', label='original')
    plt.legend(loc='lower right')
def getScore(modelToUse, columns):
    '''
    Fit a model on the kagglegym training data and score it by stepping
    through the environment.

    A fresh environment is created, ``fitModel`` is called on a copy of
    the training frame, and then, until the environment reports
    ``done``, the model predicts on a copy of the current features, the
    prediction is written into the ``y`` column of the current target,
    and the target is submitted with ``env.step``. Every timestamp that
    is a multiple of 100 is printed as a progress indicator.

    Parameters
    ----------
    modelToUse
        Passed through unchanged to ``fitModel``.
    columns
        Passed through unchanged to ``fitModel``.

    Returns
    -------
    tuple
        ``(info['public_score'], rewards)``, where ``info`` is the value
        returned by the last ``env.step`` call and ``rewards`` is the
        list of rewards returned by every step, in order.
    '''
    print('Starting a new calculation for score')
    rewards = []
    env = kagglegym.make()
    observation = env.reset()

    print('fitting a model')
    model = fitModel(modelToUse, observation.train.copy(), columns)

    # The model is already fitted at this point; the loop below only
    # predicts, so the progress message says so.
    print('Starting to predict')
    while True:
        prediction = model.predict(observation.features.copy())
        target = observation.target
        target['y'] = prediction

        timestamp = observation.features["timestamp"][0]
        if timestamp % 100 == 0:
            print(timestamp)

        observation, reward, done, info = env.step(target)
        rewards.append(reward)
        if done:
            break

    return info['public_score'], rewards
def main():
    '''
    Run the whole pipeline and return what ``predict_targets`` returns.

    Steps: create the kagglegym environment and take its first
    observation, pick every training column that is not one of
    ``id``, ``sample``, ``y`` or ``timestamp``, preprocess, train, and
    finally predict.
    '''
    # Preprocess data, define and train model
    environment = kagglegym.make()
    first_observation = environment.reset()

    excluded = ['id', 'sample', 'y', 'timestamp']
    feature_columns = [
        name for name in first_observation.train.columns
        if name not in excluded
    ]

    prepared = preprocess_data(first_observation, feature_columns)
    fitted_model = train_model(prepared[0], prepared[1])

    return predict_targets(
        environment,
        first_observation,
        fitted_model,
        prepared[2],
        feature_columns,
    )
def getScore(slope):
    '''
    Score a prediction that is the same value at every step.

    A fresh kagglegym environment is created and, until it reports
    ``done``, ``slope`` is written into the ``y`` column of the current
    target and the target is submitted with ``env.step``.

    Parameters
    ----------
    slope
        Value assigned to ``target['y']`` on every step. It is printed
        once at the start of the run.

    Returns
    -------
    tuple
        ``(info['public_score'], rewards)``, where ``info`` is the value
        returned by the last ``env.step`` call and ``rewards`` is the
        list of rewards returned by every step, in order.
    '''
    rewards = []
    print(slope)
    env = kagglegym.make()
    observation = env.reset()

    while True:
        target = observation.target
        target['y'] = slope

        observation, reward, done, info = env.step(target)
        rewards.append(reward)
        if done:
            break

    return info['public_score'], rewards
@author: aelsalla
'''
# Simple two-layer neural net minimizing the mean squared value.
# I am trying to switch to R2 loss later (see my attempt in the code).
import kagglegym
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution1D, GlobalMaxPooling1D, Embedding
from keras.utils import np_utils
from keras import backend as K
import numpy as np

# Create environment
env = kagglegym.make()

# Get first observation
observation = env.reset()

# Data
# Per-column means of the training frame, used below to fill missing values.
mean_vals = observation.train.mean()
# Training frame without the "id" and "timestamp" columns, with every
# missing value replaced by the mean of its column.
traindf = observation.train.drop(axis=1, labels=["id", "timestamp"]).fillna(mean_vals)
# Target column.
Y_train = traindf["y"]
# Everything that is left once the target is dropped.
X_train = traindf.drop(axis=1, labels=["y"])

# Model
# NOTE(review): hard-coded; presumably the number of columns in X_train -- confirm.
input_shape=108
# Alternative kept by the author (would yield the full shape tuple, not a single int):
#input_shape=X_train.shape
import kagglegym
import numpy as np
import pandas as pd
import random
from sklearn import ensemble, linear_model, metrics

# Create the environment and take the training frame of its first observation.
env = kagglegym.make()
o = env.reset()
train = o.train
print(train.shape)

# NOTE: despite its name, d_mean holds the per-column *median* of the
# training frame. It is computed before "nbnulls" is added below, so it has
# no entry for that column.
d_mean= train.median(axis=0)

# Number of missing values in each row, stored as an extra column.
# `train` is the same object as `o.train`, so the observation's frame is
# modified in place.
train["nbnulls"]=train.isnull().sum(axis=1)

# Every column except the identifiers and the target; this includes the
# "nbnulls" column added just above.
col=[x for x in train.columns if x not in ['id', 'timestamp', 'y']]

# NOTE(review): presumably a random seed -- confirm where it is used.
rnd=17

# Keeping NA information on some columns (best selected by the tree algorithms).
add_nas_ft=True
nas_cols=['technical_9', 'technical_0', 'technical_32', 'technical_16', 'technical_38', 'technical_44', 'technical_20', 'technical_30', 'technical_13']

# Columns kept for evolution from one month to another (best selected by the
# tree algorithms).
add_diff_ft=True
diff_cols=['technical_22','technical_20', 'technical_30', 'technical_13', 'technical_34']


# Homemade class used to infer randomly on the way the model learns.
class createLinearFeatures:

    def __init__(self, n_neighbours=1, max_elts=None, verbose=True, random_state=None):
        # random_state is kept under the attribute name `rnd`,
        # n_neighbours under `n`.
        self.rnd=random_state
        self.n=n_neighbours
        self.max_elts=max_elts