示例#1
0
def checkModel(modelToUse, columns):
    """Sanity-check a model by predicting the data it was fitted on.

    The training frame of a fresh kagglegym environment is reduced to
    'timestamp', 'y' and ``columns``, averaged per timestamp, and its 'y'
    is replaced by its cumulative sum. A model is then fitted on that
    frame through ``fitModel``, asked to predict the very same frame, and
    the predictions are plotted against the (cumulative) targets.

    Parameters
    ----------
    modelToUse
        Passed through unchanged to ``fitModel``.
    columns : list
        Feature column names; concatenated to ``['timestamp', 'y']`` to
        select the columns kept from the training frame.

    Returns
    -------
    None
        The only result is the matplotlib figure that gets drawn.
    """
    env = kagglegym.make()
    observation = env.reset()

    # One averaged row per timestamp: easier to visualize and faster to fit.
    train = observation.train[['timestamp', 'y'] + columns]
    train = train.groupby('timestamp').aggregate(np.mean)
    train.y = np.cumsum(train.y)  # cumulative target is easier to visualize

    print('fitting a model')
    model = fitModel(modelToUse, train, columns)

    print('predict the same data')
    # NOTE(review): ``train`` still contains 'y' here; this presumably relies
    # on the object returned by fitModel selecting ``columns`` itself - confirm.
    yHat = model.predict(train)

    plt.figure()
    plt.plot(yHat, color='black', lw=2, label='predicted')
    plt.plot(train.y, '.', mec='None', mfc='orange', label='original')
    plt.legend(loc='lower right')
示例#2
0
def getScore(modelToUse, columns):
    """Fit a model and score it by stepping through a kagglegym environment.

    A model is fitted once, through ``fitModel``, on a copy of the training
    frame of the first observation. Then, for every observation, the model
    predicts from a copy of the features, the prediction is written into the
    'y' column of the observation's target, and the target is submitted with
    ``env.step`` until the environment reports that it is done.

    Parameters
    ----------
    modelToUse
        Passed through unchanged to ``fitModel``.
    columns
        Passed through unchanged to ``fitModel``.

    Returns
    -------
    tuple
        ``(info['public_score'], rewards)`` where ``info`` comes from the
        final step and ``rewards`` is the list of rewards of every step.
    """
    print('Starting a new calculation for score')
    rewards = []
    env = kagglegym.make()
    observation = env.reset()

    print('fitting a model')
    model = fitModel(modelToUse, observation.train.copy(), columns)

    # The model is already fitted at this point; the loop below only predicts.
    print('Starting to predict with the fitted model')
    while True:

        prediction = model.predict(observation.features.copy())
        target = observation.target
        target['y'] = prediction

        # Progress indicator: print every timestamp divisible by 100.
        timestamp = observation.features["timestamp"][0]
        if timestamp % 100 == 0:
            print(timestamp)

        observation, reward, done, info = env.step(target)
        rewards.append(reward)
        if done:
            break

    return info['public_score'], rewards
示例#3
0
def main():
    """Preprocess the data, train a model and predict through the environment.

    Returns
    -------
    Whatever ``predict_targets`` returns for the created environment, the
    first observation, the trained model, the third item of the
    preprocessing result and the selected columns.
    """
    environment = kagglegym.make()
    first_obs = environment.reset()

    # Every training column that is not explicitly excluded is handed on.
    excluded = ('id', 'sample', 'y', 'timestamp')
    selected = [name for name in first_obs.train.columns
                if name not in excluded]

    # preprocess_data yields an indexable result: items 0 and 1 feed the
    # training step, item 2 is forwarded to the prediction step.
    prepared = preprocess_data(first_obs, selected)
    fitted = train_model(prepared[0], prepared[1])

    return predict_targets(environment, first_obs, fitted, prepared[2], selected)
示例#4
0
def getScore(slope):
    """Score a constant prediction by stepping through a kagglegym environment.

    For every observation, the 'y' column of the target is set to ``slope``
    and the target is submitted with ``env.step`` until the environment
    reports that it is done.

    Parameters
    ----------
    slope
        Value assigned to the 'y' column of every target; it is also
        printed once at the start.

    Returns
    -------
    tuple
        ``(info['public_score'], rewards)`` where ``info`` comes from the
        final step and ``rewards`` is the list of rewards of every step.
    """
    rewards = []
    print(slope)
    env = kagglegym.make()
    observation = env.reset()

    while True:
        target = observation.target
        target['y'] = slope

        observation, reward, done, info = env.step(target)
        rewards.append(reward)
        if done:
            break

    return info['public_score'], rewards
示例#5
0
@author: aelsalla
'''

# Simple two-layer neural net minimizing the mean squared value. I plan to switch to an R2 loss later (see my attempt in the code).
import kagglegym

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution1D, GlobalMaxPooling1D, Embedding
from keras.utils import np_utils
from keras import backend as K

import numpy as np

# Create the environment and get the first observation, which carries the
# training frame used below.
env = kagglegym.make()
observation = env.reset()

# Data: drop the "id" and "timestamp" columns and fill missing values with
# the per-column means of the training frame.
mean_vals = observation.train.mean()
traindf = observation.train.drop(axis=1, labels=["id", "timestamp"]).fillna(mean_vals)

# Split into the target column and the remaining feature columns.
Y_train = traindf["y"]
X_train = traindf.drop(axis=1, labels=["y"])


# Model
# Number of input features, derived from the feature frame instead of being
# hard-coded (previously the literal 108 - presumably the feature count of
# the competition data; confirm). Only the column count is used: the full
# ``X_train.shape`` tuple would also include the number of rows.
input_shape = X_train.shape[1]
import kagglegym
import numpy as np
import pandas as pd
import random
from sklearn import ensemble, linear_model, metrics

# Build the environment and take the training frame of the first observation.
env = kagglegym.make()
o = env.reset()
train = o.train
print(train.shape)

# Per-column medians, computed before the null-count column is added.
d_mean = train.median(axis=0)

# Number of missing values in each row, stored as an extra column.
train["nbnulls"] = train.isnull().sum(axis=1)

# Every column except the identifier, the timestamp and the target.
col = [name for name in train.columns if name not in ['id', 'timestamp', 'y']]

rnd = 17

# Keep NA information on some columns (best selected by the tree algorithms).
add_nas_ft = True
nas_cols = [
    'technical_9',
    'technical_0',
    'technical_32',
    'technical_16',
    'technical_38',
    'technical_44',
    'technical_20',
    'technical_30',
    'technical_13',
]

# Columns kept for evolution from one month to another (best selected by the
# tree algorithms).
add_diff_ft = True
diff_cols = [
    'technical_22',
    'technical_20',
    'technical_30',
    'technical_13',
    'technical_34',
]

#homemade class used to infer randomly on the way the model learns
class createLinearFeatures:
    
    def __init__(self, n_neighbours=1, max_elts=None, verbose=True, random_state=None):
        self.rnd=random_state
        self.n=n_neighbours
        self.max_elts=max_elts