def get_models():
    """Return a new list holding one freshly constructed instance per model.

    The list order is fixed: logistic, extratrees, randomforest, mlpc,
    logisticPCA, example_predictions, fifty.
    """
    # NOTE(review): `fifty` is referenced without the `nx.` prefix, so it
    # must already be in scope in the enclosing module -- confirm.
    return [
        nx.logistic(),
        nx.extratrees(),
        nx.randomforest(),
        nx.mlpc(),
        nx.logisticPCA(),
        nx.example_predictions(),
        fifty(),
    ]
def get_models():
    """Return a new list holding one freshly constructed instance per model.

    The list order is fixed: linear, ridge_mean, extratrees, randomforest,
    mlpc, linearPCA, example_predictions, fifty.
    """
    return [
        nx.linear(),
        nx.ridge_mean(),
        nx.extratrees(),
        nx.randomforest(),
        nx.mlpc(),
        nx.linearPCA(),
        nx.example_predictions(),
        nx.fifty(),
    ]
def check(self, data, verbose=True):
    """
    Run Numerai upload checks.

    For every (name, tournament) pair the predictions are compared with the
    example predictions of the same tournament, separately for the
    'validation', 'test' and 'live' regions and for all rows combined.

    Parameters
    ----------
    data : nx.Data
        Data object of Numerai dataset.
    verbose : bool
        By default, True, output is printed to stdout. When False this
        method does not print anything itself.

    Returns
    -------
    check : dict
        A dictionary where the keys are the (name, tournament) pairs and
        the values are Pandas DataFrames that contain the results of the
        checks. Each DataFrame has the columns 'validation', 'test',
        'live', 'all' and 'pass' and the rows:

        - 'corr'  : Pearson correlation with the example predictions;
          passes when >= 0.2 in every region column.
        - 'rcorr' : Spearman correlation with the example predictions;
          passes when >= 0.2 in every region column.
        - 'min'   : smallest prediction; passes when >= 0.3.
        - 'max'   : largest prediction; passes when <= 0.7.
        - 'maz'   : largest absolute z-score of the predictions;
          passes when <= 15.
    """
    # calc example predictions, one vector per tournament
    example_y = {}
    for tournament in self.tournaments(as_str=False):
        ep = nx.production(nx.example_predictions(), data,
                           tournament=tournament, verbosity=0)
        # align the example predictions with this object's ids
        ep = ep.loc[self.ids]
        example_y[tournament] = ep.y[:, 0]

    region_names = ('validation', 'test', 'live', 'all')
    columns = list(region_names) + ['pass']
    data = data.loc[self.ids]
    regions = data.region

    df_dict = {}

    # check each model, tournament pair; the position of a pair in
    # self.pairs() is used as its column in self.y
    for col, pair in enumerate(self.pairs(as_str=False)):
        if verbose:
            print('{}, {}'.format(pair[0], nx.tournament_str(pair[1])))
        df = pd.DataFrame(columns=columns)
        y = self.y[:, col]
        for region in region_names:
            yexi = example_y[pair[1]]
            if region == 'all':
                yi = y
            else:
                in_region = regions == region
                yi = y[in_region]
                yexi = yexi[in_region]
            df.loc['corr', region] = pearsonr(yi, yexi)[0]
            df.loc['rcorr', region] = spearmanr(yi, yexi)[0]
            df.loc['min', region] = yi.min()
            df.loc['max', region] = yi.max()
            maz = np.abs((yi - yi.mean()) / yi.std()).max()
            df.loc['maz', region] = maz

        # iloc[:-1] drops the trailing 'pass' column so that only the
        # region columns take part in each pass/fail decision
        df.loc['corr', 'pass'] = (df.loc['corr'].iloc[:-1] >= 0.2).all()
        df.loc['rcorr', 'pass'] = (df.loc['rcorr'].iloc[:-1] >= 0.2).all()
        df.loc['min', 'pass'] = (df.loc['min'].iloc[:-1] >= 0.3).all()
        df.loc['max', 'pass'] = (df.loc['max'].iloc[:-1] <= 0.7).all()
        df.loc['maz', 'pass'] = (df.loc['maz'].iloc[:-1] <= 15).all()

        if verbose:
            print(df)
        df_dict[pair] = df

    return df_dict