def transform(self, X, y=None):
    """Expand each date column of ``X`` into calendar-part features.

    For every column name returned by ``date_cols(X)`` (helper defined
    elsewhere; presumably the datetime-typed columns), seven columns are
    added, named after the source column plus a suffix:

    ``_year``, ``_month``, ``_week``, ``_wom`` (week of month, computed as
    ``(day - 1) // 7 + 1``), ``_day``, ``_dow`` (``ts.dayofweek``) and
    ``_hour``.

    The source column itself is then overwritten with the float difference
    between each timestamp's ordinal and the ordinal of ``self.t0``.

    When ``self.ascategory`` is truthy the calendar parts are stored as
    strings; otherwise they are stored as extracted.

    ``X`` is modified in place and the same object is returned.  ``y`` is
    accepted but not used.
    """
    def encode(value):
        # Stringify only when the features are meant to be categorical.
        return str(value) if self.ascategory else value

    # (suffix, extractor) pairs, listed in the order the columns are added.
    parts = (
        ('_year', lambda ts: ts.year),
        ('_month', lambda ts: ts.month),
        ('_week', lambda ts: ts.week),
        ('_wom', lambda ts: (ts.day - 1) // 7 + 1),
        ('_day', lambda ts: ts.day),
        ('_dow', lambda ts: ts.dayofweek),
        ('_hour', lambda ts: ts.hour),
    )

    frame = X
    for col in date_cols(frame):
        logger.debug('encoding date column {}'.format(col))
        for suffix, extract in parts:
            # apply() runs immediately, so closing over `extract` is safe.
            frame[col + suffix] = frame[col].apply(
                lambda ts: encode(extract(ts)))
        # Replace the raw timestamps last: the parts above still need them.
        frame[col] = frame[col].apply(
            lambda ts: float(ts.toordinal() - self.t0.toordinal()))
    return frame
def _compute_model_correls(df, model='linear', columns=None, sparse=False):
    """Build a model-based association matrix between columns of ``df``.

    For every column ``i`` of ``df`` and every target ``j`` in ``columns``,
    a model obtained from ``_get_model(model, df[j])`` is fitted on the
    dummy encoding of column ``i`` to predict column ``j``, and the value
    of ``_get_score`` for that fit is stored at ``[i, j]``.  Entries where
    ``i == j`` are set to 1 without fitting anything.

    Note that the columns reported by ``date_cols(df)`` are replaced in
    ``df`` itself by their ``_index_encode`` result, so the caller's frame
    is modified.

    Args:
        df: pandas DataFrame holding the data.
        model: model selector, passed through to ``_get_model`` and
            ``_get_score``.
        columns: labels of the target columns.  ``None`` (the default)
            means every column of ``df``.
        sparse: forwarded to ``pd.get_dummies``.

    Returns:
        A float DataFrame indexed by ``df.columns`` with one column per
        entry of ``columns``.
    """
    for date_col in date_cols(df):
        df[date_col] = _index_encode(df[date_col])

    # The signature advertises None as the default, but iterating over None
    # below would raise TypeError; treat it as "all columns".
    if columns is None:
        columns = df.columns

    res = pd.DataFrame(index=df.columns, columns=columns)
    for i in df.columns:
        # The predictor side is encoded once per source column.
        xi = pd.get_dummies(df.loc[:, i], sparse=sparse)
        for j in columns:
            if j == i:
                res.loc[i, j] = 1.
                continue
            xj = df.loc[:, j]
            estimator = _get_model(model, xj)
            estimator.fit(xi, xj)
            res.loc[i, j] = _get_score(model, estimator, xi, xj)
            # Drop the fitted model before moving on to the next pair.
            del estimator

    # `res` was built from labels only, so pandas gave it object dtype;
    # cast so that callers receive a numeric matrix.
    return res.astype(float)
def _compute_pearson_correls(df, columns):
    """Return Pearson correlations of all columns of ``df`` against ``columns``.

    The columns reported by ``object_cols(df)`` and ``date_cols(df)`` are
    first replaced, in ``df`` itself, by their ``_index_encode`` result.
    The full correlation matrix is then computed with ``np.corrcoef``
    (treating each column as a variable) and narrowed to the requested
    ``columns``.

    Args:
        df: pandas DataFrame; modified in place by the encoding step.
        columns: column label(s) to keep in the result.

    Returns:
        The selection ``columns`` from a DataFrame indexed and labelled by
        ``df.columns``.
    """
    to_encode = object_cols(df) + date_cols(df)
    for name in to_encode:
        df[name] = _index_encode(df[name])

    # rowvar=False: variables are laid out along columns, observations along rows.
    coefficients = np.corrcoef(df, rowvar=False)
    labels = df.columns
    matrix = pd.DataFrame(coefficients, index=labels, columns=labels)
    return matrix[columns]