Exemplo n.º 1
0
 def transform(self, X, y=None):
     """Expand each date column of ``X`` into calendar-part features.

     For every column returned by ``date_cols``, the following columns are
     added: ``<col>_year``, ``<col>_month``, ``<col>_week``, ``<col>_wom``,
     ``<col>_day``, ``<col>_dow`` and ``<col>_hour``.  The date column itself
     is replaced by the float difference between each value's ordinal and
     the ordinal of ``self.t0``.

     When ``self.ascategory`` is truthy, the calendar parts are stored as
     strings; otherwise they are stored unchanged.

     The input frame is not modified: all work is done on a copy.

     Args:
         X: frame holding the date columns.  Values are presumably pandas
             Timestamps (``.week`` and ``.dayofweek`` are read) -- confirm
             against the caller.
         y: ignored.

     Returns:
         A copy of ``X`` with the extra columns and the re-encoded date
         columns.
     """
     def enc(value):
         # Optionally stringify a calendar part so it reads as a category.
         return str(value) if self.ascategory else value

     # Work on a copy so the caller's frame does not silently gain columns
     # or lose its original date values.
     r = X.copy()
     for c in date_cols(r):
         logger.debug('encoding date column {}'.format(c))
         dates = r[c]
         r[c + '_year'] = dates.apply(lambda ts: enc(ts.year))
         r[c + '_month'] = dates.apply(lambda ts: enc(ts.month))
         r[c + '_week'] = dates.apply(lambda ts: enc(ts.week))
         # 1-based index of the 7-day bucket the day of month falls in.
         r[c + '_wom'] = dates.apply(lambda ts: enc((ts.day - 1) // 7 + 1))
         r[c + '_day'] = dates.apply(lambda ts: enc(ts.day))
         r[c + '_dow'] = dates.apply(lambda ts: enc(ts.dayofweek))
         r[c + '_hour'] = dates.apply(lambda ts: enc(ts.hour))
         # Finally overwrite the date itself with its ordinal offset from t0.
         r[c] = dates.apply(
             lambda ts: float(ts.toordinal() - self.t0.toordinal()))
     return r
Exemplo n.º 2
0
def _compute_model_correls(df, model='linear', columns=None, sparse=False):
    """Score how well each column of ``df`` predicts each target column.

    For every column ``i`` of ``df``, its dummy encoding is used as the
    feature matrix of a model fitted against each target column ``j``; the
    value returned by ``_get_score`` is stored at ``[i, j]``.  Cells where
    ``i == j`` are set to 1.0 without fitting.

    Date columns are first passed through ``_index_encode``.  This is done
    on a copy, so the caller's frame is left untouched.

    Args:
        df: input frame.
        model: model identifier forwarded to ``_get_model`` and
            ``_get_score``.
        columns: target columns; ``None`` means every column of ``df``.
        sparse: forwarded to ``pd.get_dummies``.

    Returns:
        A float DataFrame indexed by ``df.columns`` with one column per
        target.
    """
    # Encode on a copy so the caller's date columns are not overwritten.
    df = df.copy()
    for c in date_cols(df):
        df[c] = _index_encode(df[c])

    # The declared default used to crash in the inner loop; treat it as
    # "all columns".
    if columns is None:
        columns = df.columns

    res = pd.DataFrame(index=df.columns, columns=columns)
    for i in df.columns:
        xi = pd.get_dummies(df.loc[:, i], sparse=sparse)
        for j in columns:
            if j == i:
                res.loc[i, j] = 1.
                continue
            xj = df.loc[:, j]
            rf = _get_model(model, xj)
            rf.fit(xi, xj)
            res.loc[i, j] = _get_score(model, rf, xi, xj)
            # Drop the fitted model before the next one is built.
            del rf
    # ``res`` was created without data, so its columns are object dtype;
    # convert them to float once every cell has been filled.
    return res.astype(float)
Exemplo n.º 3
0
def _compute_pearson_correls(df, columns):
    """Return the correlation of every column of ``df`` with ``columns``.

    Object and date columns are first replaced by the output of
    ``_index_encode`` (presumably a numeric encoding -- confirm against its
    definition) so that ``np.corrcoef`` can consume the frame.  The encoding
    is applied to a copy, so the caller's frame is left untouched.

    Args:
        df: input frame; each column is treated as one variable.
        columns: column label(s) to keep in the result.

    Returns:
        The full correlation matrix, indexed by ``df.columns``, restricted
        to ``columns``.
    """
    # Encode on a copy so the caller's object/date columns are preserved.
    df = df.copy()
    for c in object_cols(df) + date_cols(df):
        df[c] = _index_encode(df[c])
    # rowvar=False: columns are the variables, rows are the observations.
    cor = np.corrcoef(df, rowvar=False)
    res = pd.DataFrame(cor, index=df.columns, columns=df.columns)
    return res[columns]