def demo_44():
    """Run the sample-weight demo on 'AAL' ticks and print the results.

    Builds events from the tick series via the project helpers
    (`get_daily_vol`, `cusum_filter`, `get_t1`, `get_3barriers`), feeds
    them through `get_num_co_events` and `get_sample_tw`, applies
    `get_time_decay` with ``last_w=.1, is_exp=True`` and prints the head
    of the events and of the decayed weights.
    """
    prices = get_tick('AAL')
    daily_vol = get_daily_vol(prices)
    event_times = cusum_filter(prices, daily_vol)

    # The daily volatility doubles as the barrier target.
    events = get_3barriers(
        prices,
        t_events=event_times,
        trgt=daily_vol,
        ptsl=1,
        t1=get_t1(prices, event_times, num_days=1),
    )
    print(events.head())

    num_threads = 24
    co_events = mp_pandas_obj(
        get_num_co_events,
        ('molecule', events.index),
        num_threads,
        close_idx=prices.index,
        t1=events['t1'],
    )
    # Keep only the last row for any duplicated index label, then align
    # to the full price index, filling the gaps with 0.
    is_last_occurrence = ~co_events.index.duplicated(keep='last')
    co_events = co_events.loc[is_last_occurrence]
    co_events = co_events.reindex(prices.index).fillna(0)

    tw = mp_pandas_obj(
        get_sample_tw,
        ('molecule', events.index),
        num_threads,
        t1=events['t1'],
        num_co_events=co_events,
    )
    decayed = get_time_decay(tw, last_w=.1, is_exp=True)
    print(decayed.head())
def avg_active_signals(signals, num_threads):
    """Run `mp_avg_active_signals` over every time point where a signal changes.

    Args:
        signals: Object indexed by time with a ``'t1'`` column; both the
            index values and the non-null ``'t1'`` values are used as
            time points.
        num_threads: Forwarded to `mp_pandas_obj`.

    Returns:
        Whatever `mp_pandas_obj` returns for the sorted, de-duplicated
        list of time points.
    """
    # Time points where the signal changes: the index values plus the
    # non-null 't1' values, de-duplicated and sorted ascending.
    t1_values = signals['t1'].dropna().values
    index_values = signals.index.values
    change_points = sorted(set(t1_values).union(index_values))

    return mp_pandas_obj(
        mp_avg_active_signals,
        ('molecule', change_points),
        num_threads,
        signals=signals,
    )
def demo():
    """Plot the `get_num_co_events` output for 'AAL' and save two PNGs.

    Writes ``num_co_events.png`` (line plot against the daily volatility
    on a twin y-axis) and ``num_co_events_scatter.png`` (scatter against
    the percentage change of the close series) under ``PNG_PATH``.
    """
    prices = get_tick('AAL')
    daily_vol = get_daily_vol(prices)
    event_times = cusum_filter(prices, daily_vol)

    # The daily volatility doubles as the barrier target.
    events = get_3barriers(
        prices,
        t_events=event_times,
        trgt=daily_vol,
        ptsl=1,
        t1=get_t1(prices, event_times, num_days=5),
    )
    print(events.head())

    num_threads = 1
    co_events = mp_pandas_obj(
        get_num_co_events,
        ('molecule', events.index),
        num_threads,
        close_idx=prices.index,
        t1=events['t1'],
    )

    # Figure 1: line plots, co-events (left axis) vs. volatility (right axis).
    figure, left_axis = plt.subplots(figsize=(16, 8))
    left_axis.set_xlabel('time (s)')
    left_axis.set_ylabel('num_co_events', color='red')
    left_axis.plot(co_events, color='red')
    left_axis.tick_params(axis='y', labelcolor='red')

    # Second y-axis sharing the same x-axis; the x-label is already set.
    right_axis = left_axis.twinx()
    right_axis.set_ylabel('volatility', color='blue')
    right_axis.plot(daily_vol, color='blue')
    right_axis.tick_params(axis='y', labelcolor='blue')

    # Without this the right-hand y-label is slightly clipped.
    figure.tight_layout()
    plt.savefig(PNG_PATH + "num_co_events.png")
    plt.close()

    # Figure 2: scatter plots, co-events (left axis) vs. returns (right axis).
    _, left_axis = plt.subplots(figsize=(16, 8))
    left_axis.set_xlabel('time')
    left_axis.set_ylabel('num_co_events', color='red')
    left_axis.scatter(co_events.index, co_events.values, color='red')

    right_axis = left_axis.twinx()
    returns = prices.pct_change().dropna()
    right_axis.set_ylabel('return', color='blue')
    right_axis.scatter(returns.index, returns.values, color='blue')
    plt.savefig(PNG_PATH + "num_co_events_scatter.png")
    plt.close()
def feat_importance(X, cont, clf=None, n_estimators=1000, n_splits=10,
                    max_samples=1., num_threads=24, pct_embargo=0.,
                    scoring='accuracy', method='SFI', min_w_leaf=0.,
                    **kwargs):
    """Fit a classifier and compute feature importance with the chosen method.

    Args:
        X: Feature matrix; must expose ``.columns`` (used as feature names).
        cont: Container with ``'bin'`` (labels), ``'w'`` (sample weights)
            and ``'t1'`` entries.
        clf: Classifier to use. When ``None``, a ``BaggingClassifier`` of
            entropy-based ``DecisionTreeClassifier`` estimators is built.
        n_estimators: Number of estimators for the default bagging
            classifier.
        n_splits: Number of cross-validation splits.
        max_samples: ``max_samples`` for the default bagging classifier.
        num_threads: Thread count passed to `mp_pandas_obj` (SFI); also
            selects ``n_jobs`` (-1 if > 1, else 1) for the default
            classifier.
        pct_embargo: Embargo fraction forwarded to the CV helpers.
        scoring: Scoring name forwarded to the CV helpers.
        method: One of ``'MDI'``, ``'MDA'`` or ``'SFI'``.
        min_w_leaf: ``min_weight_fraction_leaf`` of the default base tree.
        **kwargs: Accepted for call compatibility; not used here.

    Returns:
        Tuple ``(imp, oob, oos)``: the importance result of the selected
        method, the fitted classifier's ``oob_score_`` (``None`` when the
        classifier has no such attribute), and the out-of-sample score.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Fail fast on an unsupported method: otherwise the (possibly very
    # expensive) fit below would run and the function would only die at the
    # final return with an UnboundLocalError.
    supported_methods = ('MDI', 'MDA', 'SFI')
    if method not in supported_methods:
        raise ValueError(
            "Unknown feature importance method %r; expected one of %s"
            % (method, ', '.join(supported_methods))
        )

    n_jobs = -1 if num_threads > 1 else 1

    # Build the default classifier when the caller did not supply one.
    if clf is None:
        base_clf = DecisionTreeClassifier(
            criterion='entropy',
            max_features=1,
            class_weight='balanced',
            min_weight_fraction_leaf=min_w_leaf,
        )
        # NOTE(review): recent scikit-learn releases renamed
        # `base_estimator` to `estimator`; confirm against the pinned
        # scikit-learn version before upgrading.
        clf = BaggingClassifier(
            base_estimator=base_clf,
            n_estimators=n_estimators,
            max_features=1.,
            max_samples=max_samples,
            oob_score=True,
            n_jobs=n_jobs,
        )

    fit_clf = clf.fit(X, cont['bin'], sample_weight=cont['w'].values)
    # A caller-supplied classifier may not expose an out-of-bag score.
    oob = getattr(fit_clf, 'oob_score_', None)

    # The CV-based scores below are computed by the project's CV helpers.
    if method == 'MDI':
        imp = feat_imp_MDI(fit_clf, feat_names=X.columns)
        oos = cv_score(clf, X=X, y=cont['bin'], n_splits=n_splits,
                       sample_weight=cont['w'], t1=cont['t1'],
                       pct_embargo=pct_embargo, scoring=scoring).mean()
    elif method == 'MDA':
        imp, oos = feat_imp_MDA(clf, X=X, y=cont['bin'], n_splits=n_splits,
                                sample_weight=cont['w'], t1=cont['t1'],
                                pct_embargo=pct_embargo, scoring=scoring)
    else:  # method == 'SFI'
        cv_gen = PurgedKFold(n_splits=n_splits, t1=cont['t1'],
                             pct_embargo=pct_embargo)
        # NOTE(review): unlike the MDI branch, this score is not averaged
        # with .mean(); left unchanged because callers may rely on it.
        oos = cv_score(clf, X=X, y=cont['bin'], sample_weight=cont['w'],
                       scoring=scoring, cv_gen=cv_gen)
        # NOTE(review): n_jobs is hard-coded to 24 regardless of
        # `num_threads`, and this mutates the caller's classifier; confirm
        # that this is intended.
        clf.n_jobs = 24
        imp = mp_pandas_obj(aux_feat_imp_SFI, ('feat_names', X.columns),
                            num_threads, clf=clf, X=X, cont=cont,
                            scoring=scoring, cv_gen=cv_gen)
    return imp, oob, oos