def demo_44():
    """Print labelled events and time-decayed sample weights for 'AAL'.

    Runs the helper pipeline end to end: ticks -> daily volatility ->
    CUSUM-sampled timestamps -> barrier events (``num_days=1``), then
    derives concurrency counts and sample weights with 24 workers and
    prints the head of the events and of the decayed weights.
    """
    prices = get_tick('AAL')
    daily_vol = get_daily_vol(prices)
    event_times = cusum_filter(prices, daily_vol)
    events = get_3barriers(
        prices,
        t_events=event_times,
        trgt=daily_vol,  # the volatility series doubles as the target
        ptsl=1,
        t1=get_t1(prices, event_times, num_days=1),
    )
    print(events.head())

    n_workers = 24
    molecule = ('molecule', events.index)

    co_events = mp_pandas_obj(get_num_co_events,
                              molecule,
                              n_workers,
                              close_idx=prices.index,
                              t1=events['t1'])
    # Keep only the last row for any repeated timestamp, then align the
    # counts to the full price index, filling the gaps with zero.
    is_repeat = co_events.index.duplicated(keep='last')
    co_events = co_events.loc[~is_repeat].reindex(prices.index).fillna(0)

    weights = mp_pandas_obj(get_sample_tw,
                            molecule,
                            n_workers,
                            t1=events['t1'],
                            num_co_events=co_events)
    decayed = get_time_decay(weights, last_w=.1, is_exp=True)
    print(decayed.head())
def avg_active_signals(signals, num_threads):
    """Run ``mp_avg_active_signals`` over every time point of ``signals``.

    The time points (where the signal may change) are the non-null
    ``signals['t1']`` values merged with the index values of ``signals``,
    de-duplicated and sorted ascending. They are handed to
    ``mp_pandas_obj`` as the ``'molecule'`` argument together with
    ``num_threads``; whatever ``mp_pandas_obj`` returns is passed back.
    """
    change_times = sorted(
        set(signals['t1'].dropna().values).union(signals.index.values))
    return mp_pandas_obj(mp_avg_active_signals,
                         ('molecule', change_times),
                         num_threads,
                         signals=signals)
def demo():
    """Plot the number of concurrent events for 'AAL' and save two PNGs.

    Builds barrier events (``num_days=5``) from CUSUM-sampled timestamps,
    counts concurrent events single-threaded, and writes under
    ``PNG_PATH``:

    * ``num_co_events.png`` - line plot of the counts (left axis, red)
      against daily volatility (right axis, blue);
    * ``num_co_events_scatter.png`` - scatter of the counts (left axis,
      red) against percentage price changes (right axis, blue).
    """
    prices = get_tick('AAL')
    daily_vol = get_daily_vol(prices)
    event_times = cusum_filter(prices, daily_vol)
    events = get_3barriers(
        prices,
        t_events=event_times,
        trgt=daily_vol,  # the volatility series doubles as the target
        ptsl=1,
        t1=get_t1(prices, event_times, num_days=5),
    )
    print(events.head())

    co_events = mp_pandas_obj(get_num_co_events,
                              ('molecule', events.index),
                              1,  # single worker
                              close_idx=prices.index,
                              t1=events['t1'])

    red, blue = 'red', 'blue'

    # Figure 1: counts and volatility as lines on twin y-axes.
    fig, left_ax = plt.subplots(figsize=(16, 8))
    left_ax.set_xlabel('time (s)')
    left_ax.set_ylabel('num_co_events', color=red)
    left_ax.plot(co_events, color=red)
    left_ax.tick_params(axis='y', labelcolor=red)
    right_ax = left_ax.twinx()  # second y-axis on the same x-axis
    right_ax.set_ylabel('volatility', color=blue)
    right_ax.plot(daily_vol, color=blue)
    right_ax.tick_params(axis='y', labelcolor=blue)
    fig.tight_layout()  # keeps the right-hand label from being clipped
    plt.savefig(PNG_PATH + "num_co_events.png")
    plt.close()

    # Figure 2: counts and returns as scatter clouds on twin y-axes.
    _, left_ax = plt.subplots(figsize=(16, 8))
    left_ax.set_xlabel('time')
    left_ax.set_ylabel('num_co_events', color=red)
    left_ax.scatter(co_events.index, co_events.values, color=red)
    right_ax = left_ax.twinx()
    returns = prices.pct_change().dropna()
    right_ax.set_ylabel('return', color=blue)
    right_ax.scatter(returns.index, returns.values, color=blue)
    plt.savefig(PNG_PATH + "num_co_events_scatter.png")
    plt.close()
예제 #4
0
def feat_importance(X,
                    cont,
                    clf=None,
                    n_estimators=1000,
                    n_splits=10,
                    max_samples=1.,
                    num_threads=24,
                    pct_embargo=0.,
                    scoring='accuracy',
                    method='SFI',
                    min_w_leaf=0.,
                    **kwargs):
    """Fit a classifier and compute feature importance with one method.

    Args:
        X: Feature table; its ``columns`` are used as the feature names.
        cont: Table providing the columns ``'bin'`` (labels), ``'w'``
            (sample weights) and ``'t1'`` (passed to the purged
            cross-validation helpers).
        clf: Classifier to use. When ``None`` a ``BaggingClassifier`` of
            entropy decision trees (``max_features=1``, balanced class
            weights) is built from ``n_estimators``, ``max_samples`` and
            ``min_w_leaf``. NOTE: ``clf`` is fitted in place, and for
            ``method='SFI'`` its ``n_jobs`` attribute is set to 1.
        n_estimators: Number of bagged trees (default classifier only).
        n_splits: Number of cross-validation folds.
        max_samples: ``max_samples`` of the default bagging classifier.
        num_threads: Worker count handed to ``mp_pandas_obj`` for SFI;
            also selects ``n_jobs`` (-1 if > 1, else 1) for the default
            classifier.
        pct_embargo: Embargo fraction forwarded to the CV helpers.
        scoring: Scoring name forwarded to the CV helpers.
        method: One of ``'MDI'``, ``'MDA'`` or ``'SFI'``; selects
            ``feat_imp_MDI``, ``feat_imp_MDA`` or ``aux_feat_imp_SFI``.
        min_w_leaf: ``min_weight_fraction_leaf`` of the default trees.
        **kwargs: Accepted for call compatibility; not used.

    Returns:
        Tuple ``(imp, oob, oos)``: the importance result of the chosen
        helper, the fitted classifier's ``oob_score_`` (``None`` if it
        has none), and the out-of-sample cross-validation score.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Fail fast: an unknown method previously ran the full (expensive) fit
    # and then crashed at `return` with an UnboundLocalError.
    if method not in ('MDI', 'MDA', 'SFI'):
        raise ValueError(
            "unknown method {!r}; expected one of 'MDI', 'MDA', 'SFI'".format(
                method))

    n_jobs = -1 if num_threads > 1 else 1
    if clf is None:
        # NOTE(review): scikit-learn renamed `base_estimator` to `estimator`
        # in 1.2 and removed the old name in 1.4; the old keyword is kept so
        # existing environments keep working - confirm the pinned version.
        base_clf = DecisionTreeClassifier(criterion='entropy',
                                          max_features=1,
                                          class_weight='balanced',
                                          min_weight_fraction_leaf=min_w_leaf)
        clf = BaggingClassifier(base_estimator=base_clf,
                                n_estimators=n_estimators,
                                max_features=1.,
                                max_samples=max_samples,
                                oob_score=True,
                                n_jobs=n_jobs)
    fit_clf = clf.fit(X, cont['bin'], sample_weight=cont['w'].values)
    # A user-supplied classifier may not expose an out-of-bag score.
    oob = getattr(fit_clf, 'oob_score_', None)

    if method == 'MDI':
        imp = feat_imp_MDI(fit_clf, feat_names=X.columns)
        oos = cv_score(clf,
                       X=X,
                       y=cont['bin'],
                       n_splits=n_splits,
                       sample_weight=cont['w'],
                       t1=cont['t1'],
                       pct_embargo=pct_embargo,
                       scoring=scoring).mean()
    elif method == 'MDA':
        imp, oos = feat_imp_MDA(clf,
                                X=X,
                                y=cont['bin'],
                                n_splits=n_splits,
                                sample_weight=cont['w'],
                                t1=cont['t1'],
                                pct_embargo=pct_embargo,
                                scoring=scoring)
    else:  # method == 'SFI'
        cv_gen = PurgedKFold(n_splits=n_splits,
                             t1=cont['t1'],
                             pct_embargo=pct_embargo)
        # Average the per-fold scores so `oos` is the same kind of value as
        # in the MDI branch above.
        oos = cv_score(clf,
                       X=X,
                       y=cont['bin'],
                       sample_weight=cont['w'],
                       scoring=scoring,
                       cv_gen=cv_gen).mean()
        # Parallelism comes from dispatching features across `num_threads`
        # workers below, so the classifier itself runs single-job; a fixed
        # 24 jobs inside every worker would oversubscribe the machine.
        clf.n_jobs = 1
        imp = mp_pandas_obj(aux_feat_imp_SFI, ('feat_names', X.columns),
                            num_threads,
                            clf=clf,
                            X=X,
                            cont=cont,
                            scoring=scoring,
                            cv_gen=cv_gen)
    return imp, oob, oos