示例#1
0
def demo():
    """Run the tree-clustering walkthrough (Snippet 16.1) on four tickers.

    Builds one price table for AAL, MSFT, CSCO and AAPL (inner-joined on
    the index), derives the covariance and correlation matrices, maps
    correlation to the distance ``sqrt((1 - corr) / 2)``, builds a
    single-linkage tree and prints the results of ``getQuasiDiag`` and
    ``getRecBipart``.
    """
    # SNIPPET 16.1 TREE CLUSTERING USING SCIPY FUNCTIONALITY
    close = pd.DataFrame()
    for ind_t in ['AAL', 'MSFT', 'CSCO', 'AAPL']:
        t_close = get_tick(ind_t).to_frame()
        t_close.columns = [ind_t]
        if close.empty:
            close = t_close
        else:
            # Keep only the timestamps shared by every ticker so far.
            close = pd.merge(close, t_close, how='inner',
                             left_index=True, right_index=True)
    cov, corr = close.cov(), close.corr()
    print(corr)
    dist = ((1 - corr) / 2.) ** .5  # distance matrix
    print(dist)
    # Linkage matrix is (N-1) x 4: columns 0 and 1 report the merged
    # constituents, column 2 the distance between them, column 3 the number
    # of items in the resulting cluster.
    # NOTE(review): SciPy's linkage interprets a 2-D input as observation
    # vectors, not as a distance matrix, so the rows of the square `dist`
    # frame are clustered as points. Confirm this is intended; a true
    # distance input would have to be passed in condensed form.
    link = sch.linkage(dist, 'single')  # linkage matrix
    print(link)

    quasi_diag = getQuasiDiag(link)
    print(quasi_diag)
    rec_bisec = getRecBipart(cov, quasi_diag)
    print(rec_bisec)
def demo3():
    """Fit a random forest on fractionally differentiated AAL prices.

    Features are the ``frac_diff_FFD`` output (d = 0.5) at the event
    timestamps; labels are the ``t1_type`` column of the ``get_3barriers``
    output. The classifier is trained and then evaluated on the same rows,
    so the printed predictions are in-sample only.
    """
    close = get_tick('AAL')
    frac_df = frac_diff_FFD(close.to_frame(), 0.5)
    vol = get_daily_vol(close)
    events = cusum_filter(close, 2 * vol)
    t1 = get_t1(close, events, num_days=5)

    barrier_kwargs = dict(ptsl=2,
                          trgt=vol,
                          min_ret=0,
                          num_threads=12,
                          t1=t1,
                          side=None)
    sampled = get_3barriers(close, events, **barrier_kwargs)

    labelled = sampled.dropna()
    print(labelled)

    # Restrict features to labelled events, then realign the labels to the
    # feature rows that survive dropna().
    feature_frame = frac_df.loc[labelled.index].dropna()
    features = feature_frame.values
    targets = labelled['t1_type'].loc[feature_frame.index].values

    model = RandomForestClassifier()
    model.fit(features, targets)
    # Predicting on the training rows: overfitting could be an issue here.
    print(model.predict(features))
def demo():
    """Plot ADF p-values of fractionally differentiated AAL log prices.

    For each differentiation order ``d`` in ``linspace(0, 1, 11)`` the daily
    log-price series is passed through ``frac_diff`` and then through
    ``frac_diff_FFD``; the ADF p-value of each differentiated series is
    plotted against ``d`` and saved to ``frac_diff.png`` and
    ``frac_diff_FFD.png`` under ``PNG_PATH``.
    """
    close = get_tick('AAL')
    # adjust weights to deal with nonstationarity
    # plot_weights([0, 1], 11, size=6)
    # plot_weights([1, 2], 11, size=6)
    print(adfuller(close, 12))

    ds = np.linspace(0, 1, 11)
    runs = (
        (frac_diff, "frac_diff.png"),
        (frac_diff_FFD, "frac_diff_FFD.png"),
    )
    for diff_fn, png_name in runs:
        p_values = []
        for d in ds:
            df1 = np.log(close).resample('1D').last().to_frame()
            df2 = diff_fn(df1, d, thres=.1)
            # Test the differentiated series itself. The previous code ran
            # adfuller on the raw `close` here, so the p-value never
            # depended on d and the plot was flat.
            diffed = df2.iloc[:, 0].dropna()
            adf = adfuller(diffed, maxlag=1, regression='c', autolag=None)
            # Element 1 of the adfuller result is the p-value.
            p_values.append(adf[1])
        plt.plot(ds, p_values)
        plt.savefig(PNG_PATH + png_name)
        plt.close()
def demo3():
    """Train a classifier on MACD-sided events and feed it to ``get_signal``.

    Events come from ``cusum_filter`` with a 7-day ``get_t1`` horizon and a
    side given by ``macd_side``. A random forest is fit on (side, price)
    against ``bins['bin']``, and its in-sample probability for the second
    class is passed to ``get_signal``.
    """
    close = get_tick('AAL')
    vol = get_daily_vol(close)
    sampled_idx = cusum_filter(close, vol)
    t1 = get_t1(close, sampled_idx, num_days=7)
    side = macd_side(close)
    events = get_3barriers(close,
                           t_events=sampled_idx,
                           trgt=vol,
                           ptsl=[1, 2],
                           t1=t1,
                           side=side)
    events = events.dropna()
    bins = get_bins(events, close)

    clf = RandomForestClassifier()
    features = np.hstack([
        events['side'].values[:, np.newaxis],
        close.loc[events.index].values[:, np.newaxis]
    ])  # action and px
    # NOTE(review): original note says bins = 1 when the return was
    # positive; confirm against get_bins.
    labels = bins['bin'].values  # supervised answer
    clf.fit(features, labels)
    # Column 1 is the probability of the second entry in clf.classes_.
    # Slicing replaces a row-by-row comprehension whose loop variable
    # shadowed the feature matrix.
    predicted_probs = clf.predict_proba(features)[:, 1]

    get_signal(events.drop(columns=['side']), 0.2, predicted_probs,
               events['side'], 2, 12)
def demo_44():
    """Print time-decayed sample weights for AAL triple-barrier events.

    Counts concurrent events per bar via ``get_num_co_events``, keeps the
    last entry for any duplicated index label, reindexes to the full price
    index (missing bars filled with 0), passes the counts to
    ``get_sample_tw`` and applies ``get_time_decay`` with ``last_w=.1``.
    """
    close = get_tick('AAL')
    vol = get_daily_vol(close)
    sampled_idx = cusum_filter(close, vol)
    t1 = get_t1(close, sampled_idx, num_days=1)
    events = get_3barriers(close,
                           t_events=sampled_idx,
                           trgt=vol,
                           ptsl=1,
                           t1=t1)
    print(events.head())

    num_threads = 24
    molecule = ('molecule', events.index)

    co_events = mp_pandas_obj(get_num_co_events,
                              molecule,
                              num_threads,
                              close_idx=close.index,
                              t1=events['t1'])
    # Drop repeated index labels, keeping the last occurrence of each.
    is_dup = co_events.index.duplicated(keep='last')
    co_events = co_events.loc[~is_dup]
    co_events = co_events.reindex(close.index).fillna(0)

    tw = mp_pandas_obj(get_sample_tw,
                       molecule,
                       num_threads,
                       t1=events['t1'],
                       num_co_events=co_events)
    exp_decay = get_time_decay(tw, last_w=.1, is_exp=True)
    print(exp_decay.head())
示例#6
0
def macd_demo():
    """Fit a forest on MACD-sided events, then poke at raw ``talib.MACD``.

    The first half repeats the event / bin / classifier pipeline; the
    second half prints a few diagnostics on the arrays returned by
    ``talib.MACD`` for the AAL price series.
    """
    close = get_tick('AAL')
    vol = get_daily_vol(close)
    sampled_idx = cusum_filter(close, vol)
    t1 = get_t1(close, sampled_idx, num_days=7)
    side = macd_side(close)
    events = get_3barriers(close,
                           t_events=sampled_idx,
                           trgt=vol,
                           ptsl=[1, 2],
                           t1=t1,
                           side=side).dropna()
    bins = get_bins(events, close)

    # Two feature columns: the side (action) and the price at the event.
    side_col = events['side'].values[:, np.newaxis]
    px_col = close.loc[events.index].values[:, np.newaxis]
    features = np.hstack([side_col, px_col])
    labels = bins['bin'].values  # supervised answer

    model = RandomForestClassifier()
    model.fit(features, labels)
    in_sample_pred = model.predict(features)

    # As dictated by MACD indicator
    macd, signal, hist = talib.MACD(close.values)
    # Largest value of macd - signal - hist after the first 100 samples.
    residual = macd[100:] - signal[100:] - hist[100:]
    print(np.max(residual))
    finite_macd = macd[np.isfinite(macd)]
    print(finite_macd.shape)
    # Map the finite signal values to +1.0 where positive, else -1.0.
    signal = signal[np.isfinite(signal)]
    print(np.where(signal > 0, 1.0, -1.0))
    # Overwrite the MACD array in place with ones and show it.
    macd.fill(1)
    print(macd)
示例#7
0
def demo():
    """Walk through triple-barrier labelling on AAL and print the results.

    Steps: daily volatility, CUSUM event sampling, 7-day vertical barrier,
    ``get_3barriers`` events, ``get_bins`` labels, then ``drop_labels``.
    """
    close = get_tick('AAL')

    # Daily volatility, used both as the CUSUM threshold and barrier target.
    vol = get_daily_vol(close)

    # Event timestamps picked by the CUSUM filter.
    sampled_idx = cusum_filter(close, vol)

    # Vertical barrier for each sampled event.
    t1 = get_t1(close, sampled_idx, num_days=7)

    # Events and which barrier was hit.
    # NOTE(review): the original note said ptsl = 1 for long and -1 for
    # short; confirm against get_3barriers.
    barrier_args = dict(t_events=sampled_idx, trgt=vol, ptsl=1, t1=t1)
    events = get_3barriers(close, **barrier_args)
    print(events.head())
    print(events['t1_type'].describe())

    # get_bins returns two columns: bin (profit, loss or timeout) and return.
    bins = get_bins(events, close)

    kept_bins = drop_labels(bins)
    print(kept_bins.head())
def demo2():
    """Print HHI concentration figures for AAL (Snippet 14.3).

    Computes ``getHHI`` on the non-negative price changes, on the negative
    price changes, and on the number of observations per month.
    """
    # SNIPPET 14.2 IMPLEMENTATION OF A HOLDING PERIOD ESTIMATOR
    # tPos = pd.Series([4, 3, -2, 1, 0, 6, 8, 0, -5, 7])
    # getHoldingPeriod(tPos)

    # SNIPPET 14.3 ALGORITHM FOR DERIVING HHI CONCENTRATION
    close = get_tick('AAL')
    ret = close.diff().values
    # diff() leaves NaN in the first position; drop it before splitting.
    ret = ret[~np.isnan(ret)]
    # (A leftover pdb.set_trace() was removed here; it stopped every run.)
    rHHIPos = getHHI(
        ret[ret >= 0])  # concentration of positive returns per bet
    print(rHHIPos)
    rHHINeg = getHHI(ret[ret < 0])  # concentration of negative returns per bet
    print(rHHINeg)
    tHHI = getHHI(close.groupby(
        pd.Grouper(freq='M')).count())  # concentr. bets/month
    print(tHHI)
def demo_42():
    """Print average uniqueness and a sequential-bootstrap draw for AAL.

    Builds triple-barrier events with a 5-day vertical barrier, derives the
    indicator matrix from the price index and the events' ``t1`` column,
    then prints the head of ``get_avg_uniq`` and the output of
    ``seq_bootstrap``.
    """
    close = get_tick('AAL')
    vol = get_daily_vol(close)
    sampled_idx = cusum_filter(close, vol)
    vertical = get_t1(close, sampled_idx, num_days=5)
    events = get_3barriers(close,
                           t_events=sampled_idx,
                           trgt=vol,
                           ptsl=1,
                           t1=vertical)
    print(events.head())

    indicator = get_ind_matrix(close.index, events['t1'])
    print(get_avg_uniq(indicator).head())
    print(seq_bootstrap(indicator))
def demo():
    """Plot concurrent-event counts for AAL against volatility and returns.

    Writes two figures under ``PNG_PATH``: ``num_co_events.png`` (line plot
    of the counts with volatility on a twin axis) and
    ``num_co_events_scatter.png`` (scatter of the counts with percentage
    returns on a twin axis).
    """
    close = get_tick('AAL')
    vol = get_daily_vol(close)
    sampled_idx = cusum_filter(close, vol)
    t1 = get_t1(close, sampled_idx, num_days=5)
    events = get_3barriers(close,
                           t_events=sampled_idx,
                           trgt=vol,
                           ptsl=1,
                           t1=t1)
    print(events.head())

    num_threads = 1
    co_events = mp_pandas_obj(get_num_co_events,
                              ('molecule', events.index),
                              num_threads,
                              close_idx=close.index,
                              t1=events['t1'])

    # Figure 1: counts (left axis, red) against volatility (right axis, blue).
    fig, left_ax = plt.subplots(figsize=(16, 8))
    left_ax.set_xlabel('time (s)')
    left_ax.set_ylabel('num_co_events', color='red')
    left_ax.plot(co_events, color='red')
    left_ax.tick_params(axis='y', labelcolor='red')
    right_ax = left_ax.twinx()  # second axes sharing the same x-axis
    # The x-label was already set on the left axes.
    right_ax.set_ylabel('volatility', color='blue')
    right_ax.plot(vol, color='blue')
    right_ax.tick_params(axis='y', labelcolor='blue')
    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.savefig(PNG_PATH + "num_co_events.png")
    plt.close()

    # Figure 2: scatter of counts (red) against percentage returns (blue).
    fig, left_ax = plt.subplots(figsize=(16, 8))
    left_ax.set_xlabel('time')
    left_ax.set_ylabel('num_co_events', color='red')
    left_ax.scatter(co_events.index, co_events.values, color='red')
    right_ax = left_ax.twinx()
    pct_ret = close.pct_change().dropna()
    right_ax.set_ylabel('return', color='blue')
    right_ax.scatter(pct_ret.index, pct_ret.values, color='blue')
    plt.savefig(PNG_PATH + "num_co_events_scatter.png")
    plt.close()
def demo3():
    """Run ``computeDD_TuW`` on the FB price series; the result is discarded."""
    fb_close = get_tick('FB')
    computeDD_TuW(fb_close)
示例#12
0
def demo():
    """Print ``get_bsadf`` for AAL log returns.

    Adds ``pct_change`` and ``log_ret`` columns to the price frame (which is
    read through its ``px`` column) and passes the log returns, as a
    one-column frame, to ``get_bsadf`` with arguments ``50, 'ct', 10``.
    """
    prices = get_tick('AAL').to_frame()
    px = prices['px']
    prices['pct_change'] = px.pct_change()
    # Log return: log price minus the previous row's log price.
    prices['log_ret'] = np.log(px) - np.log(px.shift(1))
    log_ret_frame = prices['log_ret'].to_frame()
    print(get_bsadf(log_ret_frame, 50, 'ct', 10))
def demo2():
    """Study ``frac_diff_FFD`` on a synthetic random walk.

    A cumulative sum of standard-normal draws (indexed like the AAL prices)
    is differentiated for ``d`` in ``linspace(0, 2, 11)``. Three curves are
    saved under ``PNG_PATH``: correlation with the original series, the
    ``coint`` p-value against the original series, and the Jarque-Bera
    p-value of the differentiated series.
    """
    close = get_tick('AAL')
    noise = np.random.randn(close.shape[0])
    dummy = pd.DataFrame({'Close': noise.cumsum()}, index=close.index)
    dummy.plot()
    plt.savefig(PNG_PATH + "dummy.png")
    plt.close()

    half_diff = frac_diff_FFD(dummy, .5)
    print(half_diff.head())

    # Differentiate with d = 1, then apply d = -1 to the result.
    round_trip = frac_diff_FFD(frac_diff_FFD(dummy, 1), -1)
    print(round_trip.head())

    # --- correlation with the original series as a function of d ---
    # Want to keep correlation high: if d is too high the correlation goes
    # away and with it the predictive power.
    kept_d = []
    corr_values = []
    for d in np.linspace(0, 2, 11):
        diffed = frac_diff_FFD(dummy, d)["Close"]
        rho = diffed.corr(dummy["Close"])
        print(d, rho)
        if not np.isfinite(rho):
            continue
        corr_values.append(rho)
        kept_d.append(d)
    # Correlation decreases as parameter 'd' is increased.
    plt.plot(kept_d, corr_values)
    plt.savefig(PNG_PATH + "frac_df_dummy_corr.png")
    plt.close()

    # --- coint p-value against the original series as a function of d ---
    # Also want stationarity without removing correlation; a value of d
    # around 0.6 is usually a good compromise (p-value close to 0 with
    # correlation still present).
    kept_d = []
    p_values = []
    for d in np.linspace(0, 2, 11):
        diffed = frac_diff_FFD(dummy, d)["Close"]
        original = dummy["Close"].loc[diffed.index]
        if len(original) == 0:
            continue
        # Coefficient of the first argument will change.
        outcome = statsmodels.tsa.stattools.coint(diffed, original)
        p_values.append(outcome[1])
        kept_d.append(d)
    # This curve is the p-value versus d, not a correlation.
    plt.plot(kept_d, p_values)
    plt.savefig(PNG_PATH + "frac_df_dummy_p_v_d.png")
    plt.close()
    print(p_values)

    # --- Jarque-Bera p-value of the differentiated series ---
    kept_d = []
    p_values = []
    for d in np.linspace(0, 2, 11):
        diffed = frac_diff_FFD(dummy, d)["Close"]
        if len(diffed) == 0:
            continue
        # Goodness-of-fit test of whether the sample has skewness and
        # kurtosis matching a normal distribution.
        outcome = stats.jarque_bera(diffed)
        p_values.append(outcome[1])
        kept_d.append(d)
    plt.plot(kept_d, p_values)
    plt.savefig(PNG_PATH + "frac_df_dummy_jarque_bera.png")
    plt.close()
    print(p_values)