def demo():
    # SNIPPET 16.1 TREE CLUSTERING USING SCIPY FUNCTIONALITY
    close = pd.DataFrame()
    for ind_t in ['AAL', 'MSFT', 'CSCO', 'AAPL']:
        if close.empty:
            close = get_tick(ind_t).to_frame()
            close.columns = [ind_t]
        else:
            t_close = get_tick(ind_t).to_frame()
            t_close.columns = [ind_t]
            close = pd.merge(close, t_close, how='inner',
                             left_index=True, right_index=True)
    cov, corr = close.cov(), close.corr()
    print(corr)
    dist = ((1 - corr) / 2.) ** .5  # distance matrix
    print(dist)
    # linkage matrix: (N - 1) x 4
    # y1, y2 report the constituents, y3 reports the distance between y1 and y2,
    # y4 is the number of items in the cluster
    link = sch.linkage(dist, 'single')  # linkage matrix
    print(link)
    quasi_diag = getQuasiDiag(link)
    print(quasi_diag)
    rec_bisec = getRecBipart(cov, quasi_diag)
    print(rec_bisec)
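# Note on the linkage call above: passing the square `dist` DataFrame makes scipy treat
# each row as an observation vector, so it clusters on the "distance of distances".
# If the intent is to cluster directly on the pairwise distances, a hedged alternative
# (a sketch, not necessarily what this repo intends) is to pass the condensed form:
def linkage_from_condensed(dist_df):
    from scipy.spatial.distance import squareform
    condensed = squareform(dist_df.values, checks=False)  # 1-D vector of the upper triangle
    return sch.linkage(condensed, 'single')  # still an (N - 1) x 4 linkage matrix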
def demo3():
    close = get_tick('AAL')
    frac_df = frac_diff_FFD(close.to_frame(), 0.5)
    vol = get_daily_vol(close)
    events = cusum_filter(close, 2 * vol)
    t1 = get_t1(close, events, num_days=5)
    sampled = get_3barriers(close, events, ptsl=2, trgt=vol, min_ret=0,
                            num_threads=12, t1=t1, side=None)
    data = sampled.dropna()
    print(data)
    features_df = frac_df.loc[data.index].dropna()
    features = features_df.values
    # get the labels of these events
    label = data['t1_type'].loc[features_df.index].values
    clf = RandomForestClassifier()
    # learn on these features and labels
    clf.fit(features, label)
    # predict on the same data, so overfitting could be an issue
    print(clf.predict(features))
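# The demo above fits and predicts on the same rows, so the reported predictions are
# in-sample. A minimal sketch of a chronological holdout (no shuffling, later data never
# used to predict earlier data); `features` and `label` are the arrays built in demo3:
def fit_with_holdout(features, label, test_frac=0.3):
    split = int(len(features) * (1 - test_frac))
    clf = RandomForestClassifier()
    clf.fit(features[:split], label[:split])           # train on the earlier part
    # note: overlapping triple-barrier labels can still leak across the split boundary;
    # purged cross-validation addresses that, this is only a first-order check
    return clf.score(features[split:], label[split:])  # accuracy on the later part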
def demo():
    close = get_tick('AAL')
    # adjust weights to deal with nonstationarity
    # plot_weights([0, 1], 11, size=6)
    # plot_weights([1, 2], 11, size=6)
    print(adfuller(close, 12))
    outputs = []
    ds = np.linspace(0, 1, 11)
    for d in ds:
        df1 = np.log(close).resample('1D').last().to_frame()
        df2 = frac_diff(df1, d, thres=.1)
        adf = adfuller(df2.iloc[:, 0].dropna(), maxlag=1,
                       regression='c', autolag=None)
        outputs.append(adf[1])  # ADF p-value of the fractionally differenced series
    plt.plot(ds, outputs)
    plt.savefig(PNG_PATH + "frac_diff.png")
    plt.close()
    outputs = []
    ds = np.linspace(0, 1, 11)
    for d in ds:
        df1 = np.log(close).resample('1D').last().to_frame()
        df2 = frac_diff_FFD(df1, d, thres=.1)
        adf = adfuller(df2.iloc[:, 0].dropna(), maxlag=1,
                       regression='c', autolag=None)
        outputs.append(adf[1])  # ADF p-value of the fixed-width-window version
    plt.plot(ds, outputs)
    plt.savefig(PNG_PATH + "frac_diff_FFD.png")
    plt.close()
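# For reference, the fractional-differentiation weights behind frac_diff / frac_diff_FFD
# follow the standard recursion w_0 = 1, w_k = -w_{k-1} * (d - k + 1) / k. A minimal
# sketch that generates weights until they fall below a threshold (this mirrors the
# textbook definition, not necessarily the repo's exact helper):
def ffd_weights(d, thres=1e-5):
    w, k = [1.], 1
    while True:
        w_k = -w[-1] * (d - k + 1) / k
        if abs(w_k) < thres:
            break
        w.append(w_k)
        k += 1
    return np.array(w)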
def demo3():
    close = get_tick('AAL')
    vol = get_daily_vol(close)
    sampled_idx = cusum_filter(close, vol)
    t1 = get_t1(close, sampled_idx, num_days=7)
    side = macd_side(close)
    events = get_3barriers(close, t_events=sampled_idx, trgt=vol,
                           ptsl=[1, 2], t1=t1, side=side)
    events = events.dropna()
    bins = get_bins(events, close)
    clf = RandomForestClassifier()
    x = np.hstack([
        events['side'].values[:, np.newaxis],
        close.loc[events.index].values[:, np.newaxis]
    ])  # side (action) and price
    # bin = 1 if the return was positive
    y = bins['bin'].values  # supervised answer
    clf.fit(x, y)
    predicted_probs = clf.predict_proba(x)[:, 1]  # probability of the positive class
    # get_signal(events.drop(columns=['side']), 0.2, predicted_probs, events['side'], 2, 1)
    get_signal(events.drop(columns=['side']), 0.2, predicted_probs,
               events['side'], 2, 12)
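# get_signal is the repo's bet-sizing helper. For reference, a hedged sketch of the
# textbook idea of sizing a bet from a predicted probability p of a binary label:
# the statistic z = (p - 1/2) / sqrt(p * (1 - p)) is mapped through the Gaussian CDF
# to a size in [-1, 1] and multiplied by the side of the bet. Illustration only:
def bet_size_from_prob(prob, side):
    from scipy.stats import norm
    prob = np.clip(prob, 1e-6, 1 - 1e-6)          # guard against division by zero
    z = (prob - 0.5) / np.sqrt(prob * (1 - prob))
    return side * (2 * norm.cdf(z) - 1)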
def demo_44():
    close = get_tick('AAL')
    vol = get_daily_vol(close)
    sampled_idx = cusum_filter(close, vol)
    t1 = get_t1(close, sampled_idx, num_days=1)
    trgt = vol
    events = get_3barriers(close, t_events=sampled_idx, trgt=trgt, ptsl=1, t1=t1)
    print(events.head())
    num_threads = 24
    num_co_events = mp_pandas_obj(get_num_co_events,
                                  ('molecule', events.index),
                                  num_threads,
                                  close_idx=close.index,
                                  t1=events['t1'])
    num_co_events = num_co_events.loc[~num_co_events.index.duplicated(keep='last')]
    num_co_events = num_co_events.reindex(close.index).fillna(0)
    tw = mp_pandas_obj(get_sample_tw,
                       ('molecule', events.index),
                       num_threads,
                       t1=events['t1'],
                       num_co_events=num_co_events)
    exp_decay = get_time_decay(tw, last_w=.1, is_exp=True)
    print(exp_decay.head())
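# get_sample_tw is assumed to return each event's average uniqueness over its lifespan.
# Conceptually (single-threaded sketch, an assumption rather than the repo's code): for
# every event [t_in, t_out], average the reciprocal of the number of concurrent events
# over the bars the event spans:
def sample_tw_sketch(t1, num_co_events):
    wght = pd.Series(index=t1.index, dtype=float)
    for t_in, t_out in t1.items():
        wght.loc[t_in] = (1. / num_co_events.loc[t_in:t_out]).mean()
    return wght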
def macd_demo():
    close = get_tick('AAL')
    vol = get_daily_vol(close)
    sampled_idx = cusum_filter(close, vol)
    t1 = get_t1(close, sampled_idx, num_days=7)
    side = macd_side(close)
    events = get_3barriers(close, t_events=sampled_idx, trgt=vol,
                           ptsl=[1, 2], t1=t1, side=side)
    events = events.dropna()
    # print(events.head())
    bins = get_bins(events, close)
    # print(bins.head())
    clf = RandomForestClassifier()
    x = np.hstack([
        events['side'].values[:, np.newaxis],
        close.loc[events.index].values[:, np.newaxis]
    ])  # side (action) and price
    y = bins['bin'].values  # supervised answer
    clf.fit(x, y)
    pred = clf.predict(x)
    # side as dictated by the MACD indicator
    # print(events['side'].values)
    # print(help(talib.MACD))
    macd, signal, hist = talib.MACD(close.values)
    print(np.max(macd[100:] - signal[100:] - hist[100:]))
    print(macd[np.isfinite(macd)].shape)
    signal = signal[np.isfinite(signal)]
    print(2 * ((signal > 0).astype(float) - 0.5))  # map the sign of the signal line to -1 / +1
    macd.fill(1)
    print(macd)
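# macd_side is the repo's helper; a hedged guess at the idea it encodes, based on the
# +1/-1 mapping printed above: take the sign of (MACD - signal line) and use it as the
# side of the bet. This sketch is an assumption, not the repo's implementation:
def macd_side_sketch(close):
    macd, signal, hist = talib.MACD(close.values)
    side = np.sign(macd - signal)  # +1 when MACD is above its signal line, -1 below
    return pd.Series(side, index=close.index).dropna()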
def demo():
    close = get_tick('AAL')
    # daily volatility
    vol = get_daily_vol(close)
    # print(vol.head())
    # CUSUM filter
    # cusum = cusum_filter(close, 0.1)
    sampled_idx = cusum_filter(close, vol)
    # print(sampled_idx)
    # vertical barrier
    t1 = get_t1(close, sampled_idx, num_days=7)
    # print(t1.head())
    # get the events and which barrier was hit;
    # ptsl sets the profit-taking / stop-loss barrier widths as multiples of trgt
    events = get_3barriers(close, t_events=sampled_idx, trgt=vol, ptsl=1, t1=t1)
    print(events.head())
    # print(events['t1_type'].unique())
    print(events['t1_type'].describe())
    # returns two columns: bin (profit, loss, or timeout) and return
    bins = get_bins(events, close)
    # print(bins)
    # print(bins['bin'].value_counts())
    dropped_bins = drop_labels(bins)
    # print(bins.shape)
    print(dropped_bins.head())
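# get_bins is assumed to turn the barrier events into labels. Conceptually: compute the
# return from each event's start to the time its first barrier was touched (t1) and
# label it by the sign of that return. A minimal sketch of that idea, not the repo's
# exact implementation:
def bins_sketch(events, close):
    px = close.reindex(events.index.union(events['t1'].values)).ffill()
    ret = px.loc[events['t1'].values].values / px.loc[events.index] - 1
    return pd.DataFrame({'ret': ret, 'bin': np.sign(ret)}, index=events.index)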
def demo2():
    # SNIPPET 14.2 IMPLEMENTATION OF A HOLDING PERIOD ESTIMATOR
    # tPos = pd.Series([4, 3, -2, 1, 0, 6, 8, 0, -5, 7])
    # getHoldingPeriod(tPos)
    # SNIPPET 14.3 ALGORITHM FOR DERIVING HHI CONCENTRATION
    close = get_tick('AAL')
    ret = close.diff().values
    ret = ret[~np.isnan(ret)]
    rHHIPos = getHHI(ret[ret >= 0])  # concentration of positive returns per bet
    print(rHHIPos)
    rHHINeg = getHHI(ret[ret < 0])  # concentration of negative returns per bet
    print(rHHINeg)
    tHHI = getHHI(close.groupby(pd.Grouper(freq='M')).count())  # concentration of bets per month
    print(tHHI)
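# getHHI is referenced above (Snippet 14.3). For reference, a sketch of the normalized
# Herfindahl-Hirschman concentration of a vector of returns: weights are each return's
# share of the total, and the raw HHI is rescaled so 0 means perfectly even and 1 means
# fully concentrated in a single observation (illustrative, not the repo's own code):
def hhi_sketch(bet_ret):
    if bet_ret.shape[0] <= 2:
        return np.nan
    wght = bet_ret / bet_ret.sum()
    hhi = (wght ** 2).sum()
    return (hhi - 1. / bet_ret.shape[0]) / (1. - 1. / bet_ret.shape[0])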
def demo_42():
    close = get_tick('AAL')
    vol = get_daily_vol(close)
    sampled_idx = cusum_filter(close, vol)
    t1 = get_t1(close, sampled_idx, num_days=5)
    trgt = vol
    events = get_3barriers(close, t_events=sampled_idx, trgt=trgt, ptsl=1, t1=t1)
    print(events.head())
    ind_m = get_ind_matrix(close.index, events['t1'])
    avg_uniq = get_avg_uniq(ind_m)
    print(avg_uniq.head())
    phi = seq_bootstrap(ind_m)
    print(phi)
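# get_ind_matrix is assumed to build the indicator matrix that drives the sequential
# bootstrap: one row per bar, one column per event, with a 1 wherever the bar falls
# inside that event's lifespan [t0, t1]. A minimal (slow but explicit) sketch:
def ind_matrix_sketch(bar_index, t1):
    ind_m = pd.DataFrame(0, index=bar_index, columns=range(t1.shape[0]))
    for i, (t0, t_end) in enumerate(t1.items()):
        ind_m.loc[t0:t_end, i] = 1
    return ind_m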
def demo():
    close = get_tick('AAL')
    vol = get_daily_vol(close)
    sampled_idx = cusum_filter(close, vol)
    t1 = get_t1(close, sampled_idx, num_days=5)
    trgt = vol
    events = get_3barriers(close, t_events=sampled_idx, trgt=trgt, ptsl=1, t1=t1)
    print(events.head())
    num_threads = 1
    num_co_events = mp_pandas_obj(get_num_co_events,
                                  ('molecule', events.index),
                                  num_threads,
                                  close_idx=close.index,
                                  t1=events['t1'])
    fig, ax1 = plt.subplots(figsize=(16, 8))
    ax1.set_xlabel('time')
    ax1.set_ylabel('num_co_events', color='red')
    ax1.plot(num_co_events, color='red')
    ax1.tick_params(axis='y', labelcolor='red')
    ax2 = ax1.twinx()  # second axes sharing the same x-axis
    ax2.set_ylabel('volatility', color='blue')  # x-label already handled by ax1
    ax2.plot(vol, color='blue')
    ax2.tick_params(axis='y', labelcolor='blue')
    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.savefig(PNG_PATH + "num_co_events.png")
    plt.close()
    fig, ax1 = plt.subplots(figsize=(16, 8))
    ax1.set_xlabel('time')
    ax1.set_ylabel('num_co_events', color='red')
    ax1.scatter(num_co_events.index, num_co_events.values, color='red')
    ax2 = ax1.twinx()
    ret = close.pct_change().dropna()
    ax2.set_ylabel('return', color='blue')
    ax2.scatter(ret.index, ret.values, color='blue')
    plt.savefig(PNG_PATH + "num_co_events_scatter.png")
    plt.close()
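# get_num_co_events is assumed to count, for every bar, how many events are "alive",
# i.e. how many [t0, t1] intervals contain that bar. A single-threaded sketch of the
# same quantity, useful as a sanity check against the mp_pandas_obj version:
def num_co_events_sketch(close_idx, t1):
    count = pd.Series(0, index=close_idx)
    for t0, t_end in t1.items():
        count.loc[t0:t_end] += 1
    return count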
def demo3():
    close = get_tick('FB')
    computeDD_TuW(close)
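# computeDD_TuW is the repo's drawdown / time-under-water helper. A hedged sketch of the
# two quantities for a price series: drawdown is how far the series sits below its
# running high-water mark, and time under water is how long since that mark was set:
def dd_tuw_sketch(close):
    hwm = close.expanding().max()              # running high-water mark
    drawdown = 1 - close / hwm                 # fractional drawdown at each bar
    last_high = close.index.to_series().where(close >= hwm).ffill()
    time_under_water = close.index.to_series() - last_high
    return drawdown, time_under_water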
def demo():
    close = get_tick('AAL').to_frame()
    close['pct_change'] = close['px'].pct_change()
    close['log_ret'] = np.log(close['px']) - np.log(close['px'].shift(1))
    print(get_bsadf(close['log_ret'].to_frame(), 50, 'ct', 10))
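# get_bsadf is the repo's backward-expanding SADF helper. A rough sketch of the idea for
# a single endpoint: run an ADF test on every backward-expanding window that ends at the
# last observation and keep the supremum of the test statistics (uses statsmodels'
# adfuller; the parameters here are illustrative only):
def sadf_at_end_sketch(series, min_length=50):
    series = series.dropna()
    stats_ = []
    for start in range(0, len(series) - min_length):
        adf = adfuller(series.iloc[start:], maxlag=1, regression='ct', autolag=None)
        stats_.append(adf[0])
    return max(stats_)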
def demo2():
    close = get_tick('AAL')
    x = np.random.randn(close.shape[0])
    dummy = pd.DataFrame({'Close': x.cumsum()}, index=close.index)
    dummy.plot()
    plt.savefig(PNG_PATH + "dummy.png")
    plt.close()
    frac_df = frac_diff_FFD(dummy, .5)
    print(frac_df.head())
    frac_df = frac_diff_FFD(frac_diff_FFD(dummy, 1), -1)
    print(frac_df.head())
    corrs = []
    ds = []
    # we want to keep the correlation with the original series high; if d is too
    # large the correlation (and with it the predictive power) disappears
    for d in np.linspace(0, 2, 11):
        frac_df = frac_diff_FFD(dummy, d)
        close_frac = frac_df["Close"]
        corr = close_frac.corr(dummy["Close"])
        print(d, corr)
        if np.isfinite(corr):
            corrs.append(corr)
            ds.append(d)
    plt.plot(ds, corrs)  # correlation decreases as d increases
    plt.savefig(PNG_PATH + "frac_df_dummy_corr.png")
    plt.close()
    ps = []
    ds = []
    # we also want stationarity without destroying the correlation; d around 0.6 is
    # usually a good compromise: p-value close to 0 while correlation is still present
    for d in np.linspace(0, 2, 11):
        frac_df = frac_diff_FFD(dummy, d)
        close_frac = frac_df["Close"]
        close_ = dummy["Close"].loc[close_frac.index]
        if len(close_) > 0:
            # the coefficient of the first argument will change
            res = statsmodels.tsa.stattools.coint(close_frac, close_)
            ps.append(res[1])
            ds.append(d)
    plt.plot(ds, ps)  # cointegration-test p-values as a function of d
    plt.savefig(PNG_PATH + "frac_df_dummy_p_v_d.png")
    plt.close()
    print(ps)
    ps = []
    ds = []
    for d in np.linspace(0, 2, 11):
        frac_df = frac_diff_FFD(dummy, d)
        close_frac = frac_df["Close"]
        if len(close_frac) > 0:
            # Jarque-Bera: goodness-of-fit test of whether the sample skewness and
            # kurtosis match a normal distribution
            res = stats.jarque_bera(close_frac)
            ps.append(res[1])
            ds.append(d)
    plt.plot(ds, ps)
    plt.savefig(PNG_PATH + "frac_df_dummy_jarque_bera.png")
    plt.close()
    print(ps)
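# Tying the plots above together: one common rule is to pick the smallest d whose FFD
# series already passes the ADF test at the 5% level while staying highly correlated
# with the original series. A hedged sketch of that selection rule (illustrative only):
def pick_min_d(series_df, col='Close', candidates=np.linspace(0, 2, 21)):
    for d in candidates:
        ffd = frac_diff_FFD(series_df, d)[col].dropna()
        if len(ffd) and adfuller(ffd, maxlag=1, regression='c', autolag=None)[1] < 0.05:
            return d, ffd.corr(series_df[col])  # chosen d and remaining correlation
    return None, None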