def get_current_buylist(acct_val=20000, risk_factor=0.0012):
    stocks = dlq.load_stocks()  # using SP600
    # first check if index is bullish; if price is above 200 SMA
    is_bullish = check_index_bullish(stocks)
    # TODO: plot the indexes with their SMAs
    rank_df = pd.DataFrame()
    # if bullish, we should buy stocks from ranked list
    if is_bullish:
        # get current index constituents
        barchart_const = cu.load_sp600_files()
        tickers = [t.replace('.', '_') for t in barchart_const.index]  # quandl data has underscores instead of periods
        # get volatility-weighted exponential fit to data to rank stocks
        for t in tqdm(tickers):
            one_df = calc_latest_metrics(stocks[t], t)
            rank_df = rank_df.append(one_df)

        filtered_df = rank_df[(rank_df['bullish'] == True) &
                              (rank_df['gap'] == False)].sort_values(by='rank_score', ascending=False)
        filtered_df = get_cost_shares_etc(filtered_df, acct_val=acct_val, risk_factor=risk_factor)
        to_buy = filtered_df[filtered_df['cumulative_cost'] <= (acct_val - 100)]  # save $100 for commissions
        money_left = acct_val - to_buy['cost'].sum()
        next_stock = filtered_df[filtered_df['cumulative_cost'] > acct_val].iloc[0]
        next_stock['rounded_shares'] = money_left // next_stock['Adj_Close']
        next_stock['cost'] = next_stock['rounded_shares'] * next_stock['Adj_Close']
        to_buy = to_buy.append(next_stock.to_frame().T)
        to_buy['cumulative_cost'] = to_buy['cost'].cumsum()

        # save for later reference
        # today_ny = datetime.datetime.now(pytz.timezone('America/New_York')).strftime('%m-%d-%Y')
        last_date = stocks[t].index[-1].strftime('%m-%d-%Y')
        to_buy.to_csv('to_buy_' + last_date + '.csv')
        rank_df.to_csv('rank_df_' + last_date + '.csv')
        filtered_df.to_csv('filtered_df_' + last_date + '.csv')
        print(to_buy)
        return to_buy
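# A minimal usage sketch (an assumption, not from the original source): generate the
# current buy list with an illustrative account size and risk factor, then inspect
# the position-sizing columns added by get_cost_shares_etc.
to_buy = get_current_buylist(acct_val=20000, risk_factor=0.0012)
if to_buy is not None:  # the function falls through (returns None) when the index is not bullish
    print(to_buy[['rounded_shares', 'cost', 'cumulative_cost']])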
# get average values of candles in clusters
sp500_norm['Cluster'] = labels
sp500_norm.groupby('Cluster').mean()

# try KNN
from sklearn.neighbors import KNeighborsRegressor as KNN

knn = KNN(n_neighbors=5)  # n_neighbors=5 is the sklearn default

import sys
sys.path.append('../stock_prediction/code')
import dl_quandl_EOD as dlq

dfs = dlq.load_stocks()


def get_open_normalised_prices_features_targets(dfs, symbol, start, end):
    """
    Obtains a pandas DataFrame containing open-normalised prices for the high,
    low, and close of a particular equity symbol from the quandl EOD data.
    That is, it creates High/Open, Low/Open, and Close/Open columns, plus a
    1-day percent change column.
    """
    df = dfs[symbol]
    df['1d_pct_chg'] = df['Adj_Close'].pct_change()
    df["H/O"] = df["Adj_High"] / df["Adj_Open"]
    df["L/O"] = df["Adj_Low"] / df["Adj_Open"]
    df["C/O"] = df["Adj_Close"] / df["Adj_Open"]
    df.drop(["Open", "High", "Low", "Close", "Volume", "Adj Close"],
            axis=1, inplace=True, errors='ignore')  # ignore any columns not present in the quandl data
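# A hedged sketch of actually fitting the KNN regressor above (assumptions: 'AAPL'
# is present in dfs, and the unused start/end arguments are passed as None): predict
# the next day's percent change from today's open-normalised candle shape.
get_open_normalised_prices_features_targets(dfs, 'AAPL', None, None)
aapl = dfs['AAPL'].copy()
aapl['target'] = aapl['1d_pct_chg'].shift(-1)  # next-day return as the target
aapl = aapl.dropna(subset=['H/O', 'L/O', 'C/O', 'target'])
train = aapl.iloc[:-252]  # hold out roughly the last year of trading days for testing
test = aapl.iloc[-252:]
knn.fit(train[['H/O', 'L/O', 'C/O']], train['target'])
print('test R^2:', knn.score(test[['H/O', 'L/O', 'C/O']], test['target']))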
%matplotlib inline
import numpy as np
import talib
from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import Scatter, Scattergl, Figure, Layout

import dl_quandl_EOD as dq


def calc_vol(st, mean_vol, ticker):
    """
    Takes a dataframe of stock data (st) and calculates the typical price,
    50-day moving average, and volatility.
    Also takes a dictionary (mean_vol) as an arg, which is updated in place
    with the average volatility under the given ticker.
    """
    st['typical_price'] = st[['Adj_High', 'Adj_Low', 'Adj_Close']].mean(axis=1)
    st['50d_mva'] = talib.SMA(st['typical_price'].values, timeperiod=50)
    st['volatility'] = ((st['typical_price'] - st['50d_mva']) / st['typical_price']).rolling(50).std()
    mean_vol[ticker] = np.mean(st['volatility'])


stocks = dq.load_stocks()
tickers = sorted(stocks.keys())

# get stocks that are still trading and have larger volumes
latest_date = max(stocks[t].index[-1] for t in tickers)  # most recent trading date in the data
vols = []
latest_tickers = []
for t in tickers:
    if latest_date in stocks[t].index:
        vol = np.mean(stocks[t].iloc[-100:]['Adj_Volume'] * stocks[t].iloc[-100:]['Adj_Close'])
        if vol > 1e8:  # $100 million or greater in average daily dollar volume
            vols.append(vol)
            latest_tickers.append(t)

# need to multithread...
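# One way to address the "need to multithread" note above -- a sketch, not the
# original implementation: threads are the simplest drop-in here since mean_vol is a
# shared dict; a process pool would need a different way to collect results.
from concurrent.futures import ThreadPoolExecutor

mean_vol = {}
with ThreadPoolExecutor(max_workers=8) as executor:
    futures = [executor.submit(calc_vol, stocks[ticker], mean_vol, ticker)
               for ticker in latest_tickers]
    for f in futures:
        f.result()  # surface any exceptions raised inside the workers

# highest average volatility first
top_vol = sorted(mean_vol.items(), key=lambda kv: kv[1], reverse=True)[:10]
print(top_vol)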
def load_stocks(stocks=None,
                TAs=True,
                finra_shorts=True,
                short_interest=True,
                verbose=False,
                debug=False,
                earliest_date='20150101',
                TAfunc='create_tas',
                calc_scores=True):
    """
    :param stocks: list of strings; tickers (must be caps), if None, will use all stocks possible
    :param TAs: boolean, if true, calculates technical indicators
    :param finra_shorts: boolean, if true, adds FINRA short data
    :param short_interest: boolean, if true, adds short interest data
    :param verbose: boolean, prints more debug if true
    :param debug: boolean, if true, runs the short interest merge serially for easier debugging
    :param earliest_date: if using an abbreviated EOD .h5 file (for quicker loading), provide earliest date
    :param TAfunc: string, function name for TA creation in calculate_ta_signals.py
    :param calc_scores: boolean, if true will calculate custom scoring metric

    :returns: dict of pandas dataframes with tickers as keys,
              dict of dataframes merged with short interest data (sh_int),
              dict of dataframes merged with finra data (fin_sh)
    """
    print('loading stocks...')
    all_stocks_dfs = dlq.load_stocks(verbose=verbose, earliest_date=earliest_date)
    dfs = {}
    existing_stocks = set(all_stocks_dfs.keys())
    if stocks is None:
        stocks = existing_stocks

    for s in stocks:
        if s in existing_stocks:
            dfs[s] = all_stocks_dfs[s]
        else:
            if verbose:
                print('stock', s, 'not in quandl data!')

    ret_stocks = sorted(dfs.keys())  # sometimes some stocks are not in there

    jobs = []
    if TAs:
        print('calculating TAs...')
        with ProcessPoolExecutor(max_workers=None) as executor:
            for s in ret_stocks:
                r = executor.submit(getattr(cts, TAfunc),
                                    dfs[s],
                                    verbose=verbose,
                                    return_df=True)
                jobs.append((s, r))

            for s, r in jobs:
                res = r.result()
                if res is not None:
                    dfs[s] = res
                else:
                    print('result is None for', s)

        del jobs
        gc.collect()

    sh_int = {}
    fin_sh = {}
    # not sure if processpool helping here at all...maybe even want to do
    # thread pool, or just loop it
    if finra_shorts:
        for s in ret_stocks:
            dfs[s].reset_index(inplace=True)

        print('getting finra shorts and merging...')
        finra_sh_df = sfs.load_all_data()
        finra_sh_df.rename(columns={'Symbol': 'Ticker'}, inplace=True)
        fn_stocks = set(finra_sh_df['Ticker'].unique())
        fn_grp = finra_sh_df.groupby(['Ticker', 'Date']).sum()
        jobs = []
        with ProcessPoolExecutor() as executor:
            for s in ret_stocks:
                if s in fn_stocks:
                    r = executor.submit(make_fn_df, s, dfs[s], fn_grp.loc[s])
                    jobs.append((s, r))

            for s, r in jobs:
                res = r.result()
                if res is not None:
                    fin_sh[s] = res
                else:
                    print('result is None for', s)

        del jobs
        gc.collect()

    if short_interest:
        print('getting short interest and merging...')
        if 'Date' not in dfs[ret_stocks[0]].columns:
            for s in ret_stocks:
                dfs[s].reset_index(inplace=True)

        ss_sh = sse.get_short_interest_data(all_cols=True)
        ss_sh.rename(columns={'Symbol': 'Ticker'}, inplace=True)
        ss_sh_grp = ss_sh.groupby('Ticker')
        sh_stocks = set(ss_sh['Ticker'].unique())
        if debug:
            for s in ret_stocks:
                if s in sh_stocks:
                    sh_int[s] = make_sh_df(s, dfs[s], ss_sh_grp.get_group(s))
        else:
            jobs = []
            with ProcessPoolExecutor() as executor:
                for s in ret_stocks:
                    if s in sh_stocks:
                        r = executor.submit(make_sh_df,
                                            s,
                                            dfs[s],
                                            ss_sh_grp.get_group(s),
                                            verbose,
                                            calc_scores)
                        jobs.append((s, r))

                for s, r in jobs:
                    res = r.result()
                    if res is not None:
                        sh_int[s] = res
                    else:
                        print('result is None for', s)

            del jobs
            gc.collect()

    return dfs, sh_int, fin_sh
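# A hedged usage sketch (the ticker list is only illustrative): load a few stocks
# with technical indicators plus both short datasets, then check which merges succeeded.
dfs, sh_int, fin_sh = load_stocks(stocks=['AAPL', 'AMD', 'TSLA'], verbose=True)
print('TAs calculated for:', sorted(dfs.keys()))
print('short interest merged for:', sorted(sh_int.keys()))
print('finra shorts merged for:', sorted(fin_sh.keys()))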
def portfolio_rebalance(position_check=True, acct_val=20000, risk_factor=0.001):
    """
    To be done once a week.
    If a stock is below its 100-day MA, had a 15% or greater gap, left the index,
    or is no longer in the top 20% of rankings, sell it.
    Also does position resizing if position_check is True.
    """
    stocks = dlq.load_stocks()
    # get current holdings from IB or stored list
    # use latest stored csv for now
    holdings_df = get_latest_holding_file()
    # get index constituents
    barchart_const = cu.load_sp600_files()
    tickers = set([t.replace('.', '_') for t in barchart_const.index])  # quandl data has underscores instead of periods
    full_df = pd.DataFrame()
    for t in holdings_df.index:
        one_df = calc_latest_metrics(stocks[t], t, gap_threshold=0.15)
        full_df = full_df.append(one_df)

    full_df = full_df.sort_values(by='rank_score', ascending=False)
    top_20_pct = set(full_df.index[:120])  # top 20% of the 600 SP600 constituents
    kickout = full_df[(full_df['bullish'] == False) |
                      (full_df['gap'] == True) |
                      ([f not in top_20_pct.union(tickers) for f in full_df.index.tolist()])]
    if kickout.shape[0] > 0:
        print('liquidate:')
        print(kickout)
        print('\n')

    # calculate money available for new purchases
    # TODO: get available cash from IB
    prices = pd.Series([stocks[t]['Adj_Close'][-1] for t in kickout.index],
                       index=kickout.index)
    money_available = sum(holdings_df.loc[kickout.index]['rounded_shares'] * prices)
    rebal_money_available = 0
    if position_check:
        # TODO: don't rebalance (sell) things if going up steadily -- need to quantify
        full_df = get_cost_shares_etc(full_df, acct_val=acct_val, risk_factor=risk_factor).copy()
        full_df['current_shares'] = holdings_df.loc[full_df.index]['rounded_shares']
        full_df['pct_diff_shares'] = (full_df['rounded_shares'] - full_df['current_shares']) / full_df['current_shares']
        end_cols = ['current_shares', 'rounded_shares', 'pct_diff_shares']
        full_df = full_df[[c for c in full_df.columns if c not in end_cols] + end_cols]
        full_df['cost_diff'] = full_df['Adj_Close'] * (full_df['current_shares'] - full_df['rounded_shares'])
        # book suggested 5% as the threshold for resizing; use 10% for less transaction cost
        to_rebalance = full_df.loc[full_df['pct_diff_shares'].abs() >= 0.1].copy()
        # also ignore any kickout stocks
        to_rebalance = to_rebalance.loc[[i for i in to_rebalance.index if i not in kickout.index]].copy()
        if to_rebalance.shape[0] > 0:
            # first get rebalance sells, and find out how much is available -- add to available from kickout
            # copy is important to use here to avoid a SettingWithCopy warning
            neg_rebal = to_rebalance.loc[to_rebalance['pct_diff_shares'] < 0].copy()
            neg_rebal.loc[:, 'sell_shares'] = neg_rebal['current_shares'] - neg_rebal['rounded_shares']
            print('sell shares:')
            print(neg_rebal['sell_shares'])
            neg_rebal_prices = pd.Series([stocks[t]['Adj_Close'][-1] for t in neg_rebal.index],
                                         index=neg_rebal.index)
            rebal_money_available = sum((neg_rebal['current_shares'] - neg_rebal['rounded_shares']) * neg_rebal_prices)
            money_available += rebal_money_available

            # TODO: check if market bullish -- if not, can't add any more shares or buy new
            # add to current holdings until no more money available
            pos_rebal = to_rebalance.loc[to_rebalance['pct_diff_shares'] > 0].copy()
            pos_rebal_prices = pd.Series([stocks[t]['Adj_Close'][-1] for t in pos_rebal.index],
                                         index=pos_rebal.index)
            pos_rebal.sort_values(by='rank_score', inplace=True, ascending=False)
            pos_rebal.loc[:, 'add_shares'] = pos_rebal['rounded_shares'] - pos_rebal['current_shares']
            pos_rebal.loc[:, 'add_cost'] = pos_rebal['add_shares'] * pos_rebal_prices
            # cumulative cost in rank order, as in get_current_buylist
            pos_rebal.loc[:, 'cumulative_cost'] = pos_rebal['add_cost'].cumsum()
            if pos_rebal['add_cost'].sum() < money_available:
                print('add to holdings: ')
                print(pos_rebal[['add_shares']])
                # TODO: get next best stocks to add to portfolio
            else:
                can_buy = pos_rebal[pos_rebal['cumulative_cost'] <= money_available]
                print('add all possible rebalances to holdings:')
                print(can_buy)
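# Hedged usage sketch: per the docstring this is intended to run once a week;
# the account value and risk factor here just mirror the defaults above.
portfolio_rebalance(position_check=True, acct_val=20000, risk_factor=0.001)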