def get_current_buylist(acct_val=20000, risk_factor=0.0012):
    """
    Build the current buy list from the ranked SP600 constituents.

    If the index check fails (not bullish) or nothing could be ranked, nothing
    is bought: an empty DataFrame is returned and no files are written.

    Otherwise the constituents are ranked with calc_latest_metrics, filtered
    to rows flagged bullish and without a gap, sized with get_cost_shares_etc,
    and cut off at the account budget.  The first stock that does not fit is
    added with as many whole shares as the leftover money allows.

    Side effects: writes to_buy_<date>.csv, rank_df_<date>.csv and
    filtered_df_<date>.csv to the working directory and prints the buy list.

    :param acct_val: account value in dollars available for positions
    :param risk_factor: passed through to get_cost_shares_etc for sizing

    :returns: pandas DataFrame of positions to buy (empty if nothing to buy)
    """
    commission_reserve = 100  # save $100 for commissions
    stocks = dlq.load_stocks()

    # using SP600
    # first check if index is bullish; if price is above 200 SMA
    is_bullish = check_index_bullish(stocks)

    # TODO: plot the indexes with their SMAs

    # only buy from the ranked list when the index is bullish
    if not is_bullish:
        print('index is not bullish; nothing to buy')
        return pd.DataFrame()

    # get current index constituents
    barchart_const = cu.load_sp600_files()
    # quandl data has underscores instead of periods
    tickers = [t.replace('.', '_') for t in barchart_const.index]

    # get volatility-weighted exponential fit to data to rank stocks
    metric_frames = []
    for t in tqdm(tickers):
        one_df = calc_latest_metrics(stocks[t], t)
        if isinstance(one_df, pd.Series):
            # keep one row per ticker, like DataFrame.append did for a Series
            one_df = one_df.to_frame().T
        metric_frames.append(one_df)

    if not metric_frames:
        print('no index constituents to rank; nothing to buy')
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    rank_df = pd.concat(metric_frames)

    # explicit comparisons on purpose: the flag columns may be object dtype
    eligible = (rank_df['bullish'] == True) & (rank_df['gap'] == False)  # noqa: E712
    filtered_df = rank_df[eligible].sort_values(by='rank_score',
                                                ascending=False)
    filtered_df = get_cost_shares_etc(filtered_df,
                                      acct_val=acct_val,
                                      risk_factor=risk_factor)

    # the same budget is used for the cut-off, the leftover cash and the
    # choice of the next stock, so the commission reserve is never spent
    budget = acct_val - commission_reserve
    to_buy = filtered_df[filtered_df['cumulative_cost'] <= budget].copy()
    money_left = budget - to_buy['cost'].sum()

    # first stock that did not fit: buy as many whole shares as still fit
    not_fitting = filtered_df[filtered_df['cumulative_cost'] > budget]
    if not not_fitting.empty:
        next_stock = not_fitting.iloc[0].copy()
        extra_shares = money_left // next_stock['Adj_Close']
        if extra_shares > 0:
            next_stock['rounded_shares'] = extra_shares
            next_stock['cost'] = extra_shares * next_stock['Adj_Close']
            to_buy = pd.concat([to_buy, next_stock.to_frame().T])

    to_buy['cumulative_cost'] = to_buy['cost'].cumsum()

    # save for later reference, stamped with the last data date of the
    # last ticker that was ranked
    last_date = stocks[tickers[-1]].index[-1].strftime('%m-%d-%Y')
    to_buy.to_csv('to_buy_' + last_date + '.csv')
    rank_df.to_csv('rank_df_' + last_date + '.csv')
    filtered_df.to_csv('filtered_df_' + last_date + '.csv')
    print(to_buy)

    return to_buy
# get average values of candles in clusters
# (the groupby result is not stored; it is only shown when run in a notebook)
sp500_norm['Cluster'] = labels
sp500_norm.groupby('Cluster').mean()

# try KNN
from sklearn.neighbors import KNeighborsRegressor as KNN

# the neighbour count keyword is n_neighbors; KNN(k=5) raises TypeError
knn = KNN(n_neighbors=5)  # default k

import sys

# add the relative stock_prediction code directory to the module search path;
# this has to run before the import below (presumably where dl_quandl_EOD
# lives -- TODO confirm)
sys.path.append('../stock_prediction/code')

import dl_quandl_EOD as dlq

# load the stock data once at module level; later code indexes it by symbol
dfs = dlq.load_stocks()


def get_open_normalised_prices_features_targets(dfs, symbol, start, end):
    """
    Obtains a pandas DataFrame containing open normalised prices
    for high, low and close for a particular equities symbol
    from Yahoo Finance. That is, it creates High/Open, Low/Open
    and Close/Open columns.
    """
    df = dfs[symbol]
    df['1d_pct_chg'] = df['Adj_Close'].pct_change()
    df["H/O"] = df["Adj_High"] / df["Adj_Open"]
    df["L/O"] = df["Adj_Low"] / df["Adj_Open"]
    df["C/O"] = df["Adj_Close"] / df["Adj_Open"]
    df.drop(["Open", "High", "Low", "Close", "Volume", "Adj Close"],
# Example #3
0
%matplotlib inline
from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import Scatter, Scattergl, Figure, Layout

def calc_vol(st, mean_vol, ticker=None):
    """
    takes dataframe of stock data (st) and calculates tp, 50d-mva, and volatility
    also takes dictionary (mean_vol) as arg

    Adds the columns 'typical_price', '50d_mva' and 'volatility' to st in
    place and stores the mean volatility in mean_vol.  Returns nothing.

    :param st: pandas DataFrame with Adj_High, Adj_Low and Adj_Close columns
    :param mean_vol: dict that receives the mean volatility for this stock
    :param ticker: key to store the result under; if None, falls back to the
                    module-level loop variable ``t`` as the original code did
    """
    st['typical_price'] = st[['Adj_High', 'Adj_Low', 'Adj_Close']].mean(axis=1)
    st['50d_mva'] = talib.SMA(st['typical_price'].values, timeperiod=50)
    # relative distance of the typical price from its 50-day average; the
    # rolling std is unaffected by the sign of the 50d_mva term, so this
    # plain subtraction gives the same volatility as the old `- -` form
    deviation = (st['typical_price'] - st['50d_mva']) / st['typical_price']
    st['volatility'] = deviation.rolling(50).std()
    # prefer the explicit ticker; the global is only read when none is given
    key = t if ticker is None else ticker
    mean_vol[key] = np.mean(st['volatility'])


stocks = dq.load_stocks()
tickers = sorted(stocks)


# keep tickers that have a row for latest_date and whose mean dollar volume
# (Adj_Volume * Adj_Close) over the last 100 rows is above 1e8
vols = []
latest_tickers = []
for t in tickers:
    if latest_date not in stocks[t].index:
        continue

    recent = stocks[t].iloc[-100:]
    vol = np.mean(recent['Adj_Volume'] * recent['Adj_Close'])
    if vol > 1e8:  # 100 million or greater per day
        vols.append(vol)
        latest_tickers.append(t)


def _collect_job_results(jobs, target):
    """
    Waits for every (ticker, future) pair in jobs and stores each result that
    is not None in the target dict under its ticker; reports None results.
    """
    for s, r in jobs:
        res = r.result()
        if res is not None:
            target[s] = res
        else:
            print('result is None for', s)


# need to multithread...
def load_stocks(stocks=None,
                TAs=True,
                finra_shorts=True,
                short_interest=True,
                verbose=False,
                debug=False,
                earliest_date='20150101',
                TAfunc='create_tas',
                calc_scores=True):
    """
    :param stocks: list of strings; tickers (must be caps), if None, will use all stocks possible
    :param TAs: boolean, if true, calculates technical indicators
    :param finra_shorts: boolean, if true, merges finra short data (fin_sh)
    :param short_interest: boolean, if true, merges short interest data (sh_int)
    :param verbose: boolean, prints more debug if true
    :param debug: boolean, if true, the short interest merge runs in a plain
                    loop in this process instead of a process pool
    :param earliest_date: if using an abbreviated EOD .h5 file (for quicker
                            loading), provide earliest date
    :param TAfunc: string, function name for TA creation in calculate_ta_signals.py
    :param calc_scores: boolean, if true will calculate custom scoring metric

    :returns: dict of pandas dataframes with tickers as keys,
                dict of dataframes merged with short interest data (sh_int),
                dict of dataframes merged with finra data (fin_sh)

    Note: when either merge is requested, the per-ticker dataframes get
    their index reset in place.
    """
    print('loading stocks...')
    all_stocks_dfs = dlq.load_stocks(verbose=verbose,
                                     earliest_date=earliest_date)
    existing_stocks = set(all_stocks_dfs.keys())
    if stocks is None:
        stocks = existing_stocks

    dfs = {}
    for s in stocks:
        if s in existing_stocks:
            dfs[s] = all_stocks_dfs[s]
        elif verbose:
            print('stock', s, 'not in quandl data!')

    ret_stocks = sorted(dfs.keys())  # sometimes some stocks are not in there

    if TAs:
        print('calculating TAs...')
        jobs = []
        with ProcessPoolExecutor(max_workers=None) as executor:
            for s in ret_stocks:
                r = executor.submit(getattr(cts, TAfunc),
                                    dfs[s],
                                    verbose=verbose,
                                    return_df=True)
                jobs.append((s, r))

        # leaving the with-block waits for all jobs; the TA result replaces
        # the raw dataframe for that ticker
        _collect_job_results(jobs, dfs)

        del jobs
        gc.collect()

    sh_int = {}
    fin_sh = {}
    # not sure if processpool helping here at all...maybe even want to do
    # thread pool, or just loop it
    if finra_shorts:
        for s in ret_stocks:
            dfs[s].reset_index(inplace=True)

        print('getting finra shorts and merging...')
        finra_sh_df = sfs.load_all_data()
        finra_sh_df.rename(columns={'Symbol': 'Ticker'}, inplace=True)
        fn_stocks = set(finra_sh_df['Ticker'].unique())
        fn_grp = finra_sh_df.groupby(['Ticker', 'Date']).sum()
        jobs = []
        with ProcessPoolExecutor() as executor:
            for s in ret_stocks:
                if s in fn_stocks:
                    r = executor.submit(make_fn_df, s, dfs[s], fn_grp.loc[s])
                    jobs.append((s, r))

        _collect_job_results(jobs, fin_sh)

        del jobs
        gc.collect()

    if short_interest:
        print('getting short interest and merging...')
        # the index may already have been reset by the finra branch; the
        # ret_stocks check avoids an IndexError when no ticker was found
        if ret_stocks and 'Date' not in dfs[ret_stocks[0]].columns:
            for s in ret_stocks:
                dfs[s].reset_index(inplace=True)

        ss_sh = sse.get_short_interest_data(all_cols=True)
        ss_sh.rename(columns={'Symbol': 'Ticker'}, inplace=True)
        ss_sh_grp = ss_sh.groupby('Ticker')
        sh_stocks = set(ss_sh['Ticker'].unique())
        if debug:
            # same call as the parallel branch, just run serially, so the
            # verbose / calc_scores settings are honoured here too
            for s in ret_stocks:
                if s in sh_stocks:
                    sh_int[s] = make_sh_df(s, dfs[s], ss_sh_grp.get_group(s),
                                           verbose, calc_scores)
        else:
            jobs = []
            with ProcessPoolExecutor() as executor:
                for s in ret_stocks:
                    if s in sh_stocks:
                        r = executor.submit(make_sh_df, s, dfs[s],
                                            ss_sh_grp.get_group(s), verbose,
                                            calc_scores)
                        jobs.append((s, r))

            _collect_job_results(jobs, sh_int)

            del jobs
            gc.collect()

    return dfs, sh_int, fin_sh
def portfolio_rebalance(position_check=True,
                        acct_val=20000,
                        risk_factor=0.001):
    """
    to be done once a week

    If stock is below 100 day MA, if had a 15% or more gap, left index, if no longer in 20% of rankings, sell it

    also does position resizing if position_check is True

    The visible code only prints what to liquidate, sell and add; it places
    no orders.

    :param position_check: boolean, if true, also computes position resizing
    :param acct_val: account value in dollars, passed to get_cost_shares_etc
    :param risk_factor: passed to get_cost_shares_etc for position sizing
    """
    stocks = dlq.load_stocks()

    # get current holdings from IB or stored list
    # use latest stored csv for now
    holdings_df = get_latest_holding_file()

    # get index constituents
    barchart_const = cu.load_sp600_files()
    # quandl data has underscores instead of periods
    tickers = {t.replace('.', '_') for t in barchart_const.index}

    # DataFrame.append was removed in pandas 2.0; collect and concat instead
    metric_frames = []
    for t in holdings_df.index:
        one_df = calc_latest_metrics(stocks[t], t, gap_threshold=0.15)
        if isinstance(one_df, pd.Series):
            # keep one row per ticker, like DataFrame.append did for a Series
            one_df = one_df.to_frame().T
        metric_frames.append(one_df)
    full_df = pd.concat(metric_frames) if metric_frames else pd.DataFrame()

    full_df = full_df.sort_values(by='rank_score', ascending=False)
    top_20_pct = set(full_df.index[:120])

    # NOTE(review): a holding is only flagged here when it is in NEITHER the
    # top-ranked set NOR the index; the docstring reads as if failing either
    # one should trigger a sale -- confirm which rule is intended
    keep_universe = top_20_pct.union(tickers)  # built once, not per holding
    outside_universe = [
        f not in keep_universe for f in full_df.index.tolist()
    ]
    kickout = full_df[(full_df['bullish'] == False) |  # noqa: E712
                      (full_df['gap'] == True) |  # noqa: E712
                      (outside_universe)]

    # start from zero so the rebalance code below can add to it even when
    # nothing is liquidated
    money_available = 0
    if kickout.shape[0] > 0:
        print('liquidate:')
        print(kickout)
        print('\n')
        # calculate money available for new purchases
        # TODO: get available cash from IB
        prices = pd.Series(
            [stocks[t]['Adj_Close'].iloc[-1] for t in kickout.index],
            index=kickout.index)
        money_available = sum(
            holdings_df.loc[kickout.index]['rounded_shares'] * prices)

    rebal_money_available = 0
    if position_check:
        # TODO: don't rebalance (sell) things if going up steadily -- need to quantify
        #
        full_df = get_cost_shares_etc(full_df,
                                      acct_val=acct_val,
                                      risk_factor=risk_factor).copy()
        full_df['current_shares'] = holdings_df.loc[
            full_df.index]['rounded_shares']
        full_df['pct_diff_shares'] = (
            full_df['rounded_shares'] -
            full_df['current_shares']) / full_df['current_shares']
        # move the share columns to the end for easier reading
        end_cols = ['current_shares', 'rounded_shares', 'pct_diff_shares']
        full_df = full_df[[c for c in full_df.columns if c not in end_cols] +
                          end_cols]
        full_df['cost_diff'] = full_df['Adj_Close'] * (
            full_df['current_shares'] - full_df['rounded_shares'])
        # book suggested 5% as threshold for resizing, use 10% for less transaction cost
        to_rebalance = full_df.loc[
            full_df['pct_diff_shares'].abs() >= 0.1].copy()
        # also ignore any kickout stocks
        to_rebalance = to_rebalance.loc[
            ~to_rebalance.index.isin(kickout.index)].copy()
        if to_rebalance.shape[0] > 0:
            # first get rebalance sells, and find out how much available -- add to available from kickout
            # copy is important to use here to avoid settingwithcopy warning
            neg_rebal = to_rebalance.loc[
                to_rebalance['pct_diff_shares'] < 0].copy()
            neg_rebal.loc[:, 'sell_shares'] = neg_rebal[
                'current_shares'] - neg_rebal['rounded_shares']
            print('sell shares:')
            print(neg_rebal['sell_shares'])
            neg_rebal_prices = pd.Series(
                [stocks[t]['Adj_Close'].iloc[-1] for t in neg_rebal.index],
                index=neg_rebal.index)
            rebal_money_available = sum(
                (neg_rebal['current_shares'] - neg_rebal['rounded_shares']) *
                neg_rebal_prices)
            money_available += rebal_money_available

            # TODO: check if market bullish -- if not, can't add any more shares or buy new
            # add to current holdings until no more money available
            pos_rebal = to_rebalance.loc[
                to_rebalance['pct_diff_shares'] > 0].copy()
            # sort first so prices and the running cost follow rank order
            pos_rebal.sort_values(by='rank_score',
                                  inplace=True,
                                  ascending=False)
            pos_rebal_prices = pd.Series(
                [stocks[t]['Adj_Close'].iloc[-1] for t in pos_rebal.index],
                index=pos_rebal.index)
            pos_rebal.loc[:, 'add_shares'] = pos_rebal[
                'rounded_shares'] - pos_rebal['current_shares']
            add_cost = pos_rebal['add_shares'] * pos_rebal_prices
            # running total in rank order, so the filter below stops once
            # the money runs out
            pos_rebal.loc[:, 'cumulative_cost'] = add_cost.cumsum()
            if add_cost.sum() < money_available:
                print('add to holdings: ')
                print(pos_rebal[['add_shares']])
                # TODO: get next best stocks to add to portfolio
            else:
                can_buy = pos_rebal[
                    pos_rebal['cumulative_cost'] <= money_available]
                print('add all possible rebalances to holdings:')
                print(can_buy)