Пример #1
0
        def get_net_data(BEG, END):
            """Slice env data for [BEG, END] and build net inputs and labels.

            Returns (beg_idx, ds_sz, batch_num, raw_dates, raw_week_days,
            tradeable_mask, port_mask, px, input, labels), where labels are
            relative price changes over PRED_HORIZON trading days.
            """
            beg_idx, end_idx = env.get_data_idxs_range(BEG, END)
            horizon = get_config().PRED_HORIZON

            raw_dates = env.get_raw_dates(beg_idx, end_idx)
            input = env.get_input(beg_idx, end_idx)
            px = env.get_adj_close_px(beg_idx, end_idx)
            # the same price series shifted forward by the prediction horizon
            px_pred_hor = env.get_adj_close_px(beg_idx + horizon,
                                               end_idx + horizon)
            tradeable_mask = env.get_tradeable_mask(beg_idx, end_idx)
            port_mask = env.get_portfolio_mask(beg_idx, end_idx)

            # the shifted series is shorter near the end of history,
            # so trim every array to its length
            ds_sz = px_pred_hor.shape[1]
            raw_dates = raw_dates[:ds_sz]
            raw_week_days = np.array(
                [date_from_timestamp(ts).isoweekday() for ts in raw_dates],
                dtype=np.int32)

            input = input[:, :ds_sz, :]
            tradeable_mask = tradeable_mask[:, :ds_sz]
            port_mask = port_mask[:, :ds_sz]
            px = px[:, :ds_sz]

            labels = (px_pred_hor - px) / px
            batch_num = get_batches_num(ds_sz, get_config().BPTT_STEPS)

            return beg_idx, ds_sz, batch_num, raw_dates, raw_week_days, tradeable_mask, port_mask, px, input, labels
Пример #2
0
 def get_portfolio_mask(self, BEG_DATA_IDX, END_DATA_IDX):
     """Mask of stocks eligible for the portfolio over the index range.

     For S&P-index / all-stocks-index / universal-net configurations this
     is tradable AND S&P membership; otherwise just the tradeable mask.
     """
     cfg = get_config()
     uses_snp_universe = (cfg.is_snp_index() or cfg.is_all_stocks_index()
                          or cfg.is_universal_net())
     if not uses_snp_universe:
         return self.get_tradeable_mask(BEG_DATA_IDX, END_DATA_IDX)
     cols = slice(BEG_DATA_IDX, END_DATA_IDX + 1)
     return self.tradable_mask[:, cols] & self.snp_mask[:, cols]
Пример #3
0
def create_folders():
    """Ensure the weights and train/test figure folders exist."""
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(get_config().WEIGHTS_FOLDER_PATH, exist_ok=True)
    os.makedirs(get_config().TRAIN_FIG_PATH, exist_ok=True)
    os.makedirs(get_config().TEST_FIG_PATH, exist_ok=True)
Пример #4
0
            def eval():
                nonlocal raw_dates, beg_data_idx, ds_size, batch_num, input, labels, px, mask, port_mask, eq, pred_hist, state
                curr_progress = 0
                cash = 1
                pos = np.zeros((total_tickers))
                pos_px = np.zeros((total_tickers))
                losses = np.zeros((batch_num))

                for b in range(batch_num):
                    if state is None:
                        state = net.zero_state(total_tickers)

                    b_b_i, b_e_i = get_batch_range(b)
                    _input = input[:, b_b_i:b_e_i, :]
                    _labels = labels[:, b_b_i:b_e_i]
                    _mask = mask[:, b_b_i:b_e_i]

                    state, loss, predictions = net.eval(
                        state, _input, _labels, _mask.astype(np.float32))
                    pred_hist[:, b_b_i:b_e_i] = predictions[:, :, 0]

                    for i in range(predictions.shape[1]):
                        data_idx = b * get_config().BPTT_STEPS + i
                        curr_px = px[:, data_idx]
                        global_data_idx = beg_data_idx + data_idx

                        date = datetime.datetime.fromtimestamp(
                            raw_dates[data_idx]).date()

                        open_pos = False
                        close_pos = False
                        if get_config().REBALANCE_FRI:
                            if date.isoweekday() == 5:
                                open_pos = True
                            if date.isoweekday() == 5:
                                close_pos = True
                        else:
                            if data_idx % get_config().REBALANCE_FREQ == 0:
                                close_pos = True
                                open_pos = True

                        if close_pos:
                            rpl = np.sum(pos * (curr_px - pos_px))
                            cash += rpl
                            pos[:] = 0
                        if open_pos:
                            pos_px = curr_px
                            pos_mask = port_mask[:, data_idx]
                            num_stks = np.sum(pos_mask)
                            if get_config().CAPM:
                                exp, cov = env.get_exp_and_cov(
                                    pos_mask, global_data_idx -
                                    get_config().COVARIANCE_LENGTH + 1,
                                    global_data_idx)
                                exp = get_config().REBALANCE_FREQ * exp
                                cov = get_config().REBALANCE_FREQ * get_config(
                                ).REBALANCE_FREQ * cov
                                if get_config().CAPM_USE_NET_PREDICTIONS:
                                    exp = predictions[:, i, 0][pos_mask]

                                capm = Capm(num_stks)
                                capm.init()

                                best_sharpe = None
                                best_weights = None
                                best_constriant = None
                                while i <= 10000:
                                    w, sharpe, constraint = capm.get_params(
                                        exp, cov)
                                    # print("Iteration: %d Sharpe: %.2f Constraint: %.6f" % (i, sharpe, constraint))
                                    if w is None:
                                        break
                                    if best_sharpe is None or sharpe >= best_sharpe:
                                        best_weights = w
                                        best_sharpe = sharpe
                                        best_constriant = constraint
                                    capm.fit(exp, cov)
                                    capm.rescale_weights()

                                    i += 1
                                date = datetime.datetime.fromtimestamp(
                                    raw_dates[data_idx]).date()
                                print(
                                    "Date: %s sharpe: %.2f constraint: %.6f" %
                                    (date.strftime('%Y-%m-%d'), best_sharpe,
                                     best_constriant))

                                pos[pos_mask] = best_weights / curr_px[pos_mask]
                            else:
                                pos[pos_mask] = 1 / num_stks / curr_px[
                                    pos_mask] * np.sign(predictions[pos_mask,
                                                                    i, 0])

                        urpl = np.sum(pos * (curr_px - pos_px))
                        nlv = cash + urpl

                        eq[data_idx] = nlv

                    losses[b] = loss
                    curr_progress = progress.print_progress(
                        curr_progress, b, tr_batch_num)

                progress.print_progess_end()
                avg_loss = np.mean(np.sqrt(losses))
                return avg_loss
Пример #5
0
 def get_batch_range(b):
     """Return the [start, end) time indices of BPTT batch ``b``."""
     step = get_config().BPTT_STEPS
     return b * step, (b + 1) * step
Пример #6
0
import csv
import os.path
import pandas as pd
import datetime

from portfolio.net_shiva import NetShiva
from portfolio.multi_stock_config import get_config, Mode
from portfolio.stat import print_alloc, get_draw_down, get_sharpe_ratio, get_capital, get_avg_yeat_ret
from portfolio.graphs import plot_equity_curve, show_plots, create_time_serie_fig, plot_time_serie
import progress
import matplotlib.pyplot as plt
from portfolio.capm import Capm

from portfolio.multi_stock_env import Env, date_from_timestamp

# Disable matplotlib interactive mode while training so figures are only
# rendered when explicitly saved or shown.
if get_config().MODE == Mode.TRAIN:
    plt.ioff()


def create_folders():
    """Ensure the weights and train/test figure folders exist."""
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(get_config().WEIGHTS_FOLDER_PATH, exist_ok=True)
    os.makedirs(get_config().TRAIN_FIG_PATH, exist_ok=True)
    os.makedirs(get_config().TEST_FIG_PATH, exist_ok=True)


def get_batches_num(ds_sz, bptt_steps):
    """Number of BPTT batches needed to cover ``ds_sz`` steps (ceiling division)."""
    return (ds_sz + bptt_steps - 1) // bptt_steps
Пример #7
0
def idx_to_date(idx):
    """Map a day offset from TRAIN_BEG back to a calendar date."""
    offset = datetime.timedelta(days=idx)
    return get_config().TRAIN_BEG + offset
Пример #8
0
# stocks = get_snp_tickers()

# 2015-10-30
# stocks = ['HPQ']
# stocks = ['BAC']


def create_folders():
    """Ensure the per-ticker data folder exists."""
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(get_config().DATA_FOLDER_PATH, exist_ok=True)


net = NetShiva()
# Train one model per ticker: point the global config at each stock,
# download and preprocess its history, then train the shared net on it.
for stock in stocks:
    print("Processing %s stock" % stock)
    get_config().TICKER = stock
    get_config().reload()  # re-derive per-ticker paths from TICKER

    _tickers = [stock]
    create_folders()
    # fetch raw history for this single ticker
    download_data(_tickers,
                  get_config().DATA_PATH,
                  get_config().HIST_BEG,
                  get_config().HIST_END)
    # convert raw csv history into the npz feature file the net consumes
    preprocess_data(_tickers,
                    get_config().DATA_PATH,
                    get_config().HIST_BEG,
                    get_config().HIST_END,
                    get_config().DATA_NPZ_PATH,
                    get_config().DATA_FEATURES)
    train(net)
Пример #9
0
 def is_train():
     """Return True when the global config mode is TRAIN."""
     current_mode = get_config().MODE
     return current_mode == Mode.TRAIN
Пример #10
0
        df['date'] = df['ts'].apply(date_from_timestamp)

        predications.append(df)
    except:
        pass

os.chdir(pwd)  # restore the working directory saved before reading predictions

# NOTE(review): 'predications' looks like a typo for 'predictions' — the list
# is defined above this chunk, so the name is kept as-is here.
pred_df = pd.concat(predications, ignore_index=True)
pred_df_length = len(pred_df['date'])

# add stock idx column
pred_df['stk_idx'] = pd.Series(np.zeros((pred_df_length), dtype=np.int32),
                               index=pred_df.index)

# switch the config to the universal net so Env loads the combined data set
get_config().TICKER = 'UNIVERSAL_NET'
get_config().reload()
env = Env()
total_stocks = len(env.tickers)

# update stk_idx column for all records
for stock in stocks:
    stk_idx = env._ticker_to_idx(stock)
    pred_df.loc[pred_df['ticker'] == stock, 'stk_idx'] = stk_idx

# training date range -> data indices; slice prices and portfolio mask for it
beg_idx, end_idx = env.get_data_idxs_range(get_config().TRAIN_BEG,
                                           get_config().TRAIN_END)
trading_days = end_idx + 1 - beg_idx
adj_px = env.get_adj_close_px(beg_idx, end_idx)
# NOTE(review): 'tradable_maks' is a typo for 'tradable_mask'; kept because
# later code in the file may reference this exact name.
tradable_maks = env.get_portfolio_mask(beg_idx, end_idx)
# tradable_maks = env.get_tradeable_mask(beg_idx, end_idx)
Пример #11
0
    'XOM',
    'XRAY',
    'XRX',
    'XYL',
    'YUM',
    'ZBH',
    'ZION',
    'ZTS',
    'Q',
    'BHF',
    'RMD',
    'PKG',
    'MGM',
]

# run the net in prediction (inference) mode for the tickers listed above
get_config().PREDICTION_MODE = True


def create_folders():
    """Ensure the per-ticker data folder exists."""
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(get_config().DATA_FOLDER_PATH, exist_ok=True)


# ticker -> exchange mapping used when downloading data
ticker_exch_map = get_snp_tickers_exch_map()


def get_net_data(env, BEG, END):
    beg_idx, end_idx = env.get_data_idxs_range(BEG, END)

    raw_dates = env.get_raw_dates(beg_idx, end_idx)
    input = env.get_input(beg_idx, end_idx)
Пример #12
0
 def get_adj_close_px(self, BEG_DATA_IDX, END_DATA_IDX):
     """Adjusted close prices for all stocks over [BEG_DATA_IDX, END_DATA_IDX] (inclusive)."""
     px_feature = get_config().ADJ_CLOSE_DATA_IDX
     return self.raw_data[:, BEG_DATA_IDX:END_DATA_IDX + 1, px_feature]
Пример #13
0
    def __init__(self):
        """Load the price data set and precompute masks and net inputs.

        Builds: ``raw_dt``/``raw_data`` restricted to trading days, the
        tradable and S&P membership masks, ``traded_stocks_per_day`` and the
        normalized ``input`` tensor of per-day relative price/volume changes
        (shape: stocks x trading_days x 6).
        """
        print('loading data...')
        tickers, raw_dt, raw_data = load_npz_data(get_config().DATA_NPZ_PATH)
        print('data load complete')

        self._tickers = tickers
        self.stks = self.tickers.shape[0]

        days = raw_dt.shape[0]

        def _idx_to_date(idx):
            # timestamp of a raw day index -> calendar date
            return datetime.datetime.fromtimestamp(raw_dt[idx]).date()

        # calc data dates range
        HIST_BEG = _idx_to_date(0)
        HIST_END = _idx_to_date(-1)

        def _date_to_idx(date):
            # inverse of _idx_to_date; assumes one record per calendar day
            # within [HIST_BEG, HIST_END] — TODO confirm against data source
            if HIST_BEG <= date <= HIST_END:
                return (date - HIST_BEG).days
            return None

        # calc snp_mask: True where the stock was an S&P member on that day
        snp_mask = np.full((self.stks, days), False)

        snp_mask_df = pd.read_csv('data/snp/snp_mask.csv')

        for idx, row in snp_mask_df.iterrows():
            _from = datetime.datetime.strptime(row['from'], '%Y-%m-%d').date()
            _to = datetime.datetime.strptime(row['to'], '%Y-%m-%d').date()
            _ticker = row['ticker']
            stk_idx = self._ticker_to_idx(_ticker)
            if stk_idx is None:
                continue
            # clamp the membership interval to the loaded history range
            _from = max(_from, HIST_BEG)
            _from = min(_from, HIST_END)
            _to = min(_to, HIST_END)
            _to = max(_to, HIST_BEG)
            _from_idx = _date_to_idx(_from)
            _to_idx = _date_to_idx(_to)

            snp_mask[stk_idx, _from_idx:_to_idx + 1] = True

        # calc tradable_mask, traded_stocks_per_day, trading_day_mask
        # a stock is tradable on a day when every raw feature is positive
        tradable_mask = np.all(raw_data > 0.0, axis=2)
        traded_stocks_per_day = tradable_mask[:, :].sum(0)
        trading_day_mask = traded_stocks_per_day >= get_config(
        ).MIN_STOCKS_TRADABLE_PER_TRADING_DAY

        self.trading_days = np.sum(trading_day_mask)

        # leave tradeable days only
        self.raw_dt = raw_dt[trading_day_mask]
        self.raw_data = raw_data[:, trading_day_mask, :]
        self.traded_stocks_per_day = traded_stocks_per_day[trading_day_mask]
        self.tradable_mask = tradable_mask[:, trading_day_mask]
        self.snp_mask = snp_mask[:, trading_day_mask]

        # prepare input array
        input = np.zeros((self.stks, self.trading_days, 6))
        # fill array for each stock
        for stk_idx in range(self.stks):
            stk_raw_data = self.raw_data[stk_idx, :, :]
            tradable_mask = self.tradable_mask[stk_idx]

            # dirty hack: fill prices with first known px
            tr_days_idxs = np.nonzero(tradable_mask)[0]
            # BUG FIX: first_adj_close_px must be initialized here as well —
            # previously only first_adj_volume (and an unused first_volume)
            # were set, so a stock with zero tradable days raised NameError
            # at the 'if first_adj_close_px is None' check below.
            first_adj_close_px = None
            first_adj_volume = None
            if tr_days_idxs.shape[0] > 0:
                first_adj_close_px = stk_raw_data[
                    tr_days_idxs[0],
                    get_config().ADJ_CLOSE_DATA_IDX]
                first_adj_volume = stk_raw_data[
                    tr_days_idxs[0],
                    get_config().ADJ_VOLUME_DATA_IDX]
            if first_adj_close_px is None:
                # no tradable days at all: neutral constant series
                stk_raw_data[:, :] = 1
            else:
                # forward-fill non-tradable days with the last known values
                last_px = first_adj_close_px
                last_volume = first_adj_volume
                for day_idx in range(self.trading_days):
                    if tradable_mask[day_idx] == 0:
                        stk_raw_data[day_idx,
                                     get_config().ADJ_CLOSE_DATA_IDX] = last_px
                        stk_raw_data[day_idx,
                                     get_config().ADJ_OPEN_DATA_IDX] = last_px
                        stk_raw_data[day_idx,
                                     get_config().ADJ_HIGH_DATA_IDX] = last_px
                        stk_raw_data[day_idx,
                                     get_config().ADJ_LOW_DATA_IDX] = last_px
                        stk_raw_data[
                            day_idx,
                            get_config().ADJ_VOLUME_DATA_IDX] = last_volume
                    else:
                        last_px = stk_raw_data[day_idx,
                                               get_config().ADJ_CLOSE_DATA_IDX]
                        last_volume = stk_raw_data[
                            day_idx, get_config().ADJ_VOLUME_DATA_IDX]

            self.raw_data[stk_idx, :, :] = stk_raw_data

            stk_data = stk_raw_data[tradable_mask, :]
            a_o = stk_data[:, get_config().ADJ_OPEN_DATA_IDX]
            a_c = stk_data[:, get_config().ADJ_CLOSE_DATA_IDX]
            a_h = stk_data[:, get_config().ADJ_HIGH_DATA_IDX]
            a_l = stk_data[:, get_config().ADJ_LOW_DATA_IDX]
            a_v = stk_data[:, get_config().ADJ_VOLUME_DATA_IDX]

            if a_c.shape[0] == 0:
                continue

            # previous-day close/volume; the first day uses itself so its
            # relative change is zero
            prev_a_c = np.roll(a_c, 1)
            prev_a_c[0] = a_c[0]
            prev_a_v = np.roll(a_v, 1)
            prev_a_v[0] = a_v[0]

            # relative changes vs previous close / previous volume
            x_o = (a_o - prev_a_c) / prev_a_c
            x_c = (a_c - prev_a_c) / prev_a_c
            x_h = (a_h - prev_a_c) / prev_a_c
            x_l = (a_l - prev_a_c) / prev_a_c
            x_v = (a_v - prev_a_v) / prev_a_v

            input[stk_idx, tradable_mask, 0] = x_o
            input[stk_idx, tradable_mask, 1] = x_c
            input[stk_idx, tradable_mask, 2] = x_h
            input[stk_idx, tradable_mask, 3] = x_l
            input[stk_idx, tradable_mask, 4] = x_v
            input[stk_idx, tradable_mask, 5] = 1  # bias / validity feature
        self.input = input
Пример #14
0
def date_to_idx(date):
    """Day offset of ``date`` from TRAIN_BEG, or None when outside the training range."""
    cfg = get_config()
    if not (cfg.TRAIN_BEG <= date <= cfg.TRAIN_END):
        return None
    return (date - cfg.TRAIN_BEG).days
Пример #15
0
def train(net):
    create_folders()

    env = Env()
    # net = NetShiva()

    if not os.path.exists(get_config().TRAIN_STAT_PATH):
        with open(get_config().TRAIN_STAT_PATH, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(
                ('epoch', 'tr loss', 'tr dd', 'tr sharpe', 'tr y avg',
                 'tst loss', 'tst dd', 'tst sharpe', 'tst y avg'))

    total_tickers = len(env.tickers)

    def open_train_stat_file():
        return open(get_config().TRAIN_STAT_PATH, 'a', newline='')

    def is_train():
        return get_config().MODE == Mode.TRAIN

    with open_train_stat_file() if is_train() else dummy_context_mgr() as f:
        if is_train():
            writer = csv.writer(f)
        if get_config().EPOCH_WEIGHTS_TO_LOAD != 0:
            net.load_weights(get_config().WEIGHTS_PATH,
                             get_config().EPOCH_WEIGHTS_TO_LOAD)
            epoch = get_config().EPOCH_WEIGHTS_TO_LOAD
            if is_train():
                epoch += 1
        else:
            net.init()
            epoch = 0

        def get_net_data(BEG, END):
            """Slice env data for [BEG, END] and build net inputs and labels.

            Returns (beg_idx, ds_sz, batch_num, raw_dates, raw_week_days,
            tradeable_mask, port_mask, px, input, labels), where labels are
            relative price changes over PRED_HORIZON trading days.
            """
            beg_idx, end_idx = env.get_data_idxs_range(BEG, END)
            horizon = get_config().PRED_HORIZON

            raw_dates = env.get_raw_dates(beg_idx, end_idx)
            input = env.get_input(beg_idx, end_idx)
            px = env.get_adj_close_px(beg_idx, end_idx)
            # the same price series shifted forward by the prediction horizon
            px_pred_hor = env.get_adj_close_px(beg_idx + horizon,
                                               end_idx + horizon)
            tradeable_mask = env.get_tradeable_mask(beg_idx, end_idx)
            port_mask = env.get_portfolio_mask(beg_idx, end_idx)

            # the shifted series is shorter near the end of history,
            # so trim every array to its length
            ds_sz = px_pred_hor.shape[1]
            raw_dates = raw_dates[:ds_sz]
            raw_week_days = np.array(
                [date_from_timestamp(ts).isoweekday() for ts in raw_dates],
                dtype=np.int32)

            input = input[:, :ds_sz, :]
            tradeable_mask = tradeable_mask[:, :ds_sz]
            port_mask = port_mask[:, :ds_sz]
            px = px[:, :ds_sz]

            labels = (px_pred_hor - px) / px
            batch_num = get_batches_num(ds_sz, get_config().BPTT_STEPS)

            return beg_idx, ds_sz, batch_num, raw_dates, raw_week_days, tradeable_mask, port_mask, px, input, labels

        tr_beg_data_idx, tr_ds_sz, tr_batch_num, tr_raw_dates, tr_week_days, tr_tradeable_mask, tr_port_mask, tr_px, tr_input, tr_labels = get_net_data(
            get_config().TRAIN_BEG,
            get_config().TRAIN_END)
        tr_eq = np.zeros((tr_ds_sz))
        tr_pred = np.zeros((total_tickers, tr_ds_sz))

        if get_config().TEST:
            tst_beg_data_idx, tst_ds_sz, tst_batch_num, tst_raw_dates, tst_week_days, tst_tradeable_mask, tst_port_mask, tst_px, tst_input, tst_labels = get_net_data(
                get_config().TEST_BEG,
                get_config().TEST_END)
            tst_eq = np.zeros((tst_ds_sz))
            tst_pred = np.zeros((total_tickers, tst_ds_sz))

        def get_batch_range(b):
            """Return the [start, end) time indices of BPTT batch ``b``."""
            step = get_config().BPTT_STEPS
            return b * step, (b + 1) * step

        while epoch <= get_config().MAX_EPOCH:

            print("Eval %d epoch on train set..." % epoch)
            batch_num = tr_batch_num
            ds_size = tr_ds_sz
            input = tr_input
            labels = tr_labels
            px = tr_px
            mask = tr_tradeable_mask
            port_mask = tr_port_mask
            eq = tr_eq
            beg_data_idx = tr_beg_data_idx
            raw_dates = tr_raw_dates
            pred_hist = tr_pred
            state = None

            def eval_with_state_reset():
                nonlocal raw_dates, beg_data_idx, ds_size, batch_num, input, labels, px, mask, port_mask, eq, pred_hist, state
                curr_progress = 0
                cash = 1
                pos = np.zeros((total_tickers))
                pos_px = np.zeros((total_tickers))
                losses = np.zeros((batch_num))
                for i in range(ds_size):
                    state = net.zero_state(total_tickers)
                    b_b_i = max(0,
                                i + 1 - get_config().RESET_HIDDEN_STATE_FREQ)
                    b_e_i = i + 1
                    _input = input[:, b_b_i:b_e_i, :]
                    _labels = labels[:, b_b_i:b_e_i]
                    _mask = mask[:, b_b_i:b_e_i]
                    state, loss, predictions = net.eval(
                        state, _input, _labels, _mask.astype(np.float32))
                    pred_hist[:, i] = predictions[:, -1, 0]

                    data_idx = i
                    curr_px = px[:, data_idx]
                    global_data_idx = beg_data_idx + data_idx

                    date = datetime.datetime.fromtimestamp(
                        raw_dates[data_idx]).date()

                    open_pos = False
                    close_pos = False
                    if get_config().REBALANCE_FRI:
                        if date.isoweekday() == 5:
                            open_pos = True
                        if date.isoweekday() == 5:
                            close_pos = True
                    else:
                        if data_idx % get_config().REBALANCE_FREQ == 0:
                            close_pos = True
                            open_pos = True

                    if close_pos:
                        rpl = np.sum(pos * (curr_px - pos_px))
                        cash += rpl
                        pos[:] = 0
                    if open_pos:
                        pos_px = curr_px
                        pos_mask = port_mask[:, data_idx]
                        num_stks = np.sum(pos_mask)
                        if get_config().CAPM:
                            exp, cov = env.get_exp_and_cov(
                                pos_mask, global_data_idx -
                                get_config().COVARIANCE_LENGTH + 1,
                                global_data_idx)
                            exp = get_config().REBALANCE_FREQ * exp
                            cov = get_config().REBALANCE_FREQ * get_config(
                            ).REBALANCE_FREQ * cov
                            if get_config().CAPM_USE_NET_PREDICTIONS:
                                exp = predictions[:, i, 0][pos_mask]

                            capm = Capm(num_stks)
                            capm.init()

                            best_sharpe = None
                            best_weights = None
                            best_constriant = None
                            while i <= 10000:
                                w, sharpe, constraint = capm.get_params(
                                    exp, cov)
                                # print("Iteration: %d Sharpe: %.2f Constraint: %.6f" % (i, sharpe, constraint))
                                if w is None:
                                    break
                                if best_sharpe is None or sharpe >= best_sharpe:
                                    best_weights = w
                                    best_sharpe = sharpe
                                    best_constriant = constraint
                                capm.fit(exp, cov)
                                capm.rescale_weights()

                                i += 1
                            date = datetime.datetime.fromtimestamp(
                                raw_dates[data_idx]).date()
                            print("Date: %s sharpe: %.2f constraint: %.6f" %
                                  (date.strftime('%Y-%m-%d'), best_sharpe,
                                   best_constriant))

                            pos[pos_mask] = best_weights / curr_px[pos_mask]
                        else:
                            pos[pos_mask] = 1 / num_stks / curr_px[
                                pos_mask] * np.sign(predictions[pos_mask, -1,
                                                                0])

                    urpl = np.sum(pos * (curr_px - pos_px))
                    nlv = cash + urpl

                    eq[data_idx] = nlv

                    curr_progress = progress.print_progress(
                        curr_progress, i, ds_size)

                progress.print_progess_end()
                avg_loss = np.mean(np.sqrt(losses))
                return avg_loss

            def eval():
                nonlocal raw_dates, beg_data_idx, ds_size, batch_num, input, labels, px, mask, port_mask, eq, pred_hist, state
                curr_progress = 0
                cash = 1
                pos = np.zeros((total_tickers))
                pos_px = np.zeros((total_tickers))
                losses = np.zeros((batch_num))

                for b in range(batch_num):
                    if state is None:
                        state = net.zero_state(total_tickers)

                    b_b_i, b_e_i = get_batch_range(b)
                    _input = input[:, b_b_i:b_e_i, :]
                    _labels = labels[:, b_b_i:b_e_i]
                    _mask = mask[:, b_b_i:b_e_i]

                    state, loss, predictions = net.eval(
                        state, _input, _labels, _mask.astype(np.float32))
                    pred_hist[:, b_b_i:b_e_i] = predictions[:, :, 0]

                    for i in range(predictions.shape[1]):
                        data_idx = b * get_config().BPTT_STEPS + i
                        curr_px = px[:, data_idx]
                        global_data_idx = beg_data_idx + data_idx

                        date = datetime.datetime.fromtimestamp(
                            raw_dates[data_idx]).date()

                        open_pos = False
                        close_pos = False
                        if get_config().REBALANCE_FRI:
                            if date.isoweekday() == 5:
                                open_pos = True
                            if date.isoweekday() == 5:
                                close_pos = True
                        else:
                            if data_idx % get_config().REBALANCE_FREQ == 0:
                                close_pos = True
                                open_pos = True

                        if close_pos:
                            rpl = np.sum(pos * (curr_px - pos_px))
                            cash += rpl
                            pos[:] = 0
                        if open_pos:
                            pos_px = curr_px
                            pos_mask = port_mask[:, data_idx]
                            num_stks = np.sum(pos_mask)
                            if get_config().CAPM:
                                exp, cov = env.get_exp_and_cov(
                                    pos_mask, global_data_idx -
                                    get_config().COVARIANCE_LENGTH + 1,
                                    global_data_idx)
                                exp = get_config().REBALANCE_FREQ * exp
                                cov = get_config().REBALANCE_FREQ * get_config(
                                ).REBALANCE_FREQ * cov
                                if get_config().CAPM_USE_NET_PREDICTIONS:
                                    exp = predictions[:, i, 0][pos_mask]

                                capm = Capm(num_stks)
                                capm.init()

                                best_sharpe = None
                                best_weights = None
                                best_constriant = None
                                while i <= 10000:
                                    w, sharpe, constraint = capm.get_params(
                                        exp, cov)
                                    # print("Iteration: %d Sharpe: %.2f Constraint: %.6f" % (i, sharpe, constraint))
                                    if w is None:
                                        break
                                    if best_sharpe is None or sharpe >= best_sharpe:
                                        best_weights = w
                                        best_sharpe = sharpe
                                        best_constriant = constraint
                                    capm.fit(exp, cov)
                                    capm.rescale_weights()

                                    i += 1
                                date = datetime.datetime.fromtimestamp(
                                    raw_dates[data_idx]).date()
                                print(
                                    "Date: %s sharpe: %.2f constraint: %.6f" %
                                    (date.strftime('%Y-%m-%d'), best_sharpe,
                                     best_constriant))

                                pos[pos_mask] = best_weights / curr_px[pos_mask]
                            else:
                                pos[pos_mask] = 1 / num_stks / curr_px[
                                    pos_mask] * np.sign(predictions[pos_mask,
                                                                    i, 0])

                        urpl = np.sum(pos * (curr_px - pos_px))
                        nlv = cash + urpl

                        eq[data_idx] = nlv

                    losses[b] = loss
                    curr_progress = progress.print_progress(
                        curr_progress, b, tr_batch_num)

                progress.print_progess_end()
                avg_loss = np.mean(np.sqrt(losses))
                return avg_loss

            if get_config().RESET_HIDDEN_STATE_FREQ == 0:
                tr_avg_loss = eval()
            else:
                tr_avg_loss = eval_with_state_reset()

            print("Train loss: %.4f%%" % (tr_avg_loss * 100))

            if get_config().TEST:
                print("Eval %d epoch on test set..." % epoch)
                batch_num = tst_batch_num
                ds_size = tr_ds_sz
                input = tst_input
                labels = tst_labels
                px = tst_px
                mask = tst_tradeable_mask
                port_mask = tst_port_mask
                eq = tst_eq
                beg_data_idx = tst_beg_data_idx
                raw_dates = tst_raw_dates
                pred_hist = tst_pred
                state = None

                if get_config().RESET_HIDDEN_STATE_FREQ == 0:
                    tst_avg_loss = eval()
                else:
                    tst_avg_loss = eval_with_state_reset()

                print("Test loss: %.4f%%" % (tst_avg_loss * 100))

            if not is_train():
                dt = build_time_axis(tr_raw_dates)

                if not get_config().HIDE_PLOTS:
                    plot_eq('Train',
                            get_config().TRAIN_BEG,
                            get_config().TRAIN_END, dt, tr_eq)

                result = pd.DataFrame(columns=('date', 'ticker', 'prediction'))
                for i in range(tr_raw_dates.shape[0]):
                    date = dt[i]
                    for j in range(total_tickers):
                        ticker = env._idx_to_ticker(j)
                        prediction = tr_pred[j, i]

                        row = [date, ticker, prediction]
                        result.loc[i * total_tickers + j] = row
                result.to_csv(get_config().TRAIN_PRED_PATH, index=False)

                if get_config().TEST:
                    dt = build_time_axis(tst_raw_dates)
                    if not get_config().HIDE_PLOTS:
                        plot_eq('Test',
                                get_config().TEST_BEG,
                                get_config().TEST_END, dt, tst_eq)

                if not get_config().HIDE_PLOTS:
                    show_plots()
                break

            if is_train() and epoch <= get_config().MAX_EPOCH:

                # plot and save graphs
                dt = build_time_axis(tr_raw_dates)
                fig, tr_dd, tr_sharpe, tr_y_avg = plot_eq(
                    'Train',
                    get_config().TRAIN_BEG,
                    get_config().TRAIN_END, dt, tr_eq)
                fig.savefig('%s/%04d.png' %
                            (get_config().TRAIN_FIG_PATH, epoch))
                plt.close(fig)

                if get_config().TEST:
                    dt = build_time_axis(tst_raw_dates)
                    fig, tst_dd, tst_sharpe, tst_y_avg = plot_eq(
                        'Test',
                        get_config().TEST_BEG,
                        get_config().TEST_END, dt, tst_eq)
                    fig.savefig('%s/%04d.png' %
                                (get_config().TEST_FIG_PATH, epoch))
                    plt.close(fig)
                else:
                    tst_avg_loss = 0
                    tst_dd = 0
                    tst_sharpe = 0
                    tst_y_avg = 0

                writer.writerow((
                    epoch,
                    tr_avg_loss,
                    tr_dd,
                    tr_sharpe,
                    tr_y_avg,
                    tst_avg_loss,
                    tst_dd,
                    tst_sharpe,
                    tst_y_avg,
                ))

                f.flush()

                if epoch == get_config().MAX_EPOCH:
                    tr_df = pd.DataFrame({'date': dt, 'capital': tr_eq[:]})
                    tr_df.to_csv(get_config().TRAIN_EQ_PATH, index=False)
                    if get_config().TEST:
                        tst_df = pd.DataFrame({
                            'date': dt,
                            'capital': tst_eq[:]
                        })
                        tst_df.to_csv(get_config().TEST_EQ_PATH, index=False)

                epoch += 1
                if epoch > get_config().MAX_EPOCH:
                    break
                print("Training %d epoch..." % epoch)

                curr_progress = 0
                state = None
                for b in range(tr_batch_num):
                    if state is None:
                        state = net.zero_state(total_tickers)

                    b_b_i, b_e_i = get_batch_range(b)
                    _input = tr_input[:, b_b_i:b_e_i, :]
                    _labels = tr_labels[:, b_b_i:b_e_i]
                    _mask = tr_tradeable_mask[:, b_b_i:b_e_i]

                    if get_config().FIT_FRI_PREDICTION_ONLY:
                        batch_week_days = tr_week_days[b_b_i:b_e_i]
                        batch_mon_mask = batch_week_days == 5
                        for i in range(_mask.shape[0]):
                            _mask[i, :] = _mask[i, :] & batch_mon_mask

                    state, loss, predictions = net.fit(
                        state, _input, _labels, _mask.astype(np.float32))

                    curr_progress = progress.print_progress(
                        curr_progress, b, tr_batch_num)

                progress.print_progess_end()

                net.save_weights(get_config().WEIGHTS_PATH, epoch)
Example #16
0
 def open_train_stat_file():
     """Open the training-statistics file for appending.

     newline='' follows the csv-module convention for files handed to
     csv.writer; the caller is responsible for closing the handle.
     """
     stat_path = get_config().TRAIN_STAT_PATH
     return open(stat_path, mode='a', newline='')
Example #17
0
                  get_config().HIST_END)
    preprocess_data(stocks,
                    get_config().DATA_PATH,
                    get_config().HIST_BEG,
                    get_config().HIST_END,
                    get_config().DATA_NPZ_PATH,
                    get_config().DATA_FEATURES)


predications = []

# download_data_for_all_stocks()

# Collect per-ticker prediction CSVs produced by earlier training runs.
for stock in stocks:
    print("Parsing %s predictions" % stock)
    get_config().TICKER = stock
    get_config().reload()

    try:
        predications.append(pd.read_csv(get_config().TRAIN_PRED_PATH))
    except (FileNotFoundError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # Best-effort: some tickers may not have been trained yet, or their
        # prediction file may be empty/corrupt — skip them, but say so
        # instead of silently swallowing every exception.
        print("No usable predictions for %s, skipping" % stock)

pred_df = pd.concat(predications, ignore_index=True)
pred_df_length = len(pred_df)

pred_df['date'] = pd.to_datetime(pred_df['date'], format='%Y-%m-%d').dt.date
# add stock idx column (filled in later)
pred_df['stk_idx'] = pd.Series(np.zeros(pred_df_length, dtype=np.int32),
                               index=pred_df.index)
Example #18
0
def download_data_for_all_stocks():
    """Download and preprocess price history for the whole stock universe.

    Points the active config at the synthetic 'ALL_STOCKS' ticker, reloads
    it, makes sure the destination folder exists, then fetches and
    preprocesses history for every ticker in `stocks`.
    """
    get_config().TICKER = 'ALL_STOCKS'
    get_config().reload()

    # create-if-missing; equivalent to the exists()/makedirs() pair
    os.makedirs(get_config().DATA_FOLDER_PATH, exist_ok=True)

    download_data(
        stocks,
        get_config().DATA_PATH,
        get_config().HIST_BEG,
        get_config().HIST_END,
    )
    preprocess_data(
        stocks,
        get_config().DATA_PATH,
        get_config().HIST_BEG,
        get_config().HIST_END,
        get_config().DATA_NPZ_PATH,
        get_config().DATA_FEATURES,
    )
Example #19
0
def calc_pl(pos, curr_px, pos_px):
    """Total P&L of positions `pos` marked from entry prices `pos_px` to `curr_px`.

    Slippage is charged on both legs: the exit price is haircut by
    sign(pos) * SLIPPAGE and the entry price is grossed up by the same
    amount, so long and short positions both pay the cost.
    """
    slippage = get_config().SLIPPAGE
    side = np.sign(pos)
    exit_px = curr_px * (1 - side * slippage)
    entry_px = pos_px * (1 + side * slippage)
    return np.sum(pos * (exit_px - entry_px))
Example #20
0
def create_folders():
    """Ensure the configured data folder exists (create it if missing)."""
    os.makedirs(get_config().DATA_FOLDER_PATH, exist_ok=True)
Example #21
0
import datetime

import numpy as np
import pandas as pd

from portfolio.multi_stock_config import get_config


def date_to_idx(date):
    """Day offset of `date` inside [TRAIN_BEG, TRAIN_END], or None outside it."""
    beg = get_config().TRAIN_BEG
    if beg <= date <= get_config().TRAIN_END:
        return (date - beg).days
    return None


def idx_to_date(idx):
    """Inverse of date_to_idx: map a training-window day offset to a date."""
    offset = datetime.timedelta(days=idx)
    return get_config().TRAIN_BEG + offset


days = (get_config().TRAIN_END - get_config().TRAIN_BEG).days
data = np.ones((len(stocks), days))

stk_idx = 0
for stock in stocks:
    get_config().TICKER = stock
    get_config().reload()

    df = pd.read_csv(get_config().TRAIN_EQ_PATH)
    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d').dt.date

    idx_from = date_to_idx(get_config().TRAIN_BEG)
    capital = 1.0
    for index, row in df.iterrows():
        date = row['date']
        idx_to = date_to_idx(date)