Exemplo n.º 1
0
def _train_rbm_layer(rbm, make_batch, data_indices, batches_per_epoch):
    """Train one RBM layer for RBM_EPOCH_TO_TRAIN epochs of shuffled batches.

    Args:
        rbm: layer object; the value returned by ``rbm.partial_fit(batch)``
            is accumulated as the batch cost.
        make_batch: callable mapping an array of row indices to the batch
            passed to ``rbm.partial_fit``.
        data_indices: index array; shuffled in place at the start of every
            epoch.
        batches_per_epoch: number of full RBM_BATCH_SIZE batches per epoch
            (a trailing partial batch is never used).
    """
    for _ in range(RBM_EPOCH_TO_TRAIN):
        np.random.shuffle(data_indices)
        epoch_cost = 0.
        curr_progress = 0

        for b in range(batches_per_epoch):
            # get data indices for slice
            d_i_s = data_indices[b * RBM_BATCH_SIZE:(b + 1) * RBM_BATCH_SIZE]

            epoch_cost += rbm.partial_fit(make_batch(d_i_s))
            curr_progress = progress.print_progress(curr_progress, b,
                                                    batches_per_epoch)
        progress.print_progess_end()
        # With fewer records than one batch there is nothing to average;
        # report nan instead of raising ZeroDivisionError.
        if batches_per_epoch:
            mean_cost = epoch_cost / batches_per_epoch
        else:
            mean_cost = float('nan')
        print(" Epoch cost: {:.3f}".format(mean_cost))


def train_rbm(dr, wr, tr_beg_idx, tr_end_idx):
    """Train the two stacked RBM layers on rows [tr_beg_idx, tr_end_idx).

    Does nothing when TRAIN_RBM is falsy. Each training batch is the
    column-wise concatenation of the selected rows of ``wr`` and ``dr``.
    Layer 1 is trained on those batches directly; layer 2 is trained on
    the output of ``rbmobject1.transform`` applied to them. Weights are
    saved to './rbm/rbmw1.chp' and './rbm/rbmw2.chp' respectively.

    Args:
        dr, wr: 2-D arrays indexed as ``arr[rows, :]``; sliced to the
            training window before use.
        tr_beg_idx: first row of the training window (inclusive).
        tr_end_idx: end row of the training window (exclusive).
    """
    if not TRAIN_RBM:
        return
    # create rbm layers
    rbmobject1, rbmobject2 = rbm_instance()

    dr = dr[tr_beg_idx:tr_end_idx]
    wr = wr[tr_beg_idx:tr_end_idx]
    train_records = tr_end_idx - tr_beg_idx

    data_indices = np.arange(train_records)
    batches_per_epoch = train_records // RBM_BATCH_SIZE

    def visible_batch(d_i_s):
        # Raw layer-1 input: wr columns followed by dr columns.
        return np.concatenate([wr[d_i_s, :], dr[d_i_s, :]], axis=1)

    def hidden_batch(d_i_s):
        # Layer-2 input: layer-1 transform of the raw batch.
        return rbmobject1.transform(visible_batch(d_i_s))

    print("Training RBM layer 1")
    _train_rbm_layer(rbmobject1, visible_batch, data_indices,
                     batches_per_epoch)
    rbmobject1.save_weights('./rbm/rbmw1.chp')

    print("Training RBM layer 2")
    _train_rbm_layer(rbmobject2, hidden_batch, data_indices,
                     batches_per_epoch)
    rbmobject2.save_weights('./rbm/rbmw2.chp')
Exemplo n.º 2
0
def evaluate_ffnn(data_set_records, dr, wr, prob_l):
    """Run the saved FFNN over the first `data_set_records` rows.

    The weights are loaded from './rbm/ffnn.chp'. Rows are processed in
    consecutive chunks of FFNN_BATCH_SIZE (the last chunk may be shorter);
    each chunk's input is the column-wise concatenation of the matching
    rows of ``wr`` and ``dr``. For every row, column 0 of the prediction
    is stored in ``prob_l`` at that row's index.

    Args:
        data_set_records: number of rows to evaluate.
        dr, wr: 2-D arrays indexed as ``arr[rows, :]``.
        prob_l: output container written in place, indexed by row.
    """
    model = ffnn_instance()
    model.load_weights('./rbm/ffnn.chp')

    print("Evaluating")
    shown = 0
    expected_batches = data_set_records // FFNN_BATCH_SIZE
    batch_no = 0
    lo = 0
    while True:
        hi = lo + FFNN_BATCH_SIZE
        rows = np.arange(lo, min(hi, data_set_records))
        features = np.concatenate([wr[rows, :], dr[rows, :]], axis=1)
        scores = model.predict(features)
        # Prediction row `offset` belongs to dataset row `row`.
        for offset, row in enumerate(rows):
            prob_l[row] = scores[offset, 0]
        if hi >= data_set_records:
            # Final chunk done; no progress update is issued for it.
            break
        shown = progress.print_progress(shown, batch_no, expected_batches)
        batch_no += 1
        lo = hi
    progress.print_progess_end()
Exemplo n.º 3
0
def train_ae(dr, wr, tr_beg_idx, tr_end_idx):
    """Train the autoencoder on rows [tr_beg_idx, tr_end_idx) and save it.

    Returns immediately when TRAIN_AU is falsy. When LOAD_RBM_WEIGHTS is
    set, the two RBM checkpoints are loaded into the autoencoder (slots 0
    and 1) first. Each epoch reshuffles the row order and feeds full
    batches of AU_BATCH_SIZE rows, built by concatenating the ``wr`` and
    ``dr`` columns, to ``autoencoder.partial_fit``. The weights are saved
    to './rbm/au.chp' at the end.

    Args:
        dr, wr: 2-D arrays indexed as ``arr[rows, :]``.
        tr_beg_idx: first training row (inclusive).
        tr_end_idx: end training row (exclusive).
    """
    if not TRAIN_AU:
        return
    autoencoder = ae_instance()

    print("Training Autoencoder")

    dr = dr[tr_beg_idx:tr_end_idx]
    wr = wr[tr_beg_idx:tr_end_idx]

    n_rows = tr_end_idx - tr_beg_idx
    row_order = np.arange(n_rows)

    if LOAD_RBM_WEIGHTS:
        pretrained = (
            ('./rbm/rbmw1.chp', ['rbmw1', 'rbmhb1'], 0),
            ('./rbm/rbmw2.chp', ['rbmw2', 'rbmhb2'], 1),
        )
        for checkpoint, var_names, slot in pretrained:
            autoencoder.load_rbm_weights(checkpoint, var_names, slot)

    n_batches = n_rows // AU_BATCH_SIZE
    for _epoch in range(AU_EPOCH_TO_TRAIN):
        np.random.shuffle(row_order)
        cost_sum = 0.
        shown = 0

        for batch_no in range(n_batches):
            # Rows of this batch, taken from the shuffled order.
            lo = batch_no * AU_BATCH_SIZE
            picked = row_order[lo:lo + AU_BATCH_SIZE]

            features = np.concatenate([wr[picked, :], dr[picked, :]], axis=1)

            cost_sum += autoencoder.partial_fit(features)
            shown = progress.print_progress(shown, batch_no, n_batches)
        progress.print_progess_end()
        print(" Epoch cost: {:.3f}".format(cost_sum / n_batches))

    autoencoder.save_weights('./rbm/au.chp')
Exemplo n.º 4
0
def train_ffnn(dr, wr, c_l, c_s, w_data_index, w_num_stocks, tr_beg_idx,
               tr_end_idx, tr_wk_beg_idx, tr_wk_end_idx):
    """Train the feed-forward network and save it to './rbm/ffnn.chp'.

    Returns immediately when TRAIN_FFNN is falsy. When LOAD_AU_WEIGHTS is
    set, weights are first loaded from './rbm/au.chp'.

    Two batching modes exist:

    * ALIGN_BATCH_TO_DATA truthy: one batch per entry of
      ``w_data_index[tr_wk_beg_idx:tr_wk_end_idx]``; entry ``b`` covers
      rows ``w_data_index[b] : w_data_index[b] + w_num_stocks[b]`` of the
      unsliced arrays.
    * otherwise: rows [tr_beg_idx, tr_end_idx) are reshuffled every epoch
      and consumed in full batches of FFNN_BATCH_SIZE.

    The network input is the column-wise concatenation of ``wr`` and
    ``dr`` rows; the target is ``[c_l, c_s]`` as a float32 two-column
    array. Weights are saved every SAVE_EACH_N_EPOCHS epochs and once more
    after the final epoch.

    Args:
        dr, wr: 2-D arrays indexed as ``arr[rows, :]``.
        c_l, c_s: 1-D target arrays, row-aligned with ``dr``/``wr``.
        w_data_index: start row of each aligned batch.
        w_num_stocks: number of rows in each aligned batch.
        tr_beg_idx, tr_end_idx: training row window (end exclusive).
        tr_wk_beg_idx, tr_wk_end_idx: window into ``w_data_index`` /
            ``w_num_stocks`` (end exclusive).
    """
    if not TRAIN_FFNN:
        return
    ffnn = ffnn_instance()

    print("Training FFNN")

    if LOAD_AU_WEIGHTS:
        ffnn.load_au_weights('./rbm/au.chp', ['rbmw1', 'rbmhb1'], 0)
        ffnn.load_au_weights('./rbm/au.chp', ['rbmw2', 'rbmhb2'], 1)

    if ALIGN_BATCH_TO_DATA:
        batches_per_epoch = tr_wk_end_idx - tr_wk_beg_idx
        w_data_index = w_data_index[tr_wk_beg_idx:tr_wk_end_idx]
        w_num_stocks = w_num_stocks[tr_wk_beg_idx:tr_wk_end_idx]

        def epoch_batches():
            # Fixed, contiguous row ranges given by the index tables.
            return [
                slice(w_data_index[b], w_data_index[b] + w_num_stocks[b])
                for b in range(batches_per_epoch)
            ]
    else:
        train_records = tr_end_idx - tr_beg_idx
        dr = dr[tr_beg_idx:tr_end_idx]
        wr = wr[tr_beg_idx:tr_end_idx]
        # The shuffled indices below are relative to the training window,
        # so the targets must be cut to the same window; otherwise inputs
        # and targets come from different rows whenever tr_beg_idx != 0.
        c_l = c_l[tr_beg_idx:tr_end_idx]
        c_s = c_s[tr_beg_idx:tr_end_idx]

        data_indices = np.arange(train_records)
        batches_per_epoch = train_records // FFNN_BATCH_SIZE

        def epoch_batches():
            # Fresh random row order every epoch, full batches only.
            np.random.shuffle(data_indices)
            return [
                data_indices[b * FFNN_BATCH_SIZE:(b + 1) * FFNN_BATCH_SIZE]
                for b in range(batches_per_epoch)
            ]

    for i in range(FFNN_EPOCH_TO_TRAIN):
        epoch_cost = 0.
        curr_progress = 0

        for b, rows in enumerate(epoch_batches()):
            features = np.concatenate([wr[rows, :], dr[rows, :]], axis=1)

            _cl = c_l[rows].reshape((-1, 1))
            _cs = c_s[rows].reshape((-1, 1))
            observation = np.concatenate([_cl, _cs],
                                         axis=1).astype(np.float32)

            epoch_cost += ffnn.partial_fit(features, observation)
            curr_progress = progress.print_progress(
                curr_progress, b, batches_per_epoch)
        progress.print_progess_end()
        # No batches means no mean; report nan rather than dividing by 0.
        if batches_per_epoch > 0:
            mean_cost = epoch_cost / batches_per_epoch
        else:
            mean_cost = float('nan')
        print(" Epoch {} cost: {:.6f}".format(i, mean_cost))
        if i % SAVE_EACH_N_EPOCHS == 0:
            print("Model saved")
            ffnn.save_weights('./rbm/ffnn.chp')

    ffnn.save_weights('./rbm/ffnn.chp')
    print("Model saved")
Exemplo n.º 5
0
def train(net):
    """Evaluate `net` epoch by epoch and, in TRAIN mode, fit it.

    Every pass of the main loop evaluates the network on the train range
    (and on the test range when ``get_config().TEST`` is set) while
    simulating a periodically rebalanced portfolio whose net liquidation
    value is recorded per time step.

    * Not in TRAIN mode: after a single evaluation the equity curves are
      plotted (unless HIDE_PLOTS), the train predictions are written to
      TRAIN_PRED_PATH, and the function returns.
    * In TRAIN mode: the equity figures are saved, one statistics row is
      appended to TRAIN_STAT_PATH, the equity curves are written to CSV
      when ``epoch == MAX_EPOCH``, then one training epoch is run and the
      weights are saved. The loop ends once ``epoch`` exceeds MAX_EPOCH.

    Args:
        net: network object providing ``init``, ``load_weights``,
            ``save_weights``, ``zero_state``, ``eval`` and ``fit``.
    """
    create_folders()

    env = Env()

    # Write the CSV header only when the statistics file is first created.
    if not os.path.exists(get_config().TRAIN_STAT_PATH):
        with open(get_config().TRAIN_STAT_PATH, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(
                ('epoch', 'tr loss', 'tr dd', 'tr sharpe', 'tr y avg',
                 'tst loss', 'tst dd', 'tst sharpe', 'tst y avg'))

    total_tickers = len(env.tickers)

    def open_train_stat_file():
        return open(get_config().TRAIN_STAT_PATH, 'a', newline='')

    def is_train():
        return get_config().MODE == Mode.TRAIN

    with open_train_stat_file() if is_train() else dummy_context_mgr() as f:
        if is_train():
            writer = csv.writer(f)
        if get_config().EPOCH_WEIGHTS_TO_LOAD != 0:
            net.load_weights(get_config().WEIGHTS_PATH,
                             get_config().EPOCH_WEIGHTS_TO_LOAD)
            epoch = get_config().EPOCH_WEIGHTS_TO_LOAD
            if is_train():
                # The loaded epoch is already trained; continue with the next.
                epoch += 1
        else:
            net.init()
            epoch = 0

        def get_net_data(BEG, END):
            """Collect inputs, prices, masks and labels for [BEG, END].

            Everything is truncated to ``ds_sz``, the number of time steps
            for which a price PRED_HORIZON steps ahead was returned. The
            labels are the relative price change over that horizon.
            """
            beg_idx, end_idx = env.get_data_idxs_range(BEG, END)

            raw_dates = env.get_raw_dates(beg_idx, end_idx)
            net_input = env.get_input(beg_idx, end_idx)
            px = env.get_adj_close_px(beg_idx, end_idx)
            px_pred_hor = env.get_adj_close_px(
                beg_idx + get_config().PRED_HORIZON,
                end_idx + get_config().PRED_HORIZON)
            tradeable_mask = env.get_tradeable_mask(beg_idx, end_idx)
            port_mask = env.get_portfolio_mask(beg_idx, end_idx)

            ds_sz = px_pred_hor.shape[1]

            raw_dates = raw_dates[:ds_sz]
            raw_week_days = np.full(raw_dates.shape, 0, dtype=np.int32)
            for i in range(raw_dates.shape[0]):
                date = date_from_timestamp(raw_dates[i])
                raw_week_days[i] = date.isoweekday()

            net_input = net_input[:, :ds_sz, :]
            tradeable_mask = tradeable_mask[:, :ds_sz]
            port_mask = port_mask[:, :ds_sz]
            px = px[:, :ds_sz]

            labels = (px_pred_hor - px) / px
            batch_num = get_batches_num(ds_sz, get_config().BPTT_STEPS)

            return (beg_idx, ds_sz, batch_num, raw_dates, raw_week_days,
                    tradeable_mask, port_mask, px, net_input, labels)

        (tr_beg_data_idx, tr_ds_sz, tr_batch_num, tr_raw_dates, tr_week_days,
         tr_tradeable_mask, tr_port_mask, tr_px, tr_input,
         tr_labels) = get_net_data(get_config().TRAIN_BEG,
                                   get_config().TRAIN_END)
        tr_eq = np.zeros((tr_ds_sz))
        tr_pred = np.zeros((total_tickers, tr_ds_sz))

        if get_config().TEST:
            (tst_beg_data_idx, tst_ds_sz, tst_batch_num, tst_raw_dates,
             tst_week_days, tst_tradeable_mask, tst_port_mask, tst_px,
             tst_input, tst_labels) = get_net_data(get_config().TEST_BEG,
                                                   get_config().TEST_END)
            tst_eq = np.zeros((tst_ds_sz))
            tst_pred = np.zeros((total_tickers, tst_ds_sz))

        def get_batch_range(b):
            return b * get_config().BPTT_STEPS, (b +
                                                 1) * get_config().BPTT_STEPS

        # The helpers below read the "current dataset" variables
        # (inputs, labels, px, mask, port_mask, eq, raw_dates, beg_data_idx,
        # pred_hist, ds_size, batch_num) that the main loop assigns right
        # before calling them.

        def rebalance_and_mark(data_idx, predictions, pred_t, cash, pos,
                               pos_px):
            """Apply the rebalancing rule at one time step and record equity.

            On a rebalance step the open positions are closed at the
            current prices (realised P&L goes to cash) and new ones are
            opened, either with CAPM weights or equally weighted in the
            direction of the sign of the prediction at time index
            ``pred_t`` of ``predictions``. ``pos`` is modified in place and
            ``eq[data_idx]`` receives cash plus unrealised P&L.

            Returns:
                The updated ``(cash, pos_px)``.
            """
            curr_px = px[:, data_idx]
            global_data_idx = beg_data_idx + data_idx

            date = datetime.datetime.fromtimestamp(
                raw_dates[data_idx]).date()

            # Closing and opening always happen on the same steps.
            if get_config().REBALANCE_FRI:
                rebalance = date.isoweekday() == 5  # ISO weekday 5: Friday
            else:
                rebalance = data_idx % get_config().REBALANCE_FREQ == 0

            if rebalance:
                # Close first, against the previous entry prices.
                rpl = np.sum(pos * (curr_px - pos_px))
                cash += rpl
                pos[:] = 0

                pos_px = curr_px
                pos_mask = port_mask[:, data_idx]
                num_stks = np.sum(pos_mask)
                if get_config().CAPM:
                    exp, cov = env.get_exp_and_cov(
                        pos_mask,
                        global_data_idx - get_config().COVARIANCE_LENGTH + 1,
                        global_data_idx)
                    exp = get_config().REBALANCE_FREQ * exp
                    cov = (get_config().REBALANCE_FREQ *
                           get_config().REBALANCE_FREQ * cov)
                    if get_config().CAPM_USE_NET_PREDICTIONS:
                        exp = predictions[:, pred_t, 0][pos_mask]

                    capm = Capm(num_stks)
                    capm.init()

                    best_sharpe = None
                    best_weights = None
                    best_constriant = None
                    # Dedicated iteration budget. The original loop reused
                    # the caller's time-step index as its counter, which
                    # shortened the search and overwrote that index.
                    for _ in range(10001):
                        w, sharpe, constraint = capm.get_params(exp, cov)
                        if w is None:
                            break
                        if best_sharpe is None or sharpe >= best_sharpe:
                            best_weights = w
                            best_sharpe = sharpe
                            best_constriant = constraint
                        capm.fit(exp, cov)
                        capm.rescale_weights()

                    print("Date: %s sharpe: %.2f constraint: %.6f" %
                          (date.strftime('%Y-%m-%d'), best_sharpe,
                           best_constriant))

                    pos[pos_mask] = best_weights / curr_px[pos_mask]
                else:
                    pos[pos_mask] = 1 / num_stks / curr_px[
                        pos_mask] * np.sign(predictions[pos_mask, pred_t, 0])

            urpl = np.sum(pos * (curr_px - pos_px))
            eq[data_idx] = cash + urpl
            return cash, pos_px

        def eval_with_state_reset():
            """Evaluate step by step, restarting from a zero state each step.

            For step ``i`` the network sees the window of at most
            RESET_HIDDEN_STATE_FREQ steps ending at ``i``; the prediction
            of the window's last step is stored and used for trading.

            Returns:
                Mean of the square roots of the per-step losses.
            """
            nonlocal state
            curr_progress = 0
            cash = 1
            pos = np.zeros((total_tickers))
            pos_px = np.zeros((total_tickers))
            # One loss per evaluated step (previously never filled in, so
            # the reported loss was always 0).
            losses = np.zeros((ds_size))
            for i in range(ds_size):
                state = net.zero_state(total_tickers)
                b_b_i = max(0, i + 1 - get_config().RESET_HIDDEN_STATE_FREQ)
                b_e_i = i + 1
                _input = inputs[:, b_b_i:b_e_i, :]
                _labels = labels[:, b_b_i:b_e_i]
                _mask = mask[:, b_b_i:b_e_i]
                state, loss, predictions = net.eval(
                    state, _input, _labels, _mask.astype(np.float32))
                pred_hist[:, i] = predictions[:, -1, 0]
                losses[i] = loss

                # The current step is always the last one of the window.
                cash, pos_px = rebalance_and_mark(i, predictions, -1, cash,
                                                  pos, pos_px)

                curr_progress = progress.print_progress(
                    curr_progress, i, ds_size)

            progress.print_progess_end()
            return np.mean(np.sqrt(losses))

        def eval_batched():
            """Evaluate in consecutive BPTT_STEPS batches, carrying the state.

            Returns:
                Mean of the square roots of the per-batch losses.
            """
            nonlocal state
            curr_progress = 0
            cash = 1
            pos = np.zeros((total_tickers))
            pos_px = np.zeros((total_tickers))
            losses = np.zeros((batch_num))

            for b in range(batch_num):
                if state is None:
                    state = net.zero_state(total_tickers)

                b_b_i, b_e_i = get_batch_range(b)
                _input = inputs[:, b_b_i:b_e_i, :]
                _labels = labels[:, b_b_i:b_e_i]
                _mask = mask[:, b_b_i:b_e_i]

                state, loss, predictions = net.eval(
                    state, _input, _labels, _mask.astype(np.float32))
                pred_hist[:, b_b_i:b_e_i] = predictions[:, :, 0]

                for i in range(predictions.shape[1]):
                    data_idx = b * get_config().BPTT_STEPS + i
                    cash, pos_px = rebalance_and_mark(
                        data_idx, predictions, i, cash, pos, pos_px)

                losses[b] = loss
                # Progress is relative to the dataset being evaluated.
                curr_progress = progress.print_progress(
                    curr_progress, b, batch_num)

            progress.print_progess_end()
            return np.mean(np.sqrt(losses))

        while epoch <= get_config().MAX_EPOCH:

            print("Eval %d epoch on train set..." % epoch)
            batch_num = tr_batch_num
            ds_size = tr_ds_sz
            inputs = tr_input
            labels = tr_labels
            px = tr_px
            mask = tr_tradeable_mask
            port_mask = tr_port_mask
            eq = tr_eq
            beg_data_idx = tr_beg_data_idx
            raw_dates = tr_raw_dates
            pred_hist = tr_pred
            state = None

            if get_config().RESET_HIDDEN_STATE_FREQ == 0:
                tr_avg_loss = eval_batched()
            else:
                tr_avg_loss = eval_with_state_reset()

            print("Train loss: %.4f%%" % (tr_avg_loss * 100))

            if get_config().TEST:
                print("Eval %d epoch on test set..." % epoch)
                batch_num = tst_batch_num
                # Was tr_ds_sz: the test run must iterate the test size.
                ds_size = tst_ds_sz
                inputs = tst_input
                labels = tst_labels
                px = tst_px
                mask = tst_tradeable_mask
                port_mask = tst_port_mask
                eq = tst_eq
                beg_data_idx = tst_beg_data_idx
                raw_dates = tst_raw_dates
                pred_hist = tst_pred
                state = None

                if get_config().RESET_HIDDEN_STATE_FREQ == 0:
                    tst_avg_loss = eval_batched()
                else:
                    tst_avg_loss = eval_with_state_reset()

                print("Test loss: %.4f%%" % (tst_avg_loss * 100))

            if not is_train():
                dt = build_time_axis(tr_raw_dates)

                if not get_config().HIDE_PLOTS:
                    plot_eq('Train',
                            get_config().TRAIN_BEG,
                            get_config().TRAIN_END, dt, tr_eq)

                # One row per (date, ticker) pair of the train predictions.
                result = pd.DataFrame(columns=('date', 'ticker', 'prediction'))
                for i in range(tr_raw_dates.shape[0]):
                    date = dt[i]
                    for j in range(total_tickers):
                        ticker = env._idx_to_ticker(j)
                        prediction = tr_pred[j, i]

                        row = [date, ticker, prediction]
                        result.loc[i * total_tickers + j] = row
                result.to_csv(get_config().TRAIN_PRED_PATH, index=False)

                if get_config().TEST:
                    dt = build_time_axis(tst_raw_dates)
                    if not get_config().HIDE_PLOTS:
                        plot_eq('Test',
                                get_config().TEST_BEG,
                                get_config().TEST_END, dt, tst_eq)

                if not get_config().HIDE_PLOTS:
                    show_plots()
                break

            if is_train() and epoch <= get_config().MAX_EPOCH:

                # plot and save graphs
                # Train and test time axes are kept apart so that each
                # equity CSV below is written with its own dates.
                tr_dt = build_time_axis(tr_raw_dates)
                fig, tr_dd, tr_sharpe, tr_y_avg = plot_eq(
                    'Train',
                    get_config().TRAIN_BEG,
                    get_config().TRAIN_END, tr_dt, tr_eq)
                fig.savefig('%s/%04d.png' %
                            (get_config().TRAIN_FIG_PATH, epoch))
                plt.close(fig)

                if get_config().TEST:
                    tst_dt = build_time_axis(tst_raw_dates)
                    fig, tst_dd, tst_sharpe, tst_y_avg = plot_eq(
                        'Test',
                        get_config().TEST_BEG,
                        get_config().TEST_END, tst_dt, tst_eq)
                    fig.savefig('%s/%04d.png' %
                                (get_config().TEST_FIG_PATH, epoch))
                    plt.close(fig)
                else:
                    tst_avg_loss = 0
                    tst_dd = 0
                    tst_sharpe = 0
                    tst_y_avg = 0

                writer.writerow((
                    epoch,
                    tr_avg_loss,
                    tr_dd,
                    tr_sharpe,
                    tr_y_avg,
                    tst_avg_loss,
                    tst_dd,
                    tst_sharpe,
                    tst_y_avg,
                ))

                f.flush()

                if epoch == get_config().MAX_EPOCH:
                    tr_df = pd.DataFrame({
                        'date': tr_dt,
                        'capital': tr_eq[:]
                    })
                    tr_df.to_csv(get_config().TRAIN_EQ_PATH, index=False)
                    if get_config().TEST:
                        tst_df = pd.DataFrame({
                            'date': tst_dt,
                            'capital': tst_eq[:]
                        })
                        tst_df.to_csv(get_config().TEST_EQ_PATH, index=False)

                epoch += 1
                if epoch > get_config().MAX_EPOCH:
                    break
                print("Training %d epoch..." % epoch)

                curr_progress = 0
                state = None
                for b in range(tr_batch_num):
                    if state is None:
                        state = net.zero_state(total_tickers)

                    b_b_i, b_e_i = get_batch_range(b)
                    _input = tr_input[:, b_b_i:b_e_i, :]
                    _labels = tr_labels[:, b_b_i:b_e_i]
                    _mask = tr_tradeable_mask[:, b_b_i:b_e_i]

                    if get_config().FIT_FRI_PREDICTION_ONLY:
                        # Keep only Friday steps (ISO weekday 5) in the mask.
                        # NOTE(review): if tr_tradeable_mask is a NumPy
                        # array, _mask is a view and this assignment also
                        # changes the mask used by later evaluations;
                        # confirm that this is intended.
                        batch_week_days = tr_week_days[b_b_i:b_e_i]
                        batch_fri_mask = batch_week_days == 5
                        for i in range(_mask.shape[0]):
                            _mask[i, :] = _mask[i, :] & batch_fri_mask

                    state, loss, predictions = net.fit(
                        state, _input, _labels, _mask.astype(np.float32))

                    curr_progress = progress.print_progress(
                        curr_progress, b, tr_batch_num)

                progress.print_progess_end()

                net.save_weights(get_config().WEIGHTS_PATH, epoch)
Exemplo n.º 6
0
            def eval():
                nonlocal raw_dates, beg_data_idx, ds_size, batch_num, input, labels, px, mask, port_mask, eq, pred_hist, state
                curr_progress = 0
                cash = 1
                pos = np.zeros((total_tickers))
                pos_px = np.zeros((total_tickers))
                losses = np.zeros((batch_num))

                for b in range(batch_num):
                    if state is None:
                        state = net.zero_state(total_tickers)

                    b_b_i, b_e_i = get_batch_range(b)
                    _input = input[:, b_b_i:b_e_i, :]
                    _labels = labels[:, b_b_i:b_e_i]
                    _mask = mask[:, b_b_i:b_e_i]

                    state, loss, predictions = net.eval(
                        state, _input, _labels, _mask.astype(np.float32))
                    pred_hist[:, b_b_i:b_e_i] = predictions[:, :, 0]

                    for i in range(predictions.shape[1]):
                        data_idx = b * get_config().BPTT_STEPS + i
                        curr_px = px[:, data_idx]
                        global_data_idx = beg_data_idx + data_idx

                        date = datetime.datetime.fromtimestamp(
                            raw_dates[data_idx]).date()

                        open_pos = False
                        close_pos = False
                        if get_config().REBALANCE_FRI:
                            if date.isoweekday() == 5:
                                open_pos = True
                            if date.isoweekday() == 5:
                                close_pos = True
                        else:
                            if data_idx % get_config().REBALANCE_FREQ == 0:
                                close_pos = True
                                open_pos = True

                        if close_pos:
                            rpl = np.sum(pos * (curr_px - pos_px))
                            cash += rpl
                            pos[:] = 0
                        if open_pos:
                            pos_px = curr_px
                            pos_mask = port_mask[:, data_idx]
                            num_stks = np.sum(pos_mask)
                            if get_config().CAPM:
                                exp, cov = env.get_exp_and_cov(
                                    pos_mask, global_data_idx -
                                    get_config().COVARIANCE_LENGTH + 1,
                                    global_data_idx)
                                exp = get_config().REBALANCE_FREQ * exp
                                cov = get_config().REBALANCE_FREQ * get_config(
                                ).REBALANCE_FREQ * cov
                                if get_config().CAPM_USE_NET_PREDICTIONS:
                                    exp = predictions[:, i, 0][pos_mask]

                                capm = Capm(num_stks)
                                capm.init()

                                best_sharpe = None
                                best_weights = None
                                best_constriant = None
                                while i <= 10000:
                                    w, sharpe, constraint = capm.get_params(
                                        exp, cov)
                                    # print("Iteration: %d Sharpe: %.2f Constraint: %.6f" % (i, sharpe, constraint))
                                    if w is None:
                                        break
                                    if best_sharpe is None or sharpe >= best_sharpe:
                                        best_weights = w
                                        best_sharpe = sharpe
                                        best_constriant = constraint
                                    capm.fit(exp, cov)
                                    capm.rescale_weights()

                                    i += 1
                                date = datetime.datetime.fromtimestamp(
                                    raw_dates[data_idx]).date()
                                print(
                                    "Date: %s sharpe: %.2f constraint: %.6f" %
                                    (date.strftime('%Y-%m-%d'), best_sharpe,
                                     best_constriant))

                                pos[pos_mask] = best_weights / curr_px[pos_mask]
                            else:
                                pos[pos_mask] = 1 / num_stks / curr_px[
                                    pos_mask] * np.sign(predictions[pos_mask,
                                                                    i, 0])

                        urpl = np.sum(pos * (curr_px - pos_px))
                        nlv = cash + urpl

                        eq[data_idx] = nlv

                    losses[b] = loss
                    curr_progress = progress.print_progress(
                        curr_progress, b, tr_batch_num)

                progress.print_progess_end()
                avg_loss = np.mean(np.sqrt(losses))
                return avg_loss
Exemplo n.º 7
0
def train(net):
    """Run the evaluate / plot / fit loop for ``net`` on the configured ranges.

    For every epoch the net is evaluated (``net.eval``) on the train range and
    on the test range.  During each evaluation pass an equity curve is built
    from the sign of the predictions for ``get_config().TICKER`` and, when
    ``DRAW_PREDICTIONS`` is set, predicted price series are collected.

    * In train mode (``get_config().MODE == Mode.TRAIN``) the two average
      losses are appended to ``TRAIN_STAT_PATH``, the equity plots are saved
      to ``TRAIN_FIG_PATH`` / ``TEST_FIG_PATH``, one ``net.fit`` pass is run
      over the train range and the weights are saved.  On the last epoch the
      equity curves are also written to ``TRAIN_EQ_PATH`` / ``TEST_EQ_PATH``.
    * In any other mode the plots are shown once and the loop stops.

    Args:
        net: model object providing ``init``, ``load_weights``,
            ``save_weights``, ``zero_state``, ``eval`` and ``fit``.

    Returns:
        None.
    """
    create_folders()

    env = Env()

    # write the csv header only when the statistics file does not exist yet
    if not os.path.exists(get_config().TRAIN_STAT_PATH):
        with open(get_config().TRAIN_STAT_PATH, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow((
                'epoch',
                'train loss',
                'test loss',
            ))

    tickers_to_plot = [get_config().TICKER]
    total = len(tickers_to_plot)
    ticker_to_plot_idxs = np.zeros((total), dtype=np.int32)

    for i, ticker in enumerate(tickers_to_plot):
        ticker_to_plot_idxs[i] = env._ticker_to_idx(ticker)

    def open_train_stat_file():
        return open(get_config().TRAIN_STAT_PATH, 'a', newline='')

    def is_train():
        return get_config().MODE == Mode.TRAIN

    # the statistics file is only needed (and only opened) in train mode
    with open_train_stat_file() if is_train() else dummy_context_mgr() as f:
        if is_train():
            writer = csv.writer(f)
        if get_config().EPOCH_WEIGHTS_TO_LOAD != 0:
            net.load_weights(get_config().WEIGHTS_PATH,
                             get_config().EPOCH_WEIGHTS_TO_LOAD)
            epoch = get_config().EPOCH_WEIGHTS_TO_LOAD
            # NOTE(review): when resuming in train mode the counter starts at
            # loaded epoch + 1, so the first statistics row is written under
            # that number although it is computed with the loaded weights --
            # confirm this is intended.
            if is_train():
                epoch += 1
        else:
            net.init()
            epoch = 0

        def get_net_data(BEG, END):
            """Slice inputs, prices and targets for the BEG..END range.

            Returns ``(ds_sz, batch_num, raw_dates, px, net_input, labels,
            rets)`` where ``labels`` is the relative price change over
            ``PRED_HORIZON`` steps and ``rets`` the one-step relative change.
            """
            beg_idx = env.get_next_trading_day_data_idx(BEG)
            end_idx = env.get_prev_trading_day_data_idx(END)

            raw_dates = env.get_raw_dates(beg_idx, end_idx)
            net_input = env.get_input(beg_idx, end_idx)
            px = env.get_adj_close_px(beg_idx, end_idx)
            px_pred_hor = env.get_adj_close_px(
                beg_idx + get_config().PRED_HORIZON,
                end_idx + get_config().PRED_HORIZON)
            px_t1 = env.get_adj_close_px(beg_idx + 1, end_idx + 1)
            # everything is trimmed to the length of the horizon-shifted
            # price window so that all arrays line up
            ds_sz = px_pred_hor.shape[1]
            raw_dates = raw_dates[:ds_sz]
            net_input = net_input[:, :ds_sz, :]
            px = px[:, :ds_sz]
            px_t1 = px_t1[:, :ds_sz]
            labels = (px_pred_hor - px) / px
            rets = (px_t1 - px) / px
            batch_num = get_batches_num(ds_sz, get_config().BPTT_STEPS)

            return ds_sz, batch_num, raw_dates, px, net_input, labels, rets

        (tr_ds_sz, tr_batch_num, tr_raw_dates, tr_px, tr_input, tr_labels,
         tr_rets) = get_net_data(get_config().TRAIN_BEG,
                                 get_config().TRAIN_END)

        (tst_ds_sz, tst_batch_num, tst_raw_dates, tst_px, tst_input,
         tst_labels, tst_rets) = get_net_data(get_config().TEST_BEG,
                                              get_config().TEST_END)

        # finished predicted-price series; only filled when DRAW_PREDICTIONS
        tr_pred_px_series = []
        tr_pred_dt_series = []
        tst_pred_px_series = []
        tst_pred_dt_series = []

        tr_eq = np.zeros((total, tr_ds_sz))
        tst_eq = np.zeros((total, tst_ds_sz))

        def get_batch_input_and_labels(ds_input, ds_labels, b):
            """Return the b-th BPTT_STEPS-long slice of inputs and labels."""
            b_i = b * get_config().BPTT_STEPS
            e_i = (b + 1) * get_config().BPTT_STEPS
            return ds_input[:, b_i:e_i, :], ds_labels[:, b_i:e_i]

        def predict_price_series(px, raw_dates, predictions, b, pred_px,
                                 pred_dt, pred_px_series, pred_dt_series,
                                 curr_pred_px):
            """Extend the predicted price path with one batch of predictions.

            Every RESET_PRED_PX_EACH_N_DAYS steps the running series is
            closed and appended to ``pred_px_series`` / ``pred_dt_series``
            and a new one is started from the actual price.
            """
            reset_n = get_config().RESET_PRED_PX_EACH_N_DAYS
            for i in range(predictions.shape[1]):

                data_idx = b * get_config().BPTT_STEPS + i
                serie_idx = data_idx % reset_n

                if serie_idx == 0:
                    # finish the old series if one exists
                    if pred_px is not None:
                        pred_px[ticker_to_plot_idxs, reset_n] = curr_pred_px
                        pred_dt[reset_n] = date_from_timestamp(
                            raw_dates[data_idx])

                        pred_px_series.append(pred_px)
                        pred_dt_series.append(pred_dt)
                    # reset price to the actual one
                    curr_pred_px = px[ticker_to_plot_idxs, data_idx]
                    # create a new series (one extra slot for the closing point)
                    pred_px = np.zeros((total, reset_n + 1))
                    pred_dt = [None] * (reset_n + 1)

                # fill values
                pred_px[ticker_to_plot_idxs, serie_idx] = curr_pred_px
                pred_dt[serie_idx] = date_from_timestamp(raw_dates[data_idx])

                # advance the predicted price by the per-step share of the
                # horizon prediction
                curr_pred_px += (predictions[ticker_to_plot_idxs, i, 0] /
                                 get_config().PRED_HORIZON) * curr_pred_px

            return curr_pred_px, pred_px, pred_dt

        def fill_eq(eq, rets, predictions, b, capital, bet, position):
            """Write the equity curve for batch ``b`` into ``eq``.

            ``capital``, ``bet`` and ``position`` carry the running state from
            the previous batch; the updated state is returned.
            """
            for i in range(predictions.shape[1]):
                data_idx = b * get_config().BPTT_STEPS + i

                if data_idx % get_config().REBALANCE_FREQ == 0:
                    # close the running bet and open a new unit bet in the
                    # direction of the current prediction
                    position = np.sign(predictions[ticker_to_plot_idxs, i, 0])
                    capital += bet
                    bet = np.ones(total)
                    capital -= bet
                eq[ticker_to_plot_idxs, data_idx] = (capital + bet)

                bet += bet * rets[ticker_to_plot_idxs, data_idx] * position

            return capital, bet, position

        def eval_pass(ds_input, ds_labels, ds_px, ds_rets, ds_raw_dates,
                      batch_num, eq, pred_px_series, pred_dt_series):
            """Evaluate the net over one data set and fill its equity curve.

            Returns the mean of the square roots of the per-batch losses.
            """
            curr_progress = 0
            state = None
            losses = np.zeros((batch_num))
            curr_pred_px = None
            pred_px = None
            pred_dt = None

            # equity state restarts for every pass
            capital = np.ones(total, dtype=np.float32)
            bet = np.zeros(total, dtype=np.float32)
            position = 0

            for b in range(batch_num):
                if state is None:
                    state = net.zero_state(len(env.tickers))

                batch_input, batch_labels = get_batch_input_and_labels(
                    ds_input, ds_labels, b)
                state, loss, predictions = net.eval(state, batch_input,
                                                    batch_labels)

                if get_config().DRAW_PREDICTIONS:
                    curr_pred_px, pred_px, pred_dt = predict_price_series(
                        ds_px, ds_raw_dates, predictions, b, pred_px, pred_dt,
                        pred_px_series, pred_dt_series, curr_pred_px)
                capital, bet, position = fill_eq(eq, ds_rets, predictions, b,
                                                 capital, bet, position)

                losses[b] = loss
                curr_progress = progress.print_progress(
                    curr_progress, b, batch_num)

            progress.print_progess_end()
            return np.mean(np.sqrt(losses))

        while epoch <= get_config().MAX_EPOCH:

            print("Eval %d epoch on train set..." % epoch)
            train_avg_loss = eval_pass(tr_input, tr_labels, tr_px, tr_rets,
                                       tr_raw_dates, tr_batch_num, tr_eq,
                                       tr_pred_px_series, tr_pred_dt_series)
            print("Train loss: %.4f%%" % (train_avg_loss * 100))

            print("Eval %d epoch on test set..." % epoch)
            tst_avg_loss = eval_pass(tst_input, tst_labels, tst_px, tst_rets,
                                     tst_raw_dates, tst_batch_num, tst_eq,
                                     tst_pred_px_series, tst_pred_dt_series)
            print("Test loss: %.4f%%" % (tst_avg_loss * 100))

            # outside train mode: draw plots once and stop
            if not is_train():
                dt = build_time_axis(tr_raw_dates)

                if get_config().DRAW_PREDICTIONS:
                    plot_prediction('Train', dt, tr_px, tr_pred_px_series,
                                    tr_pred_dt_series)
                plot_eq('Train',
                        get_config().TRAIN_BEG,
                        get_config().TRAIN_END, dt, tr_eq)

                dt = build_time_axis(tst_raw_dates)
                if get_config().DRAW_PREDICTIONS:
                    plot_prediction('Test', dt, tst_px, tst_pred_px_series,
                                    tst_pred_dt_series)
                plot_eq('Test',
                        get_config().TEST_BEG,
                        get_config().TEST_END, dt, tst_eq)

                show_plots()
                break

            # train mode from here on
            # save train progress
            writer.writerow((epoch, train_avg_loss, tst_avg_loss))
            f.flush()
            # plot and save graphs
            dt = build_time_axis(tr_raw_dates)
            fig = plot_eq('Train',
                          get_config().TRAIN_BEG,
                          get_config().TRAIN_END, dt, tr_eq)
            fig.savefig('%s/%04d.png' % (get_config().TRAIN_FIG_PATH, epoch))
            plt.close(fig)
            if epoch == get_config().MAX_EPOCH:
                tr_df = pd.DataFrame({'date': dt, 'capital': tr_eq[0, :]})
                tr_df.to_csv(get_config().TRAIN_EQ_PATH, index=False)

            dt = build_time_axis(tst_raw_dates)
            fig = plot_eq('Test',
                          get_config().TEST_BEG,
                          get_config().TEST_END, dt, tst_eq)
            fig.savefig('%s/%04d.png' % (get_config().TEST_FIG_PATH, epoch))
            plt.close(fig)
            if epoch == get_config().MAX_EPOCH:
                tst_df = pd.DataFrame({'date': dt, 'capital': tst_eq[0, :]})
                tst_df.to_csv(get_config().TEST_EQ_PATH, index=False)

            epoch += 1
            if epoch > get_config().MAX_EPOCH:
                break
            print("Training %d epoch..." % epoch)

            curr_progress = 0
            state = None
            for b in range(tr_batch_num):
                if state is None:
                    state = net.zero_state(len(env.tickers))

                batch_input, batch_labels = get_batch_input_and_labels(
                    tr_input, tr_labels, b)
                # only the recurrent state is carried between fit batches
                state, _, _ = net.fit(state, batch_input, batch_labels)

                curr_progress = progress.print_progress(
                    curr_progress, b, tr_batch_num)

            progress.print_progess_end()

            net.save_weights(get_config().WEIGHTS_PATH, epoch)
Exemplo n.º 8
0
def calc_classes_and_decisions(data_set_records, total_weeks, prob_l):
    """Build weekly long/short portfolios from ``prob_l`` and score them.

    For every week ``w`` the records ``w_data_index[w] : w_data_index[w] +
    w_num_stocks[w]`` are split into predicted longs (``prob_l >= 0.5``) and
    predicted shorts.  From each side the ``get_config().SLCT_VAL`` records
    with the smallest ``sign(prob_l - 0.5) * s_int_r`` are selected, and the
    resulting portfolio is evaluated under four exit models:

    * no stop loss,
    * stop loss on the portfolio's end-of-day hpr,
    * stop loss on the portfolio's lower-bound (intraday) hpr,
    * stop loss applied per stock.

    The function reads the module-level arrays ``w_data_index``,
    ``w_num_stocks``, ``s_int_r``, ``stocks``, ``t_w_eod_num``, ``t_w_eods``,
    ``t_w_s_hpr``, ``t_w_s_h_hpr``, ``t_w_s_l_hpr``, ``t_w_s_o_hpr``,
    ``avg_s_to``, ``raw_dt`` and ``tickers``.

    Args:
        data_set_records: total number of records; length of the returned
            class flag arrays.
        total_weeks: number of weeks to process.
        prob_l: per-record value compared against 0.5 to decide the side
            (presumably the predicted probability of the long class --
            confirm against the caller).

    Returns:
        Tuple ``(c_l, c_s, model_no_sl, model_eod_sl, model_lb_sl,
        model_s_sl, min_to, avg_to, longs, shorts, selection)``.  ``c_l`` /
        ``c_s`` are boolean per-record flags (predicted long / short).  Each
        ``model_*`` is a tuple of per-week arrays ``(hpr, min_w_eod_hpr,
        min_w_lb_hpr, l_stops, s_stops, l_port, s_port)``.  The remaining
        items are per-week arrays: minimum and average of ``avg_s_to`` over
        the selected stocks, number of longs, number of shorts and number of
        records available in the week.
    """
    c_l = np.zeros((data_set_records), dtype=bool)
    c_s = np.zeros((data_set_records), dtype=bool)

    # per-record selection flags; weekly slices of these are updated in place
    s_l = np.zeros((data_set_records), dtype=bool)
    s_s = np.zeros((data_set_records), dtype=bool)

    model_no_sl_hpr = np.zeros((total_weeks))
    min_w_eod_hpr_no_sl = np.zeros((total_weeks))
    min_w_lb_hpr_no_sl = np.zeros((total_weeks))
    l_port_no_sl = np.empty((total_weeks), dtype=object)
    s_port_no_sl = np.empty((total_weeks), dtype=object)
    l_stops_no_sl = np.zeros((total_weeks))
    s_stops_no_sl = np.zeros((total_weeks))
    min_to = np.zeros((total_weeks))
    avg_to = np.zeros((total_weeks))
    longs = np.zeros((total_weeks))
    shorts = np.zeros((total_weeks))
    selection = np.zeros((total_weeks))

    model_eod_sl_hpr = np.zeros((total_weeks))
    min_w_eod_hpr_eod_sl = np.zeros((total_weeks))
    min_w_lb_hpr_eod_sl = np.zeros((total_weeks))
    l_port_eod_sl = np.empty((total_weeks), dtype=object)
    s_port_eod_sl = np.empty((total_weeks), dtype=object)
    l_stops_eod_sl = np.zeros((total_weeks))
    s_stops_eod_sl = np.zeros((total_weeks))

    model_lb_sl_hpr = np.zeros((total_weeks))
    min_w_eod_hpr_lb_sl = np.zeros((total_weeks))
    min_w_lb_hpr_lb_sl = np.zeros((total_weeks))
    l_port_lb_sl = np.empty((total_weeks), dtype=object)
    s_port_lb_sl = np.empty((total_weeks), dtype=object)
    l_stops_lb_sl = np.zeros((total_weeks))
    s_stops_lb_sl = np.zeros((total_weeks))

    model_s_sl_hpr = np.zeros((total_weeks))
    min_w_eod_hpr_s_sl = np.zeros((total_weeks))
    min_w_lb_hpr_s_sl = np.zeros((total_weeks))
    l_port_s_sl = np.empty((total_weeks), dtype=object)
    s_port_s_sl = np.empty((total_weeks), dtype=object)
    l_stops_s_sl = np.zeros((total_weeks))
    s_stops_s_sl = np.zeros((total_weeks))

    def first_true_idx_by_row(mask, default_idx):
        """Return, per row, the index of the first True or ``default_idx``."""
        idxs = np.full(mask.shape[0], default_idx)
        for row, first in enumerate(np.argmax(mask, axis=1)):
            # argmax is 0 both when column 0 is True and when the row has no
            # True at all; the second case falls back to the default
            if first == 0 and mask[row][0] == 0:
                idxs[row] = default_idx
            else:
                idxs[row] = first

        return idxs

    print('Calculating...')
    curr_progress = 0
    for i in range(total_weeks):
        curr_progress = progress.print_progress(curr_progress, i, total_weeks)
        w_i = i
        beg = w_data_index[w_i]
        end = beg + w_num_stocks[w_i]

        _prob_l = prob_l[beg:end]

        # fixed decision threshold
        prob_median = 0.5
        # slices are views: the |= below writes into c_l / c_s
        _s_c_l = c_l[beg:end]
        _s_c_s = c_s[beg:end]
        pred_long_cond = _prob_l >= prob_median
        _s_c_l |= pred_long_cond
        _s_c_s |= ~pred_long_cond

        _s_s_l = s_l[beg:end]
        _s_s_s = s_s[beg:end]

        _int_r = s_int_r[beg:end]

        # selection metric: s_int_r signed by the predicted side
        _metric = np.sign(_prob_l - prob_median) * _int_r
        _metric_l = _metric[pred_long_cond]
        _metric_s = _metric[~pred_long_cond]
        _metric_sorted_l = np.sort(_metric_l)
        _metric_sorted_s = np.sort(_metric_s)

        MOD = False
        if MOD:
            _metric_sorted_l = _metric_sorted_l[_metric_sorted_l >= 0]
            _metric_sorted_s = _metric_sorted_s[_metric_sorted_s >= 0]
            _sel_stks = min(get_config().SLCT_VAL, _metric_sorted_l.shape[0])
            if _sel_stks == 0:
                continue
            _l_b = _metric_sorted_l[_sel_stks - 1]
            _sel_stks = min(get_config().SLCT_VAL, _metric_sorted_s.shape[0])
            if _sel_stks == 0:
                continue
            _s_b = _metric_sorted_s[_sel_stks - 1]

            sel_l_cond = _s_s_l
            sel_l_cond |= pred_long_cond
            sel_l_cond &= (_metric <= _l_b) & (_metric >= 0)

            sel_s_cond = _s_s_s
            sel_s_cond |= ~pred_long_cond
            sel_s_cond &= (_metric <= _s_b) & (_metric >= 0)
        else:
            # NOTE(review): a week with no predicted longs (or no predicted
            # shorts) makes the indexing below raise IndexError; the MOD
            # branch skips such weeks instead -- confirm the desired handling.
            _sel_stks = min(get_config().SLCT_VAL, _metric_sorted_l.shape[0])
            _l_b = _metric_sorted_l[_sel_stks - 1]
            _sel_stks = min(get_config().SLCT_VAL, _metric_sorted_s.shape[0])
            _s_b = _metric_sorted_s[_sel_stks - 1]

            # sel_*_cond alias the weekly slices of s_l / s_s (in-place ops)
            sel_l_cond = _s_s_l
            sel_l_cond |= pred_long_cond
            sel_l_cond &= (_metric <= _l_b)

            sel_s_cond = _s_s_s
            sel_s_cond |= ~pred_long_cond
            sel_s_cond &= (_metric <= _s_b)

        # select long and short stocks in portfolio
        _stocks = stocks[beg:end]
        _l_stocks = _stocks[sel_l_cond]
        _s_stocks = _stocks[sel_s_cond]

        _t_w_eod_num = t_w_eod_num[w_i]
        _t_w_eods = t_w_eods[w_i, :_t_w_eod_num]
        # select eod hpr by stock during the week
        _t_w_s_s_hpr = t_w_s_hpr[beg:end, :_t_w_eod_num]
        _t_w_l_s_hpr = _t_w_s_s_hpr[sel_l_cond, :]
        _t_w_s_s_hpr = _t_w_s_s_hpr[sel_s_cond, :]
        # select lb hpr by stock during the week: longs use the low-based
        # hpr, shorts the high-based one
        _t_w_s_s_h_hpr = t_w_s_h_hpr[beg:end, :_t_w_eod_num]
        _t_w_s_s_l_hpr = t_w_s_l_hpr[beg:end, :_t_w_eod_num]
        _t_w_l_s_lb_hpr = _t_w_s_s_l_hpr[sel_l_cond, :]
        _t_w_s_s_lb_hpr = _t_w_s_s_h_hpr[sel_s_cond, :]
        # select hpr by stock during the week using open px
        _t_w_ss_o_hpr = t_w_s_o_hpr[beg:end, :_t_w_eod_num]
        _t_w_l_s_o_hpr = _t_w_ss_o_hpr[sel_l_cond, :]
        _t_w_s_s_o_hpr = _t_w_ss_o_hpr[sel_s_cond, :]

        # calc portfolio metrics
        _avg_s_to = avg_s_to[beg:end]
        _l_avg_s_to = _avg_s_to[sel_l_cond]
        _s_avg_s_to = _avg_s_to[sel_s_cond]
        _min_to = min(np.min(_l_avg_s_to), np.min(_s_avg_s_to))
        _avg_to = (np.mean(_l_avg_s_to) + np.mean(_s_avg_s_to)) / 2
        _longs = _l_stocks.shape[0]
        _shorts = _s_stocks.shape[0]
        min_to[w_i] = _min_to
        avg_to[w_i] = _avg_to
        longs[w_i] = _longs
        shorts[w_i] = _shorts
        selection[w_i] = _prob_l.shape[0]

        def calc_params(override_ext_lb_hpr, _t_w_eods, t_w_l_s_hpr,
                        t_w_s_s_hpr, t_w_l_s_lb_hpr, t_w_s_s_lb_hpr,
                        _s_l_ext_idx, _s_s_ext_idx, _s_l_stop, _s_s_stop,
                        _s_l_ext_hpr, _s_s_ext_hpr):
            """Score the current week's portfolio for one exit model.

            From each stock's exit index onwards its hpr path is frozen at
            the exit hpr.  The lower-bound path is frozen from the exit index
            itself when ``override_ext_lb_hpr`` is true, otherwise from the
            following index.  Also reads the enclosing week's selection
            (``_prob_l``, ``sel_*_cond``, ``_l_stocks`` ...).
            """
            # work on copies so the weekly arrays can be reused by the caller
            _t_w_l_s_hpr = np.copy(t_w_l_s_hpr)
            _t_w_s_s_hpr = np.copy(t_w_s_s_hpr)
            _t_w_l_s_lb_hpr = np.copy(t_w_l_s_lb_hpr)
            _t_w_s_s_lb_hpr = np.copy(t_w_s_s_lb_hpr)
            # freeze each path after its exit
            for idx in range(_s_l_ext_idx.shape[0]):
                _ext_idx = _s_l_ext_idx[idx]
                _ext_hpr = _s_l_ext_hpr[idx]
                _lb_from = _ext_idx if override_ext_lb_hpr else _ext_idx + 1
                _t_w_l_s_hpr[idx, _ext_idx:] = _ext_hpr
                _t_w_l_s_lb_hpr[idx, _lb_from:] = _ext_hpr
            for idx in range(_s_s_ext_idx.shape[0]):
                _ext_idx = _s_s_ext_idx[idx]
                _ext_hpr = _s_s_ext_hpr[idx]
                _lb_from = _ext_idx if override_ext_lb_hpr else _ext_idx + 1
                _t_w_s_s_hpr[idx, _ext_idx:] = _ext_hpr
                _t_w_s_s_lb_hpr[idx, _lb_from:] = _ext_hpr

            # calc portfolio hpr from the last column of each leg
            _l_s_hpr = _t_w_l_s_hpr[:, -1]
            _s_s_hpr = _t_w_s_s_hpr[:, -1]

            _l_hpr = np.mean(_l_s_hpr)
            _s_hpr = np.mean(_s_s_hpr)

            _w_hpr = (_l_hpr - get_config().SLIPPAGE) * get_config(
            ).LONG_ALLOC_PCT - (
                _s_hpr - get_config().SLIPPAGE) * get_config().SHORT_ALLOC_PCT

            # calc min w eod hpr
            _t_w_l_s_hpr_mean = np.mean(_t_w_l_s_hpr, axis=0)
            _t_w_s_s_hpr_mean = np.mean(_t_w_s_s_hpr, axis=0)

            _t_w_hpr = _t_w_l_s_hpr_mean * get_config(
            ).LONG_ALLOC_PCT - _t_w_s_s_hpr_mean * get_config().SHORT_ALLOC_PCT
            _min_w_eod_hpr = np.min(_t_w_hpr)

            # calc lower bound weekly min
            _t_w_l_s_lb_hpr_mean = np.mean(_t_w_l_s_lb_hpr, axis=0)
            _t_w_s_s_lb_hpr_mean = np.mean(_t_w_s_s_lb_hpr, axis=0)

            _t_w_lb_hpr = _t_w_l_s_lb_hpr_mean * get_config(
            ).LONG_ALLOC_PCT - _t_w_s_s_lb_hpr_mean * get_config(
            ).SHORT_ALLOC_PCT
            _min_w_lb_hpr = np.min(_t_w_lb_hpr)

            # portfolio tables: one row per stock with the columns
            # (ticker, hpr, exit date, avg_s_to, prob_l)
            _longs_df = None
            _shorts_df = None
            if not get_config().GRID_SEARCH and get_config().PRINT_PORTFOLIO:
                _l_prob_l = _prob_l[sel_l_cond]
                _s_prob_l = _prob_l[sel_s_cond]

                _longs_df = np.empty((_l_stocks.shape[0], 5), dtype=object)
                _shorts_df = np.empty((_s_stocks.shape[0], 5), dtype=object)

                for idx, _stock_idx in enumerate(_l_stocks):
                    _dt_ext = datetime.datetime.fromtimestamp(
                        raw_dt[_t_w_eods[_s_l_ext_idx[idx]]])
                    _s_dt_ext = _dt_ext.strftime('%Y-%m-%d')

                    _longs_df[idx, 0] = tickers[_stock_idx]
                    _longs_df[idx, 1] = _l_s_hpr[idx]
                    _longs_df[idx, 2] = _s_dt_ext
                    _longs_df[idx, 3] = _l_avg_s_to[idx]
                    _longs_df[idx, 4] = _l_prob_l[idx]

                for idx, _stock_idx in enumerate(_s_stocks):
                    _dt_ext = datetime.datetime.fromtimestamp(
                        raw_dt[_t_w_eods[_s_s_ext_idx[idx]]])
                    _s_dt_ext = _dt_ext.strftime('%Y-%m-%d')

                    _shorts_df[idx, 0] = tickers[_stock_idx]
                    _shorts_df[idx, 1] = _s_s_hpr[idx]
                    _shorts_df[idx, 2] = _s_dt_ext
                    _shorts_df[idx, 3] = _s_avg_s_to[idx]
                    _shorts_df[idx, 4] = _s_prob_l[idx]

            # calc long and short stops
            _l_stops = np.sum(_s_l_stop)
            _s_stops = np.sum(_s_s_stop)

            return _l_hpr, _s_hpr, _w_hpr, _min_w_eod_hpr, _min_w_lb_hpr, _l_stops, _s_stops, _longs_df, _shorts_df, _min_to, _avg_to

        # without a stop every position is held until the last eod of the week
        _default_ext_idx = _t_w_eod_num - 1

        # calc no sl model
        _s_l_ext_idx = np.full(_l_stocks.shape, _t_w_eod_num - 1)
        _s_s_ext_idx = np.full(_s_stocks.shape, _t_w_eod_num - 1)
        _s_l_stop = np.full(_l_stocks.shape, False)
        _s_s_stop = np.full(_s_stocks.shape, False)
        _s_l_ext_hpr = _t_w_l_s_hpr[:, _t_w_eod_num - 1]
        _s_s_ext_hpr = _t_w_s_s_hpr[:, _t_w_eod_num - 1]

        _l_hpr, _s_hpr, _w_hpr, _min_w_eod_hpr, _min_w_lb_hpr, _l_stops, _s_stops, _s_longs, _s_shorts, _min_to, _avg_to = calc_params(
            False, _t_w_eods, _t_w_l_s_hpr, _t_w_s_s_hpr, _t_w_l_s_lb_hpr,
            _t_w_s_s_lb_hpr, _s_l_ext_idx, _s_s_ext_idx, _s_l_stop, _s_s_stop,
            _s_l_ext_hpr, _s_s_ext_hpr)
        model_no_sl_hpr[w_i] = _w_hpr
        min_w_eod_hpr_no_sl[w_i] = _min_w_eod_hpr
        min_w_lb_hpr_no_sl[w_i] = _min_w_lb_hpr
        l_port_no_sl[w_i] = _s_longs
        s_port_no_sl[w_i] = _s_shorts
        l_stops_no_sl[w_i] = _l_stops
        s_stops_no_sl[w_i] = _s_stops

        # calc eod model: exit the whole portfolio at the first eod where its
        # equal-weighted long/short hpr is at or below the stop level
        _t_w_l_s_hpr_mean = np.mean(_t_w_l_s_hpr, axis=0)
        _t_w_s_s_hpr_mean = np.mean(_t_w_s_s_hpr, axis=0)
        _t_w_hpr = (_t_w_l_s_hpr_mean - _t_w_s_s_hpr_mean) / 2
        sl_idxs = np.nonzero(_t_w_hpr <= get_config().STOP_LOSS_HPR)
        _ext_idx = _default_ext_idx
        _stop = False
        if sl_idxs[0].shape[0] > 0:
            _ext_idx = sl_idxs[0][0]
            _stop = True

        _s_l_ext_idx = np.full(_l_stocks.shape, _ext_idx)
        _s_s_ext_idx = np.full(_s_stocks.shape, _ext_idx)
        _s_l_stop = np.full(_l_stocks.shape, _stop)
        # sized by the short leg so the short stop count counts shorts
        _s_s_stop = np.full(_s_stocks.shape, _stop)
        _s_l_ext_hpr = _t_w_l_s_hpr[:, _ext_idx]
        _s_s_ext_hpr = _t_w_s_s_hpr[:, _ext_idx]

        _l_hpr, _s_hpr, _w_hpr, _min_w_eod_hpr, _min_w_lb_hpr, _l_stops, _s_stops, _s_longs, _s_shorts, _min_to, _avg_to = calc_params(
            False, _t_w_eods, _t_w_l_s_hpr, _t_w_s_s_hpr, _t_w_l_s_lb_hpr,
            _t_w_s_s_lb_hpr, _s_l_ext_idx, _s_s_ext_idx, _s_l_stop, _s_s_stop,
            _s_l_ext_hpr, _s_s_ext_hpr)

        model_eod_sl_hpr[w_i] = _w_hpr
        min_w_eod_hpr_eod_sl[w_i] = _min_w_eod_hpr
        min_w_lb_hpr_eod_sl[w_i] = _min_w_lb_hpr
        l_port_eod_sl[w_i] = _s_longs
        s_port_eod_sl[w_i] = _s_shorts
        l_stops_eod_sl[w_i] = _l_stops
        s_stops_eod_sl[w_i] = _s_stops

        # calc lower bound model: same as above but triggered by the
        # portfolio's lower-bound hpr
        _t_w_l_s_lb_hpr_mean = np.mean(_t_w_l_s_lb_hpr, axis=0)
        _t_w_s_s_lb_hpr_mean = np.mean(_t_w_s_s_lb_hpr, axis=0)
        _t_w_lb_hpr = (_t_w_l_s_lb_hpr_mean - _t_w_s_s_lb_hpr_mean) / 2

        _t_w_l_s_o_hpr_mean = np.mean(_t_w_l_s_o_hpr, axis=0)
        _t_w_s_s_o_hpr_mean = np.mean(_t_w_s_s_o_hpr, axis=0)
        _t_w_o_hpr = (_t_w_l_s_o_hpr_mean - _t_w_s_s_o_hpr_mean) / 2

        sl_idxs = np.nonzero(_t_w_lb_hpr <= get_config().STOP_LOSS_HPR)
        _ext_idx = _t_w_eod_num - 1
        _stop = False
        _stop_on_open = False
        if sl_idxs[0].shape[0] > 0:
            _ext_idx = sl_idxs[0][0]
            _stop = True
            # the open-based hpr is already through the stop level: exit at
            # the open-based hpr instead of at the stop level
            if _t_w_o_hpr[_ext_idx] <= get_config().STOP_LOSS_HPR:
                _stop_on_open = True

        _s_l_ext_idx = np.full(_l_stocks.shape, _ext_idx)
        _s_s_ext_idx = np.full(_s_stocks.shape, _ext_idx)
        _s_l_stop = np.full(_l_stocks.shape, _stop)
        _s_s_stop = np.full(_s_stocks.shape, _stop)
        if _stop:
            if _stop_on_open:
                _s_l_ext_hpr = _t_w_l_s_o_hpr[:, _ext_idx]
                _s_s_ext_hpr = _t_w_s_s_o_hpr[:, _ext_idx]
            else:
                _s_l_ext_hpr = np.full(_l_stocks.shape,
                                       get_config().STOP_LOSS_HPR)
                _s_s_ext_hpr = np.full(_s_stocks.shape,
                                       -get_config().STOP_LOSS_HPR)
        else:
            _s_l_ext_hpr = _t_w_l_s_hpr[:, _ext_idx]
            _s_s_ext_hpr = _t_w_s_s_hpr[:, _ext_idx]

        _l_hpr, _s_hpr, _w_hpr, _min_w_eod_hpr, _min_w_lb_hpr, _l_stops, _s_stops, _s_longs, _s_shorts, _min_to, _avg_to = calc_params(
            True, _t_w_eods, _t_w_l_s_hpr, _t_w_s_s_hpr, _t_w_l_s_lb_hpr,
            _t_w_s_s_lb_hpr, _s_l_ext_idx, _s_s_ext_idx, _s_l_stop, _s_s_stop,
            _s_l_ext_hpr, _s_s_ext_hpr)

        model_lb_sl_hpr[w_i] = _w_hpr
        min_w_eod_hpr_lb_sl[w_i] = _min_w_eod_hpr
        min_w_lb_hpr_lb_sl[w_i] = _min_w_lb_hpr
        l_port_lb_sl[w_i] = _s_longs
        s_port_lb_sl[w_i] = _s_shorts
        l_stops_lb_sl[w_i] = _l_stops
        s_stops_lb_sl[w_i] = _s_stops

        # calc sl by stock model
        # long
        _s_l_stop_mask = _t_w_l_s_lb_hpr <= get_config().STOP_LOSS_HPR
        _s_l_stop = np.any(_s_l_stop_mask, axis=1)
        _s_l_ext_idx = first_true_idx_by_row(_s_l_stop_mask, _default_ext_idx)
        # by default ext hpr == hpr no sl; copied because the stopped entries
        # are overwritten below and must not write back into the weekly array
        _s_l_ext_hpr = _t_w_l_s_hpr[:, _default_ext_idx].copy()
        # exit idx for stocks with sl
        _s_l_sl_ext_idx = _s_l_ext_idx[_s_l_stop]
        # stocks with sl hpr by open px during the week
        _t_w_l_sl_s_o_hrp = _t_w_l_s_o_hpr[_s_l_stop, :]
        # hpr for stocks with sl by open px, taken at each stock's exit idx
        _l_s_sl_o_hrp = _t_w_l_sl_s_o_hrp[
            np.arange(_t_w_l_sl_s_o_hrp.shape[0]), _s_l_sl_ext_idx]
        # open-based hpr already through the stop level -> exit at it
        _use_o_px = _l_s_sl_o_hrp <= get_config().STOP_LOSS_HPR
        # stock with sl hpr
        _s_l_sl_hpr = np.where(_use_o_px, _l_s_sl_o_hrp,
                               get_config().STOP_LOSS_HPR)
        # override default hpr for stocks with sl
        _s_l_ext_hpr[_s_l_stop] = _s_l_sl_hpr

        # short (mirror of the long leg with the stop level negated)
        _s_s_stop_mask = _t_w_s_s_lb_hpr >= -get_config().STOP_LOSS_HPR
        _s_s_stop = np.any(_s_s_stop_mask, axis=1)
        _s_s_ext_idx = first_true_idx_by_row(_s_s_stop_mask, _default_ext_idx)

        # by default ext hpr == hpr no sl (copied, see the long leg)
        _s_s_ext_hpr = _t_w_s_s_hpr[:, _default_ext_idx].copy()
        # exit idx for stocks with sl
        _s_s_sl_ext_idx = _s_s_ext_idx[_s_s_stop]
        # stocks with sl hpr by open px during the week
        _t_w_s_sl_s_o_hrp = _t_w_s_s_o_hpr[_s_s_stop, :]
        # hpr for stocks with sl by open px, taken at each stock's exit idx
        _s_s_sl_o_hrp = _t_w_s_sl_s_o_hrp[
            np.arange(_t_w_s_sl_s_o_hrp.shape[0]), _s_s_sl_ext_idx]
        # condition
        _use_o_px = _s_s_sl_o_hrp >= -get_config().STOP_LOSS_HPR
        # stock with sl hpr
        _s_s_sl_hpr = np.where(_use_o_px, _s_s_sl_o_hrp,
                               -get_config().STOP_LOSS_HPR)
        # override default hpr for stocks with sl
        _s_s_ext_hpr[_s_s_stop] = _s_s_sl_hpr

        _l_hpr, _s_hpr, _w_hpr, _min_w_eod_hpr, _min_w_lb_hpr, _l_stops, _s_stops, _s_longs, _s_shorts, _min_to, _avg_to = calc_params(
            True, _t_w_eods, _t_w_l_s_hpr, _t_w_s_s_hpr, _t_w_l_s_lb_hpr,
            _t_w_s_s_lb_hpr, _s_l_ext_idx, _s_s_ext_idx, _s_l_stop, _s_s_stop,
            _s_l_ext_hpr, _s_s_ext_hpr)
        model_s_sl_hpr[w_i] = _w_hpr
        min_w_eod_hpr_s_sl[w_i] = _min_w_eod_hpr
        min_w_lb_hpr_s_sl[w_i] = _min_w_lb_hpr
        l_port_s_sl[w_i] = _s_longs
        s_port_s_sl[w_i] = _s_shorts
        l_stops_s_sl[w_i] = _l_stops
        s_stops_s_sl[w_i] = _s_stops

    # bundle the per-week result arrays once, after the loop, so the tuples
    # exist even when there are no weeks to process
    model_no_sl = (model_no_sl_hpr, min_w_eod_hpr_no_sl,
                   min_w_lb_hpr_no_sl, l_stops_no_sl, s_stops_no_sl,
                   l_port_no_sl, s_port_no_sl)
    model_eod_sl = (model_eod_sl_hpr, min_w_eod_hpr_eod_sl,
                    min_w_lb_hpr_eod_sl, l_stops_eod_sl, s_stops_eod_sl,
                    l_port_eod_sl, s_port_eod_sl)
    model_lb_sl = (model_lb_sl_hpr, min_w_eod_hpr_lb_sl,
                   min_w_lb_hpr_lb_sl, l_stops_lb_sl, s_stops_lb_sl,
                   l_port_lb_sl, s_port_lb_sl)
    model_s_sl = (model_s_sl_hpr, min_w_eod_hpr_s_sl, min_w_lb_hpr_s_sl,
                  l_stops_s_sl, s_stops_s_sl, l_port_s_sl, s_port_s_sl)

    progress.print_progess_end()

    return (c_l, c_s, model_no_sl, model_eod_sl, model_lb_sl, model_s_sl,
            min_to, avg_to, longs, shorts, selection)
Exemplo n.º 9
0
    s_hpr_model = append_data(s_hpr_model, _s_hpr_model)
    s_int_r = append_data(s_int_r, _s_int_r)
    c_l = append_data(c_l, _c_l)
    c_s = append_data(c_s, _c_s)
    w_data_index = append_data(w_data_index, make_array(data_set_records))
    w_num_stocks = append_data(w_num_stocks, make_array(num_stocks))
    w_enter_index = append_data(w_enter_index, make_array(enter_date_idx))
    w_exit_index = append_data(w_exit_index, make_array(exit_date_idx))

    update_indexes()

    # record counts
    data_set_records += num_stocks
    total_weeks += 1

progress.print_progess_end()

update_indexes()

# training pipeline: each train_* call receives the same data arrays and the
# train index range
train_rbm(dr, wr, tr_beg_idx, tr_end_idx)
train_ae(dr, wr, tr_beg_idx, tr_end_idx)
train_ffnn(dr, wr, c_l, c_s, w_data_index, w_num_stocks, tr_beg_idx,
           tr_end_idx, tr_wk_beg_idx, tr_wk_end_idx)

# one float64 slot per record; builtin ``float`` is used because the
# ``np.float`` alias no longer exists in current NumPy releases
prob_l = np.zeros((data_set_records), dtype=float)
# presumably fills prob_l in place -- confirm against evaluate_ffnn
evaluate_ffnn(data_set_records, dr, wr, prob_l)


def calc_classes_and_decisions(data_set_records, total_weeks, prob_l):
    c_l = np.zeros((data_set_records), dtype=np.bool)
    c_s = np.zeros((data_set_records), dtype=np.bool)
Exemplo n.º 10
0
def _fit_one_epoch(snp_env, net, train_trading_schedule):
    """Fit `net` once on every period of the training schedule.

    The schedule is visited in random order when the SHUFFLE config flag is
    set, otherwise in its given order. After the first fit of the pass the
    net is evaluated on that same period and the resulting P/L is printed.

    Args:
        snp_env: environment providing tradeable masks, inputs and labels.
        net: network object exposing `fit` and `eval`.
        train_trading_schedule: list of (entry, exit) pairs.
    """
    if get_config().SHUFFLE:
        # Sampling the full length yields a shuffled copy, so the original
        # schedule keeps its order for the evaluation passes.
        train_schedule = random.sample(train_trading_schedule,
                                       len(train_trading_schedule))
    else:
        train_schedule = train_trading_schedule
    dataset_size = len(train_schedule)
    curr_progress = 0
    for passed, (ent, ext) in enumerate(train_schedule):
        stk_mask = snp_env.get_tradeable_snp_components_mask(ent)
        x = snp_env.get_input(stk_mask, ent)
        labels = snp_env.get_ret_lbl(stk_mask, ent, ext)
        # Only the COW net takes the input variances as an extra argument.
        if get_config().NET_VER == NetVersion.COW:
            net_args = (x, labels, calc_variance(x))
        else:
            net_args = (x, labels)
        net.fit(*net_args)
        if passed == 0:
            pl, _ = net.eval(*net_args)
            print(pl)
        curr_progress = progress.print_progress(curr_progress, passed,
                                                dataset_size)
    progress.print_progess_end()


def _evaluate_schedule(snp_env, net, trading_schedule):
    """Evaluate `net` on each (entry, exit) period of a trading schedule.

    Iteration stops at the first period whose exit date is None.

    Args:
        snp_env: environment providing tradeable masks, inputs and labels.
        net: network object exposing `eval`.
        trading_schedule: list of (entry, exit) pairs.

    Returns:
        A tuple `(ret, dt)`. `ret` is an array with one P/L value per
        schedule entry (entries that were not reached stay 0). `dt` is the
        list of dates: the first entry date followed by the exit date of
        every evaluated period.
    """
    ret = np.zeros((len(trading_schedule)))
    dataset_size = len(trading_schedule)
    curr_progress = 0
    dt = []
    for passed, (ent, ext) in enumerate(trading_schedule):
        # Guard first: nothing below may touch `ext` when it is None.
        if ext is None:
            break
        if not dt:
            dt.append(ent)
        dt.append(ext)
        stk_mask = snp_env.get_tradeable_snp_components_mask(ent)
        x = snp_env.get_input(stk_mask, ent)
        labels = snp_env.get_ret_lbl(stk_mask, ent, ext)

        if get_config().NET_VER == NetVersion.COW:
            variances = calc_variance(x)
            pl, weights = net.eval(x, labels, variances)
        else:
            pl, weights = net.eval(x, labels)

        if get_config().NET_VER == NetVersion.APPLE:
            # "1d no net" variant: the net's P/L is replaced by a rule that
            # ranks stocks by their return over the previous trading day.
            prev_date = snp_env.find_prev_trading_date(
                ent - datetime.timedelta(days=1))
            int_stk_mask = snp_env.get_tradeable_snp_components_mask(
                prev_date)
            # Keep only stocks tradeable on both dates. This narrows
            # `stk_mask` for the print_alloc call below as well.
            stk_mask &= int_stk_mask

            int_r = snp_env.get_ret_lbl(stk_mask, prev_date, ent)
            port_ret = snp_env.get_ret_lbl(stk_mask, ent, ext)
            sorted_int_r_idxs = np.argsort(int_r)
            # Lowest previous-day returns form the long leg, highest the
            # short leg.
            long_pl = np.mean(
                port_ret[sorted_int_r_idxs[:get_config().SELECTTION]])
            short_pl = np.mean(
                port_ret[sorted_int_r_idxs[-get_config().SELECTTION:]])

            pl = 0.7 * long_pl - 0.3 * short_pl

        if get_config().PRINT_PREDICTION:
            print_alloc(pl, ent, snp_env.tickers, stk_mask, weights)
        # Period results of 30% or more in either direction are zeroed out
        # (presumably treated as data errors -- TODO confirm).
        if abs(pl) >= 0.3:
            pl = 0
        ret[passed] = pl
        curr_progress = progress.print_progress(curr_progress, passed,
                                                dataset_size)
    progress.print_progess_end()
    return ret, dt


def _equity_stats(ret, years):
    """Derive the capital curve and summary statistics from period returns.

    Args:
        ret: array of per-period P/L values.
        years: length of the evaluated span in years.

    Returns:
        A tuple `(capital, draw_down, yearly_avg, sharpe)`.
    """
    capital = get_capital(ret, False)
    draw_down = get_draw_down(capital, False)
    sharpe = get_sharpe_ratio(ret, years)
    yearly_avg = get_avg_yeat_ret(ret, years)
    return capital, draw_down, yearly_avg, sharpe


def train():
    """Train and/or evaluate the configured net on the trading schedules.

    The net class is chosen by the NET_VER config value. Each epoch
    optionally fits the net on the training schedule (MODE == TRAIN), then
    evaluates it on the training and the test schedule and prints draw-down,
    yearly average return and Sharpe ratio for both.

    In TRAIN mode the weights are saved and one row of statistics is
    appended to the TRAIN_STAT_PATH CSV after every epoch; the loop has no
    exit condition in this mode and runs until the process is stopped. In
    any other mode a single evaluation pass is made, equity curves are
    plotted and written to 'data/tr_eq.csv' / 'data/tst_eq.csv', and the
    function returns.

    Raises:
        ValueError: if NET_VER does not match any known net version.
    """
    snp_env = SnpEnv()
    net_ver = get_config().NET_VER
    if net_ver == NetVersion.APPLE:
        net = NetApple()
    elif net_ver == NetVersion.BANANA:
        net = NetBanana()
    elif net_ver == NetVersion.WORM:
        net = NetWorm()
    elif net_ver == NetVersion.SNAKE:
        net = NetSnake()
    elif net_ver == NetVersion.ANTI_SNAKE:
        net = NetAntiSnake()
    elif net_ver == NetVersion.CAT:
        net = NetCat()
    elif net_ver == NetVersion.COW:
        net = NetCow()
    else:
        # Fail here with a clear message instead of an unbound `net` below.
        raise ValueError("Unsupported NET_VER: %r" % (net_ver,))
    net.init()

    train_trading_schedule = [
        (ent, ext) for ent, ext in snp_env.trading_schedule_generator(
            get_config().TRAIN_BEG,
            get_config().TRAIN_END,
            get_config().TRADING_PERIOD_DAYS)
    ]
    test_trading_schedule = [
        (ent, ext) for ent, ext in snp_env.trading_schedule_generator(
            get_config().TEST_BEG,
            get_config().TEST_END,
            get_config().TRADING_PERIOD_DAYS)
    ]

    # Write the CSV header only when the statistics file is first created.
    if not os.path.exists(get_config().TRAIN_STAT_PATH):
        with open(get_config().TRAIN_STAT_PATH, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(
                ('epoch', 'train dd', 'train y avg', 'train sharpe', 'test dd',
                 'test y avg', 'test sharpe'))

    with open(get_config().TRAIN_STAT_PATH, 'a', newline='') as f:
        writer = csv.writer(f)
        if get_config().EPOCH_WEIGHTS_TO_LOAD != 0:
            net.load_weights(get_config().WEIGHTS_PATH,
                             get_config().EPOCH_WEIGHTS_TO_LOAD)
            epoch = get_config().EPOCH_WEIGHTS_TO_LOAD
            # When resuming training, continue with the epoch after the
            # one whose weights were loaded.
            if get_config().MODE == Mode.TRAIN:
                epoch += 1
        else:
            epoch = 0

        while True:
            print("Epoch %d" % epoch)

            # train
            if get_config().MODE == Mode.TRAIN:
                print("Training...")
                _fit_one_epoch(snp_env, net, train_trading_schedule)

            # eval train
            print("Eval train...")
            ret, dt = _evaluate_schedule(snp_env, net, train_trading_schedule)
            years = (get_config().TRAIN_END -
                     get_config().TRAIN_BEG).days / 365
            capital, train_dd, train_y_avg, train_sharpe = _equity_stats(
                ret, years)
            print('Train dd: %.2f%% y_avg: %.2f%% sharpe: %.2f' %
                  (train_dd * 100, train_y_avg * 100, train_sharpe))
            if get_config().MODE == Mode.TEST:
                plot_equity_curve("Train equity curve", dt, capital)
                df = pd.DataFrame({'date': dt, 'capital': capital})
                df.to_csv('data/tr_eq.csv', index=False)

            # eval test
            print("Eval test...")
            ret, dt = _evaluate_schedule(snp_env, net, test_trading_schedule)
            # Annualise test statistics over the test span, not the
            # training span.
            years = (get_config().TEST_END -
                     get_config().TEST_BEG).days / 365
            capital, test_dd, test_y_avg, test_sharpe = _equity_stats(
                ret, years)
            print('Test dd: %.2f%% y_avg: %.2f%% sharpe: %.2f' %
                  (test_dd * 100, test_y_avg * 100, test_sharpe))
            if get_config().MODE == Mode.TEST:
                plot_equity_curve("Test equity curve", dt, capital)
                df = pd.DataFrame({'date': dt, 'capital': capital})
                df.to_csv('data/tst_eq.csv', index=False)

            if get_config().MODE == Mode.TRAIN:
                net.save_weights(get_config().WEIGHTS_PATH, epoch)
                writer.writerow((epoch, train_dd, train_y_avg, train_sharpe,
                                 test_dd, test_y_avg, test_sharpe))
                epoch += 1
                # Flush so the statistics survive an interrupted run.
                f.flush()
            else:
                show_plots()
                break