def testcode_marketsim(symbol='ML_based', base_dir='./orders/',
                       sv=100000, leverLimit=True, verbose=True):
    """Simulate an orders file and compare the resulting portfolio to $SPX.

    Args:
        symbol: Name of the orders file, resolved to a path with
            ``symbol_to_path`` (e.g. 'benchmark', 'bestPossibleStrategy',
            'rule_based' or 'ML_based').
        base_dir: Directory handed to ``symbol_to_path`` together with
            ``symbol``.
        sv: Starting value of the portfolio, i.e. initial cash available.
        leverLimit: Forwarded unchanged to ``compute_portvals``
            (presumably toggles a leverage cap -- confirm there).
        verbose: When true, plot the portfolio against $SPX and print the
            comparison statistics.

    Returns:
        Tuple ``(cum_ret, portVals)``: the cumulative return reported by
        ``get_portfolio_stats`` and the portfolio values as a Series.
    """
    of = symbol_to_path(symbol, base_dir)

    # Process orders
    portVals = compute_portvals(of, sv, leverLimit)
    if isinstance(portVals, pd.DataFrame):
        # just get the first column as a Series
        portVals = portVals[portVals.columns[0]]
    else:
        # This used to be a bare string expression, i.e. a silent no-op.
        print("warning, code did not return a DataFrame")

    start_date = portVals.index[0]
    end_date = portVals.index[-1]
    pricesSPX = get_data(['$SPX'], pd.date_range(start_date, end_date))
    pricesSPX = pricesSPX['$SPX']

    cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = \
        get_portfolio_stats(portVals, daily_rf=0, samples_per_year=252)
    cum_ret_SPY, avg_daily_ret_SPY, std_daily_ret_SPY, sharpe_ratio_SPY = \
        get_portfolio_stats(pricesSPX, daily_rf=0, samples_per_year=252)

    # Compare portfolio against $SPX
    if verbose:
        dfTemp = pd.concat([portVals, pricesSPX],
                           axis=1,
                           keys=['portfolio', '$SPX'])
        plot_normalized_data(dfTemp, '', '', '')

        # Single-argument print(...) produces the same output whether print
        # is a statement (Python 2) or a function (Python 3).
        print("\nDate Range: {} to {}".format(start_date.date(),
                                              end_date.date()))
        print("")
        print("Sharpe Ratio of Fund: {}".format(sharpe_ratio))
        print("Sharpe Ratio of SPY : {}".format(sharpe_ratio_SPY))
        print("")
        print("Cumulative Return of Fund: {}".format(cum_ret))
        print("Cumulative Return of SPY : {}".format(cum_ret_SPY))
        print("")
        print("Standard Deviation of Fund: {}".format(std_daily_ret))
        print("Standard Deviation of SPY : {}".format(std_daily_ret_SPY))
        print("")
        print("Average Daily Return of Fund: {}".format(avg_daily_ret))
        print("Average Daily Return of SPY : {}".format(avg_daily_ret_SPY))
        print("")
        # .iloc makes the "last row" lookup explicitly positional; the index
        # holds dates, so a bare [-1] depends on pandas' integer fallback.
        print("Final Portfolio Value: {}".format(portVals.iloc[-1]))

    return cum_ret, portVals
예제 #2
0
def testcode_marketsim(symbol='ML_based', base_dir='', sv=100000, leverLimit=True, verbose=False):
    """Simulate an orders file and report its statistics next to $SPX.

    Args:
        symbol: Name of the orders file, resolved to a path with
            ``symbol_to_path``.
        base_dir: Directory handed to ``symbol_to_path`` together with
            ``symbol``.
        sv: Starting portfolio value passed to ``compute_portvals``.
        leverLimit: Forwarded unchanged to ``compute_portvals``
            (presumably toggles a leverage cap -- confirm there).
        verbose: When true, print the comparison statistics.

    Returns:
        Tuple ``(cum_ret, portVals)``: the cumulative return reported by
        ``get_portfolio_stats`` and the portfolio values as a Series.
    """
    of = symbol_to_path(symbol, base_dir)
    portVals = compute_portvals(of, sv, leverLimit)  # Process orders
    if isinstance(portVals, pd.DataFrame):
        portVals = portVals[portVals.columns[0]]  # just get the first column as a Series
    else:
        # This used to be a bare string expression, i.e. a silent no-op.
        print("warning, code did not return a DataFrame")

    start_date = portVals.index[0]
    end_date = portVals.index[-1]
    pricesSPX = get_data(['$SPX'], pd.date_range(start_date, end_date))
    pricesSPX = pricesSPX['$SPX']

    cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = \
        get_portfolio_stats(portVals, daily_rf=0, samples_per_year=252)
    cum_ret_SPY, avg_daily_ret_SPY, std_daily_ret_SPY, sharpe_ratio_SPY = \
        get_portfolio_stats(pricesSPX, daily_rf=0, samples_per_year=252)

    # Compare portfolio against $SPX
    if verbose:
        # Single-argument print(...) produces the same output whether print
        # is a statement (Python 2) or a function (Python 3).
        print("\nDate Range: {} to {}".format(start_date.date(), end_date.date()))
        print("")
        print("Sharpe Ratio of Fund: {}".format(sharpe_ratio))
        print("Sharpe Ratio of SPY : {}".format(sharpe_ratio_SPY))
        print("")
        print("Cumulative Return of Fund: {}".format(cum_ret))
        print("Cumulative Return of SPY : {}".format(cum_ret_SPY))
        print("")
        print("Standard Deviation of Fund: {}".format(std_daily_ret))
        print("Standard Deviation of SPY : {}".format(std_daily_ret_SPY))
        print("")
        print("Average Daily Return of Fund: {}".format(avg_daily_ret))
        print("Average Daily Return of SPY : {}".format(avg_daily_ret_SPY))
        print("")
        # .iloc makes the "last row" lookup explicitly positional; the index
        # holds dates, so a bare [-1] depends on pandas' integer fallback.
        print("Final Portfolio Value: {}".format(portVals.iloc[-1]))

    return cum_ret, portVals
예제 #3
0
    def getFeatures(self, sd, ed, symbol, norm=False):
        """Build the indicator DataFrame for ``symbol`` between ``sd`` and ``ed``.

        Data is loaded from ``lookback + 15`` calendar days before ``sd`` so
        the rolling windows already have history at ``sd``; the result is cut
        back to start at the requested ``sd`` before it is returned.

        Relies on two names defined outside this method: ``lookback`` (the
        rolling window length) and ``buffet`` (when truthy, the legacy
        ``pd.rolling_*`` functions are used instead of ``.rolling()``).

        Args:
            sd: Requested start date; must support ``timedelta`` arithmetic.
            ed: End date of the range.
            symbol: Passed to ``ut.get_data`` and used to select the price
                column(s); ``symbol[0]`` names the CSV read for the
                stochastic oscillator. Presumably a one-element list of
                tickers -- confirm with callers.
            norm: Not used by this method.

        Returns:
            The frame from ``ut.get_data`` (rows containing NaN dropped) with
            the added columns ``bbp``, ``psma``, ``dr`` and ``d_slow``,
            sorted by index and starting at the original ``sd``.
        """
        sd_org = sd
        # Start early so the rolling windows are warmed up by sd_org.
        sd = sd + dt.timedelta(days=-(lookback + 15))

        dates = pd.date_range(sd, ed)
        df = ut.get_data(symbol, dates)
        df = df.dropna(axis=0)

        price = df[symbol]
        if (buffet):
            sma = pd.rolling_mean(price, window=lookback, min_periods=lookback)
            r_std = pd.rolling_std(price,
                                   window=lookback,
                                   min_periods=lookback)
        else:
            sma = price.rolling(window=lookback, min_periods=lookback).mean()
            r_std = price.rolling(window=lookback, min_periods=lookback).std()

        # Bollinger bands: two rolling standard deviations around the SMA.
        bb_upper = sma + (2 * r_std)
        bb_lower = sma - (2 * r_std)

        df['bbp'] = (price - bb_lower) / (bb_upper - bb_lower)
        df['psma'] = price / sma

        # Daily returns. .iloc[0] is the positional "first row" lookup that
        # .ix[0] performed on this date-indexed data; .ix no longer exists in
        # current pandas.
        normed = price / price.iloc[0]
        daily_rets = (normed / normed.shift(1)) - 1
        daily_rets = daily_rets[1:]
        df['dr'] = daily_rets

        # Stochastic oscillator, computed from the symbol's raw CSV because
        # it needs the 'Low', 'High' and 'Adj Close' columns.
        df_so = pd.DataFrame(index=df.index)
        filename = ut.symbol_to_path(symbol[0])
        df_temp = pd.read_csv(filename,
                              index_col='Date',
                              parse_dates=True,
                              na_values=['nan'])
        df_so = df_so.join(df_temp)

        # Fast stochastic calculation
        #     %K = (Current Close - Lowest Low)/(Highest High - Lowest Low) * 100
        #     %D = 3-day SMA of %K
        if (buffet):
            low_min = pd.rolling_min(df_so['Low'], window=lookback)
            high_max = pd.rolling_max(df_so['High'], window=lookback)
        else:
            low_min = df_so['Low'].rolling(window=lookback).min()
            high_max = df_so['High'].rolling(window=lookback).max()

        df_so['k_fast'] = (df_so['Adj Close'] - low_min) / (high_max -
                                                            low_min) * 100

        if (buffet):
            df_so['d_fast'] = pd.rolling_mean(df_so['k_fast'],
                                              window=3,
                                              min_periods=3)
        else:
            df_so['d_fast'] = df_so['k_fast'].rolling(
                window=3, min_periods=3).mean()

        # Slow stochastic calculation
        #     %K = %D of fast stochastic (so k_slow is the same as d_fast)
        #     %D = 3-day SMA of %K
        if (buffet):
            df_so['d_slow'] = pd.rolling_mean(df_so['d_fast'],
                                              window=3,
                                              min_periods=3)
        else:
            df_so['d_slow'] = df_so['d_fast'].rolling(
                window=3, min_periods=3).mean()

        df = df.join(df_so['d_slow'])

        df = df.sort_index()
        # Drop the warm-up rows that precede the requested start date.
        df = df[sd_org:]

        return df
예제 #4
0
    def addEvidence(self,
                    symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000,
                    N_mmt=20,
                    N_bb=20,
                    it=50,
                    output=False):
        """Train ``self.learner`` by replaying ``symbol``'s history ``it`` times.

        Indicators listed in ``self.indicators`` are computed on the full
        available history, the ``sd``..``ed`` window is stored in ``self.df``
        with a discretised ``Ind_States`` column, and a fresh ``ql.QLearner``
        is trained by walking that window once per iteration.

        Args:
            symbol: Ticker to train on; also resolved via ``symbol_to_path``.
            sd: Requested training start; the first row on or after it is
                used.
            ed: Training end date.
            sv: Starting cash for every training pass.
            N_mmt: Window passed to ``add_mmt``.
            N_bb: Window passed to ``add_bband``.
            it: Number of training passes over the window.
            output: When true, return the best cumulative return seen.

        Returns:
            The best cumulative return across passes (never below 0.0, the
            initial value) when ``output`` is true, otherwise ``None``.
        """
        # we need to read from the earliest possible date to get the same EMA for indicator calculations
        path = symbol_to_path(symbol=symbol)
        df_temp = pd.read_csv(path,
                              parse_dates=True,
                              index_col='Date',
                              usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        data_sd = sorted(df_temp.index)[0]

        # example usage of the old backward compatible util function
        syms = [symbol]
        dates = pd.date_range(data_sd, ed)
        data = ut.get_data(syms, dates)  # automatically adds SPY

        if self.verbose:
            prices = data[syms]
            print(prices)

        # calculate technical indicators
        for ind in self.indicators:
            if ind == 'bbp':
                data = add_bband(data, N=N_bb)
            elif ind == 'mmt':
                data = add_mmt(data, N=N_mmt)
            elif ind == 'MACD':
                data = add_MACD(data)
            elif ind == 'ATR':
                data = add_ATR(data, N=14)
            else:
                print('Method to calculate not implemented!')

        # find the true training starting day when market opened (data_sd is earlier than this day)
        sd = data.index[data.index >= sd][0]

        # create a dataframe within sd and ed
        self.df = data[sd:ed].copy()
        trading_days = self.df.index

        # generate dividing values to bin indicators
        for ind in self.indicators:
            self.div_dict[ind] = self._bin_divider(indicator=ind,
                                                   method=self.div_method)

        # without considering holdings, get partial states based on indicators all at once
        self.df['Ind_States'] = self._get_state(self.df[self.indicators])

        self.learner = ql.QLearner(num_states=10**len(self.indicators) * 3,
                                   num_actions=3,
                                   dyna=200,
                                   rar=0.999,
                                   radr=0.9999,
                                   gamma=0.9,
                                   verbose=False)

        bestr = 0.0
        for iteration in range(0, it):

            holdings = 0
            start_state = self._full_state(self.df.loc[sd, 'Ind_States'],
                                           holdings)
            action = self.learner.querysetstate(start_state)
            # log in states that has been in
            if start_state not in self.states_log:
                self.states_log.append(start_state)

            cash = sv
            portv_old = sv
            portv = sv  # stays at sv if the window is too short to trade

            # Pair every trading day with the one that follows it. The
            # previous bookkeeping refreshed next_day at the END of each
            # iteration, so from the second day on it equalled the current
            # date and the reward did not depend on the action just taken.
            for date, next_day in zip(trading_days[:-1], trading_days[1:]):

                # first check if action is allowed: action 0 at -100 shares
                # and action 2 at +100 shares are both replaced by action 1
                if action == 0 and holdings == -100:
                    action = 1
                elif action == 2 and holdings == 100:
                    action = 1

                # set the s prime state and holdings according to action
                newstate, newhold = self._set_sprime(self.df, next_day,
                                                     holdings, action)

                if newstate not in self.states_log:
                    self.states_log.append(newstate)

                # calculate portfolio values. portv = cash + stock
                cash -= self.df.loc[date, syms].values * (newhold - holdings)
                portv = cash + self.df.loc[next_day, syms].values * newhold
                # calculate the reward as daily portfolio return
                # big number to wash out random initiation of Q-table
                # NOTE(review): divides by the new value rather than
                # portv_old; kept as is -- confirm this is intended.
                rwd = 100 * (portv - portv_old) / portv

                # query for the next action
                action = self.learner.query(newstate, rwd, iteration)

                holdings = newhold
                portv_old = portv

            cum_ret = float(portv) / sv - 1
            print('CumRet is: {}'.format(cum_ret))
            if output:  # record the best portv
                if cum_ret > bestr:
                    bestr = cum_ret
            print('Explored states: {}'.format(len(self.states_log)))
            print('{} : {} days of trading'.format(iteration,
                                                   len(trading_days)))

        self.evi_states = list(set(self.df.Ind_States.values))
        print("We have {} states initially not considering holdings.".format(
            len(self.evi_states)))
        print("Meaning possible states are {}".format(
            len(self.evi_states) * 3))
        # if need the information of cum. ret. and divided states, return them
        if output:
            return bestr
예제 #5
0
    def testPolicy(self,
                   symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000,
                   N_mmt=20,
                   N_bb=20):
        """Run the trained learner over ``sd``..``ed`` and return its trades.

        Requires ``addEvidence`` to have been called first: this method reads
        ``self.learner``, ``self.df`` and ``self.states_log``. As a side
        effect it stores the test window in ``self.tdf``, extends
        ``self.test_log`` and writes a 'States' CSV (learned vs. tested
        states) via ``symbol_to_path('States', base_dir=os.getcwd())``.

        Args:
            symbol: Ticker to trade; also resolved via ``symbol_to_path``.
            sd: Requested test start; the first row on or after it is used.
            ed: Test end date.
            sv: Starting cash.
            N_mmt: Window passed to ``add_mmt``.
            N_bb: Window passed to ``add_bband``.

        Returns:
            A Series indexed like the price data from the start day onward,
            holding 100, -100 or 0 for each day.
        """
        # we need to read from the earliest possible date to get the same EMA for indicator calculations
        path = symbol_to_path(symbol=symbol)
        df_temp = pd.read_csv(path,
                              parse_dates=True,
                              index_col='Date',
                              usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        data_sd = sorted(df_temp.index)[0]

        syms = [symbol]
        dates = pd.date_range(data_sd, ed)
        data = ut.get_data(syms, dates)  # automatically adds SPY

        # add on indicators
        for ind in self.indicators:
            if ind == 'bbp':
                data = add_bband(data, N=N_bb)
            elif ind == 'mmt':
                data = add_mmt(data, N=N_mmt)
            elif ind == 'MACD':
                data = add_MACD(data)
            elif ind == 'ATR':
                data = add_ATR(data, N=14)
            else:
                print('Method to calculate not implemented!')

        # find the true starting day when market opens (data_sd is earlier than this day)
        temp = data.index[data.index >= sd]
        sd = temp[0]

        # create a dataframe within sd and ed
        self.tdf = data[sd:ed].copy()
        trading_days = self.tdf.index
        # without considering holdings, get partial states based on indicators all at once
        self.tdf['Ind_States'] = self._get_state(self.tdf[self.indicators])

        # Copy before zeroing so the price column of `data` is not
        # overwritten through a view.
        trades = data.loc[sd:, symbol].copy()
        trades[:] = 0  # set them all to nothing
        holdings = 0
        start_state = self._full_state(self.tdf.loc[sd, 'Ind_States'],
                                       holdings)
        if start_state not in self.test_log:
            self.test_log.append(start_state)
        action = self.learner.querysetstate(start_state)
        portv = sv  # stays at sv if the window is too short to trade
        cash = sv

        # Pair every trading day with the one that follows it. The previous
        # bookkeeping refreshed next_day at the END of each iteration, so
        # from the second day on it equalled the current date.
        for date, next_day in zip(trading_days[:-1], trading_days[1:]):

            # first check if action is allowed
            if action == 0 and holdings == -100:
                action = 1
            elif action == 2 and holdings == 100:
                action = 1

            # set the s prime state according to action
            newstate, newhold = self._set_sprime(self.tdf, next_day, holdings,
                                                 action)
            # record states
            if newstate not in self.test_log:
                self.test_log.append(newstate)

            # record trades
            if newhold != holdings:
                if action == 2:
                    trades[date] = 100
                elif action == 0:
                    trades[date] = -100

            # calculate portfolio values. portv = cash today + stock value today
            cash -= int(self.tdf.loc[date, syms].values * (newhold - holdings))
            portv = cash + int(self.tdf.loc[date, syms].values * newhold)
            # query for the next action
            action = self.learner.querysetstate(newstate)
            holdings = newhold

        print(float(portv) / sv - 1)

        if self.verbose:
            print(type(trades))  # it better be a Series!
            print(trades)

        # now check what states have not been explored before
        # NOTE(review): the modulus 1000 presumably strips the holdings part
        # of a full state for three indicators -- confirm against _full_state.
        test_ind_states = [int(item % 1000) for item in self.test_log]
        possible_states = list(set(test_ind_states))

        print('Test session hit {} states.'.format(len(possible_states)))
        # get unseen states in test process not considering holdings
        trained_states = set(self.df.Ind_States.values)
        not_seen = [item for item in self.tdf.Ind_States
                    if item not in trained_states]
        print('{} indicator states not seen before in training: {}'.format(
            len(set(not_seen)), not_seen))

        self.test_log.sort()
        self.states_log.sort()
        # Pad copies (not the logs themselves) with zeros to a common length.
        # Padding self.test_log in place left fake 0 states behind for later
        # calls, and a test log longer than the training log made the
        # DataFrame constructor fail on unequal column lengths.
        learned = list(self.states_log)
        tested = list(self.test_log)
        width = max(len(learned), len(tested))
        learned.extend([0] * (width - len(learned)))
        tested.extend([0] * (width - len(tested)))

        logs = {'Learned': learned, 'Tested': tested}
        states = pd.DataFrame(logs)
        states.to_csv(symbol_to_path('States', base_dir=os.getcwd()),
                      index=False)

        return trades
예제 #6
0
def get_tickers(filename):
    """Load the 'Symbol' column of the CSV that ``filename`` resolves to.

    ``filename`` is turned into a path with ``symbol_to_path``; the result is
    a DataFrame containing only the 'Symbol' column.
    """
    csv_path = symbol_to_path(filename)
    return pd.read_csv(csv_path, usecols=['Symbol'])
예제 #7
0
def test_code(verb=True):
    """Train a StrategyLearner on SINE_SLOW, test it and back-test the orders.

    Trains on 2010, tests on 2011, sanity-checks the returned trades, plots
    the prices with entry/exit markers, feeds the generated orders to
    ``print_port`` and writes the orders, Q table and indicator dividers to
    CSV files located via ``symbol_to_path(..., base_dir=os.getcwd())``.

    Args:
        verb: When true, print intermediate data (prices, trades) and make
            the learner verbose.
    """
    import random
    random.seed(42)
    # instantiate the strategy learner
    learner = sl.StrategyLearner(bins=10,
                                 div_method='even',
                                 indicators=['bbp', 'mmt', 'ATR'],
                                 verbose=verb)
    # set parameters for training the learner
    sym = "SINE_SLOW"
    money = 5000
    stdate = dt.datetime(2010, 1, 1)
    enddate = dt.datetime(2011, 1, 1)
    Nbb = 22  # bollinger band looking back window
    Nmmt = 3  # momentum looking back window
    # train the learner
    bestr = learner.addEvidence(symbol=sym,
                                sd=stdate,
                                ed=enddate,
                                sv=money,
                                N_bb=Nbb,
                                N_mmt=Nmmt,
                                it=70,
                                output=True)
    # Format into one string: with the print statement, print('a', b) shows
    # the tuple ('a', b) instead of the message.
    print('Best return is {}'.format(bestr))
    print('Now rar is: {}'.format(learner.learner.newrar))
    #############
    # Test
    #############
    st_date = dt.datetime(2011, 1, 1)
    en_date = dt.datetime(2012, 1, 1)

    syms = [sym]
    dates = pd.date_range(st_date, en_date)
    prices_all = get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    if verb:
        print(prices)

    # test the learner
    df_trades = learner.testPolicy(symbol=sym,
                                   sd=st_date,
                                   ed=en_date,
                                   sv=money,
                                   N_bb=Nbb,
                                   N_mmt=Nmmt)

    # a few sanity checks
    # df_trades should be a series
    # including only the values 100, 0, -100
    if not isinstance(df_trades, pd.Series):
        print("Returned result is not a Series")
    # prices is a one-column DataFrame (n, 1) and df_trades a Series (n,), so
    # comparing full shapes always failed; compare the number of rows.
    if prices.shape[0] != df_trades.shape[0]:
        print("Returned result is not the right shape")
    tradecheck = abs(df_trades.cumsum()).values
    tradecheck[tradecheck <= 100] = 0
    tradecheck[tradecheck > 0] = 1
    if tradecheck.sum(axis=0) > 0:
        print("Returned result violates holding restrictions (more than 100 shares)")

    if verb:
        print(df_trades)

    # generate orders based on principle
    orders, l_en, s_en, ext = generate_order(df_trades)
    orders.to_csv(symbol_to_path('orders', base_dir=os.getcwd()))
    # for plot: green = l_en, red = s_en, black = ext (presumably long
    # entries, short entries and exits -- confirm in generate_order)
    plt.plot(prices)
    for i in l_en:
        plt.axvline(x=i, color='g')
    for i in s_en:
        plt.axvline(x=i, color='r')
    for i in ext:
        plt.axvline(x=i, color='k')
    plt.show()

    # feed orders to the market simulator and print back-testing outputs
    print_port(of=orders, sv=money, output=True, lvrg=False, symbol=sym)
    # output Q table and indicator divider info
    q, dividers = learner.output()
    q_table = pd.DataFrame(q)

    ind_dividers = pd.DataFrame(dividers)
    q_table.to_csv(symbol_to_path('Q_Table', base_dir=os.getcwd()))
    ind_dividers.to_csv(symbol_to_path('Dividers', base_dir=os.getcwd()))