class NormalValidation(object):
    """Check whether a symbol's daily returns look normally distributed."""

    def __init__(self):
        # Data source used to fetch the price history.
        self.sd = StockData()

    def validate(self, symbol, start_date, end_date):
        """
        Print Shapiro and Anderson normality test results for the daily
        percentage changes of ``Adj Close`` and show their histogram with a
        fitted normal density drawn on top.

        :param symbol: symbol passed through to ``StockData.fetch_pd_data``
        :param start_date, end_date: YYYY-MM-DD
        """
        data = self.sd.fetch_pd_data(symbol, start_date, end_date)
        # The first pct_change entry is always NaN (no previous day), drop it.
        daily_changes = data['Adj Close'].astype('float').pct_change(periods=1).tolist()[1:]

        # density=True normalises the histogram to unit area so that it is
        # comparable with the probability density plotted below.
        _, bin_edges, _ = plt.hist(daily_changes, 50, density=True, facecolor='green', alpha=0.75)

        mu = np.mean(daily_changes)
        sigma = np.std(daily_changes)

        statistic, p_value = shapiro(daily_changes)
        print('Shapiro Test'.center(110, '-'))
        print('Mean: %.2f' % mu)
        print('Std:  %.2f' % sigma)
        print('t_stat: %.2f' % statistic)
        print('p_value: %.2f' % p_value)
        if p_value > 0.05:
            print('At 0.05 significance level, Null hypothsis that points are from normal distribution can NOT be rejected!')
        else:
            print('At 0.05 significance level, Reject Normal distribution!')

        statistic, critical_v, sig = anderson(daily_changes, 'norm')
        print('Anderson Test'.center(110, '-'))
        print('t_stat: %s' % statistic)
        print('critial_v: %s' % critical_v)
        print('sig: %s' % sig)
        # Index 2 is compared as the 5% level, as the messages below state.
        # NOTE(review): confirm against the printed ``sig`` array.
        if statistic > critical_v[2]:
            print('At 0.05 significance level, Reject Normal distribution!')
        else:
            print('At 0.05 significance level, Null hypothsis that points are from normal distribution can NOT be rejected!')

        # Gaussian density N(mu, sigma) evaluated at the histogram bin edges.
        bin_edges = np.asarray(bin_edges, dtype=float)
        density = np.exp(-0.5 * ((bin_edges - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
        plt.plot(bin_edges, density, 'r--', linewidth=1)

        plt.grid(True)
        plt.show()
 # NOTE(review): this definition sits at a one-space indent between two
 # classes, which matches no enclosing indentation level, and its body is
 # the same as AverageReversionAlgorithm.__init__. It looks like a stray
 # paste -- confirm and remove.
 def __init__(self):
     self.sd = StockData()
     self.symbols = self.sd.fetch_list_of_symbols()
class AverageReversionAlgorithm(object):
    """
    Back-test a mean-reversion strategy on adjusted close prices.

    A symbol is flagged ``Buy`` when its ``diff_rate`` (price minus ``ma``,
    divided by ``ma_std``) drops below ``-alpha`` and ``Sell`` once the
    ``diff_rate`` is back at or above zero.  ``simulate`` / ``optimize``
    trade a single symbol, ``select`` / ``select_all`` trade a pool of
    symbols out of one shared cash account.
    """

    def __init__(self):
        # Data access object and the symbol list it reports.
        self.sd = StockData()
        self.symbols = self.sd.fetch_list_of_symbols()

    def simulate(self, symbol, start_date, end_date, params):
        """
        Run one back-test for ``symbol`` and return its summary.

        :param start_date, end_date: YYYY-MM-DD
        :param params: dict with ``window``, ``alpha`` and
            ``transaction_budget``
        :return: summary dict from ``trade`` plus a ``params`` entry
        """
        window = params['window']
        alpha = params['alpha']
        transaction_budget = params['transaction_budget']
        adjusted_df = self.construct(symbol, start_date, end_date, window)
        signal_df = self.find_buy_signals(adjusted_df, alpha=alpha)
        _, summary = self.trade(signal_df, transaction_budget=transaction_budget)
        summary.setdefault('params', params)
        return summary

    @staticmethod
    def combinations(windows, alphas, transaction_budgets):
        """
        Return every (window, alpha, transaction_budget) combination as a
        params dict, with ``windows`` varying slowest.
        """
        return [{'window': window, 'alpha': alpha, 'transaction_budget': transaction_budget}
                for window in windows
                for alpha in alphas
                for transaction_budget in transaction_budgets]

    @staticmethod
    def _rank_by_return(performances):
        """Return ``performances`` ordered from highest to lowest ``return``."""
        # Ascending stable sort followed by a reversal: among equal returns
        # the later entry comes first.
        return sorted(performances, key=lambda x: x['return'])[::-1]

    def optimize(self, symbol, start_date, end_date, params):
        """
        Simulate every parameter combination and return the three best.

        :param start_date, end_date: YYYY-MM-DD
        :param params: dict with the lists ``windows``, ``alphas`` and
            ``transaction_budgets``
        :return: up to three summaries, best ``return`` first
        """
        windows = params['windows']
        alphas = params['alphas']
        transaction_budgets = params['transaction_budgets']
        all_subsets = self.combinations(windows, alphas, transaction_budgets)

        performances = [self.simulate(symbol, start_date, end_date, subset)
                        for subset in all_subsets]
        return self._rank_by_return(performances)[:3]

    def construct(self, symbol, start_date, end_date, window):
        """
        Build the per-date frame used for signal detection.

        :param start_date, end_date: YYYY-MM-DD
        :param window: passed to ``get_ma`` / ``get_std``; the first
            ``window`` rows are dropped from the result
        :return: DataFrame indexed by date with the columns ``adj_close``,
            ``ma``, ``ma_std`` and ``diff_rate``
        """
        data = self.sd.fetch_pd_data(symbol, start_date, end_date)
        adj_close = data['Adj Close'].astype('float')
        ma = get_ma(adj_close, window=window)
        ma_std = get_std(adj_close, window=window)

        new_df = pd.DataFrame({'date': data.index, 'adj_close': adj_close, 'ma': ma, 'ma_std': ma_std})
        # Copy so the column added below is written to an independent frame
        # instead of a slice of new_df.
        adjusted_new_df = new_df.set_index('date')[window:].copy()
        adjusted_new_df['diff_rate'] = (adjusted_new_df.adj_close - adjusted_new_df.ma) / adjusted_new_df.ma_std
        return adjusted_new_df

    def find_buy_signals(self, df, alpha=2):
        """
        Add a ``signal`` column to ``df`` (in place) and return ``df``.

        :param df: frame with a ``diff_rate`` column
        :param alpha: ``diff_rate`` below ``-alpha`` means ``Buy``
        :return: ``df`` with ``signal`` set to ``Buy``, ``Sell``
            (``diff_rate >= 0``) or ``Standby`` (everything else)
        """
        df['signal'] = 'Standby'
        df.loc[df.diff_rate < -alpha, 'signal'] = 'Buy'
        # Applied last, so Sell wins if both conditions hold (negative alpha).
        df.loc[df.diff_rate >= 0, 'signal'] = 'Sell'
        return df

    @staticmethod
    def print_full(x):
        """Print ``x`` with the pandas row limit raised to ``len(x)``."""
        # option_context restores the previous setting even if print fails.
        with pd.option_context('display.max_rows', len(x)):
            print(x)

    @staticmethod
    def buy_stock(cash, budget, stock_price):
        """
        Buy as many whole shares as one transaction allows.

        :param cash: total cash at hand
        :param budget: the budget to spend in this transaction
        :param stock_price: price of one share
        :return: ``(amount, remaining_cash)`` -- number of whole shares
            bought and the cash left afterwards
        """
        # Never spend more than what is actually available.
        spend = cash if cash <= budget else budget
        # Whole shares only: the part of ``spend`` that does not buy a full
        # share goes back to cash, so amount * price + remaining_cash == cash.
        amount, change = divmod(spend, stock_price)
        return amount, cash - spend + change

    @staticmethod
    def cal_asset(cash, stocks, price):
        """
        Return total asset value: all held shares at ``price`` plus ``cash``.

        :param stocks: list of share amounts, one entry per purchase
        """
        return sum(stocks) * price + cash

    def trade(self, signal_df, initial_asset=100000, transaction_budget=10000):
        """
        Replay the signals of a single symbol against a cash account.

        Each ``Buy`` row spends up to ``transaction_budget``; each ``Sell``
        row liquidates the whole position at that row's price.  The columns
        ``cash``, ``amount``, ``asset`` and ``gains`` are written to
        ``signal_df`` in place.

        :param signal_df: frame with ``adj_close`` and ``signal`` columns
        :param initial_asset: starting cash
        :param transaction_budget: maximum cash spent per ``Buy``
        :return: ``(signal_df, summary)`` where summary has the keys
            ``longs``, ``shorts``, ``return`` and ``benchmark`` (buy-and-hold
            return over the same rows)
        :raises ValueError: if ``signal_df`` has no rows
        """
        if len(signal_df) == 0:
            raise ValueError('signal_df must contain at least one row')

        cash = initial_asset
        stocks = []
        long_actions = 0
        short_actions = 0
        cash_log, amount_log, asset_log, gains_log = [], [], [], []

        rows = zip(signal_df['adj_close'], signal_df['signal'])
        for i, (adj_close, signal) in enumerate(rows):
            # Value the portfolio with the cash carried over from the
            # previous row; nothing is held yet on the first row.
            if i == 0:
                asset = initial_asset
            else:
                asset = self.cal_asset(cash, stocks, adj_close)

            if signal == 'Buy':
                amount, cash = self.buy_stock(cash, transaction_budget, adj_close)
                stocks.append(amount)
                long_actions += 1
            elif signal == 'Sell':
                # Liquidating everything turns the whole asset into cash.
                cash = asset
                if sum(stocks) > 0:
                    stocks = []
                    short_actions += 1

            cash_log.append(cash)
            amount_log.append(sum(stocks))
            asset_log.append(asset)
            gains_log.append(asset - initial_asset)

        signal_df['cash'] = cash_log
        signal_df['amount'] = amount_log
        signal_df['asset'] = asset_log
        signal_df['gains'] = gains_log

        return_rate = gains_log[-1] * 1.0 / initial_asset
        start_price = signal_df['adj_close'].iloc[0]
        end_price = signal_df['adj_close'].iloc[-1]
        benchmark = (end_price - start_price) / start_price
        summary = {'longs': long_actions, 'shorts': short_actions, 'return': return_rate,
                   'benchmark': benchmark}
        return signal_df, summary

    @staticmethod
    def cal_multi_assets(previous_cash, stocks, stock_prices):
        """
        Return cash plus the value of every held position.

        :param stocks: dict symbol -> list of share amounts
        :param stock_prices: dict symbol -> current price
        """
        stock_values = [sum(amounts) * stock_prices[symbol] for symbol, amounts in stocks.items()]
        return previous_cash + sum(stock_values)

    def select(self, pools, start_date, end_date, params):
        """
        Back-test the strategy over a pool of symbols sharing one account.

        On each date at most one symbol is bought: the one with the lowest
        ``diff_rate`` among those below ``-alpha``.  Every held symbol whose
        ``diff_rate`` is at or above zero is sold in full.

        :param pools: iterable of symbols
        :param start_date, end_date: YYYY-MM-DD
        :param params: dict with ``window``, ``alpha``,
            ``transaction_budget`` and optionally ``initial_asset``
            (default 100000)
        :return: dict with the keys ``longs``, ``shorts``, ``return`` and
            ``params``
        :raises ValueError: if there is no data to trade on
        """
        window = params['window']
        alpha = params['alpha']
        transaction_budget = params['transaction_budget']
        initial_asset = params.get('initial_asset', 100000)

        frames = {}
        for symbol in pools:
            if symbol not in frames:
                frames[symbol] = self.construct(symbol, start_date, end_date, window)

        # Wide date x symbol tables, aligned on the union of all dates.
        diff_rate = pd.DataFrame(dict((symbol, df['diff_rate']) for symbol, df in frames.items()))
        adj_close = pd.DataFrame(dict((symbol, df['adj_close']) for symbol, df in frames.items()))
        if diff_rate.empty:
            raise ValueError('no price data available for the given pool and dates')

        # Signals.
        signal = pd.DataFrame('Standby', index=diff_rate.index, columns=diff_rate.columns)
        for date, rates in diff_rate.iterrows():
            if (rates < -alpha).any():
                # Buy only the symbol with the largest downward deviation.
                signal.at[date, rates.idxmin()] = 'Buy'
        # Applied last, so Sell wins if both conditions hold (negative alpha).
        signal[diff_rate >= 0] = 'Sell'

        # Trading.
        cash = initial_asset
        stocks = {}
        longs = 0
        shorts = 0
        gains = 0

        for date, signals in signal.iterrows():
            stock_prices = adj_close.loc[date].to_dict()
            # Value the portfolio before acting on this date's signals.
            gains = self.cal_multi_assets(cash, stocks, stock_prices) - initial_asset

            held_to_sell = [symbol for symbol in signals[signals == 'Sell'].index
                            if symbol in stocks]
            if held_to_sell:
                cash = cash + self.sell_stocks(stocks, stock_prices, held_to_sell)
                for symbol in held_to_sell:
                    del stocks[symbol]
                shorts += 1

            stocks_to_buy = signals[signals == 'Buy'].index
            if len(stocks_to_buy) > 0:
                stock_symbol = stocks_to_buy[0]
                amount, cash = self.buy_stock(cash, transaction_budget, stock_prices[stock_symbol])
                stocks.setdefault(stock_symbol, []).append(amount)
                longs += 1

        return_rate = gains * 1.0 / initial_asset
        summary = {'longs': longs, 'shorts': shorts, 'return': return_rate, 'params': params}
        return summary

    def select_all(self, pools, start_date, end_date, params):
        """
        Run ``select`` for every parameter combination, printing each
        summary as it is produced.

        :param params: dict with the lists ``windows``, ``alphas`` and
            ``transaction_budgets``
        :return: all summaries, best ``return`` first
        """
        windows = params['windows']
        alphas = params['alphas']
        transaction_budgets = params['transaction_budgets']
        all_subsets = self.combinations(windows, alphas, transaction_budgets)

        performances = []
        for subset in all_subsets:
            performance = self.select(pools, start_date, end_date, subset)
            print(performance)
            performances.append(performance)
        return self._rank_by_return(performances)

    @staticmethod
    def sell_stocks(stocks, stock_prices, stocks_to_sell):
        """
        Return the cash raised by selling every held position listed in
        ``stocks_to_sell``.  ``stocks`` itself is not modified.

        :param stocks: dict symbol -> list of share amounts
        :param stock_prices: dict symbol -> current price
        :param stocks_to_sell: iterable of symbols; symbols that are not
            held contribute nothing
        """
        sold = [sum(stocks[symbol]) * stock_prices[symbol]
                for symbol in stocks_to_sell if symbol in stocks]
        return sum(sold)
 def __init__(self):
     self.sd = StockData()