def get_index_eod_data(
        self, index_list=None, index_type=None, start=None, end=None,
):
    '''
    Get EOD data for indices from the local HDF store.

    If the index EOD data key exists in the store, read it directly;
    otherwise force-load it from the NSE website first.  The result is
    filtered by index_list/index_type and by the [start, end] date range.
    '''
    if NSE.__INDEX_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
        eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__INDEX_EOD_DATA_KEY)
        eod_data = eod_data.reset_index()
    else:
        self.force_load_data(force_load='index_eod_data')
        eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__INDEX_EOD_DATA_KEY)
        eod_data = eod_data.reset_index()
    index_list = self.get_index_list(
        index_list=index_list, index_type=index_type
    )
    eod_data = eod_data[eod_data.symbol.isin(index_list)]
    start = get_date(start, out='dt', start=True)
    end = get_date(end, out='dt', start=False)
    # BUG FIX: .ix was deprecated in pandas 0.20 and removed in 1.0.
    # Boolean .loc indexing is the replacement (and matches the sibling
    # get_symbol_eod_data implementation).
    eod_data = eod_data.loc[
        (eod_data.date >= start) & (eod_data.date <= end)
    ]
    return eod_data
def get_symbol_eod_data(
        self, symbol_list=None, index=None, index_type=None, start=None,
        end=None, min_rows=0, missing_count=0
):
    '''
    If SYMBOL_DATA_PATH exists grab data from file.
    Update data if data in the file is older than 5 days.
    Else fetch symbol data from NSE website.
    '''
    # Make sure the symbol EOD data exists locally before reading it.
    if NSE.__SYMBOL_EOD_DATA_KEY not in get_store_keys(NSE.__NSE_DATA_PATH):
        self.force_load_data(force_load='symbol_eod_data')
    eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__SYMBOL_EOD_DATA_KEY)
    eod_data = eod_data.reset_index()

    # Restrict to the requested universe of symbols.
    symbol_list = self.get_symbol_list(
        symbol_list=symbol_list, index=index, index_type=index_type,
        start=start, missing_count=missing_count, min_rows=min_rows
    )
    eod_data = eod_data[eod_data.symbol.isin(symbol_list)]

    # Clip to the requested date window.
    start = get_date(start, out='dt', start=True)
    end = get_date(end, out='dt', start=False)
    in_range = (eod_data.date >= start) & (eod_data.date <= end)
    return eod_data.loc[in_range]
def get_profits_per_day(self, trades=None, trades_type='all'):
    '''
    Get overall profits per day of symbols based on trades.
    If trades is none, Local stored trades is used.
    '''
    if trades is None and Zerodha.PROFITS_PER_DAY_KEY in get_store_keys(
            Zerodha.TRADES_DATA_PATH):
        return pd.read_hdf(Zerodha.TRADES_DATA_PATH,
                           Zerodha.PROFITS_PER_DAY_KEY)
    elif trades is None:
        trades = self.get_trades(trades_type=trades_type)

    traded_dates = self.get_traded_dates(start=trades.date.min())
    profits_per_day = pd.DataFrame(
        0, index=traded_dates.index, columns=trades.symbol.unique())
    profits = self.get_profits(trades=trades, trades_type=trades_type)
    for txn in trades.itertuples():
        sym, day = txn.symbol, txn.date
        # Daily profit is the day-over-day change of the cumulative
        # profit series from the trade date onwards ...
        profits_per_day.loc[day:, sym] = profits.loc[day:, sym].diff()
        # ... except on the trade date itself, where the full cumulative
        # profit is booked (diff() would yield NaN there).
        profits_per_day.loc[day, sym] = profits.loc[day, sym]
    return profits_per_day
def get_profits(self, trades=None, trades_type='all'):
    '''
    Get overall profits of symbols based on trades.
    If trades is none, Local stored trades is used.
    '''
    if trades is None and Zerodha.PROFITS_KEY in get_store_keys(
            Zerodha.TRADES_DATA_PATH):
        return pd.read_hdf(Zerodha.TRADES_DATA_PATH, Zerodha.PROFITS_KEY)
    elif trades is None:
        trades = self.get_trades(trades_type=trades_type)

    profits = pd.DataFrame(
        0, index=self.get_traded_dates(start=trades.date.min()).index,
        columns=trades.symbol.unique())
    investments = self.get_investments(trades=trades)
    holdings = self.get_holdings(trades=trades)
    intra_trades = trades.query('trade_type == "intra"')
    inter_trades = trades.query('trade_type == "inter"')

    # Inter-day trades: profit from the trade date onwards is the
    # mark-to-market holding value minus the invested amount.
    for txn in inter_trades.itertuples():
        sym, day = txn.symbol, txn.date
        profits.loc[day:, sym] = (
            holdings.loc[day:, sym] - investments.loc[day:, sym])

    # Intra-day trades: add the realised cash flow on the trade date.
    for txn in intra_trades.itertuples():
        profits.loc[txn.date, txn.symbol] = (
            profits.loc[txn.date, txn.symbol] + txn.cash_flow)
    return profits
def get_holdings(self, trades=None):
    '''
    Get holdings of symbols based on trades.
    If trades is none, Local stored trades is used.
    '''
    if trades is None and Zerodha.HOLDINGS_KEY in get_store_keys(
            Zerodha.TRADES_DATA_PATH):
        return pd.read_hdf(Zerodha.TRADES_DATA_PATH, Zerodha.HOLDINGS_KEY)
    elif trades is None:
        trades = self.get_trades(trades_type='all')

    traded_dates = self.get_traded_dates(start=trades.date.min())
    holdings = pd.DataFrame(
        0, index=traded_dates.index, columns=trades.symbol.unique())
    close = self.get_symbol_eod_values(data='close')
    for txn in trades.itertuples():
        sym, day, qty = txn.symbol, txn.date, txn.total_qty
        if sym not in close.columns:
            # No close-price history for this symbol: holdings unknown.
            holdings[sym] = np.nan
            continue
        if qty == 0:
            # Position fully closed from this date onwards.
            holdings.loc[day:, sym] = 0
        else:
            # Mark the position to market at each day's close.
            holdings.loc[day:, sym] = qty * close[sym][day:]
    return holdings
def get_investments(self, trades=None):
    '''
    Get investments in symbols based on trades.
    If trades is none, Local stored trades is used.
    '''
    if trades is None and Zerodha.INVESTMENTS_KEY in get_store_keys(
            Zerodha.TRADES_DATA_PATH):
        return pd.read_hdf(Zerodha.TRADES_DATA_PATH, Zerodha.INVESTMENTS_KEY)
    elif trades is None:
        trades = self.get_trades(trades_type='all')

    investments = pd.DataFrame(
        0, index=self.get_traded_dates(start=trades.date.min()).index,
        columns=trades.symbol.unique())
    for txn in trades.itertuples():
        sym, day = txn.symbol, txn.date
        if txn.total_qty == 0:
            # Position closed: nothing remains invested from here on.
            investments.loc[day:, sym] = 0
        else:
            # Accumulate the cash deployed by this trade from its date onwards.
            investments.loc[day:, sym] = (
                investments.loc[day:, sym] + txn.trade_qty * txn.trade_rate)
    return investments
def get_index_meta(self):
    'Get meta data for index and its components'
    # Fetch from the NSE website only when no local copy exists.
    if Market.__INDEX_META_KEY not in get_store_keys(Market.__Market_PATH):
        warnings.warn(
            'Unable to read symbol_meta locally. Fetching data from NSE website'
        )
        self.force_load_data('index')
    index_meta = pd.read_hdf(Market.__Market_PATH, Market.__INDEX_META_KEY)
    # The store holds the literal string 'nan'; convert it back to real NaN.
    return index_meta.replace('nan', np.nan)
def get_symbol_meta(self):
    '''
    If symbol meta data exists grab data from file.
    Else fetch symbol meta data from NSE website.
    '''
    # Only hit the NSE website when the local store lacks the key.
    if Market.__SYMBOL_META_KEY not in get_store_keys(Market.__Market_PATH):
        warnings.warn(
            'Unable to read symbol_meta locally. Fetching data from NSE website'
        )
        self.force_load_data('symbol')
    return pd.read_hdf(Market.__Market_PATH, Market.__SYMBOL_META_KEY)
def get_trades(self, trades_type='all', path=None):
    '''
    Get trades.

    trades_type: (all, intra, inter) -- 'all' returns every trade,
        otherwise rows are filtered on the trade_type column.
    path: path of file for trades; when given, trades are read from the
        file instead of the local HDF store.

    Raises AttributeError when no path is given and no trades exist in
    the local store.
    '''
    if path is not None:
        trades = self.read_trades(path=path)
    elif Zerodha.TRADES_KEY in get_store_keys(Zerodha.TRADES_DATA_PATH):
        trades = pd.read_hdf(Zerodha.TRADES_DATA_PATH, Zerodha.TRADES_KEY)
    else:
        # BUG FIX: corrected the typo 'avaliable' in the error message.
        raise AttributeError('Data not available')
    if trades_type != 'all':
        trades = trades.query('trade_type == @trades_type')
    return trades
def get_symbol_eod_values(
        self, data='returns', symbol_list=None, index=None,
        index_type=None, start=None, end=None, min_rows=0, missing_count=0
):
    '''Get Close prices for historical as a separate dataframe'''
    symbol_list = self.get_symbol_list(
        symbol_list=symbol_list, index=index, index_type=index_type,
        start=start, missing_count=missing_count, min_rows=min_rows
    )
    eod_data_schema = [
        'symbol', 'date', 'prev_close', 'open', 'high', 'low', 'last',
        'close', 'vwap', 'trades', 'volume', 'turnover', 'pct_deliverble',
        'simple_returns', 'log_returns', 'high_low_spread',
        'open_close_spread'
    ]
    # Map the requested data name onto a concrete EOD column.
    if data in eod_data_schema:
        values = data
    elif data == 'returns':
        values = 'log_returns'
    elif data == 'deliverble':
        values = 'pct_deliverble'
    else:
        warnings.warn(
            'Invalid type of data requested. Returning returns data'
        )
        values = 'log_returns'
    hdf_key = 'symbol_eod_values_{0}'.format(values)
    if hdf_key not in get_store_keys(NSE.__NSE_DATA_PATH):
        # Build the pivoted values table once, then read it back.
        self.force_load_data(force_load='symbol_eod_values', values=values)
    data = pd.read_hdf(NSE.__NSE_DATA_PATH, hdf_key)
    # FIX: removed the dead `column_list = data.columns` assignment that
    # was immediately overwritten by the intersection below.
    column_list = data.columns.intersection(symbol_list)
    data = data[column_list]
    start = get_date(start, 'str', True)
    end = get_date(end, 'str', False)
    data = data[start:end]
    # Drop symbols with no data at all inside the requested window.
    data = data.dropna(how='all', axis=1)
    return data
def get_returns(self, trades=None):
    '''
    Get overall returns per share of symbols based on trades.
    If trades is none, Local stored trades is used.
    '''
    if trades is None and Zerodha.RETURNS_KEY in get_store_keys(
            Zerodha.TRADES_DATA_PATH):
        returns = pd.read_hdf(Zerodha.TRADES_DATA_PATH, Zerodha.RETURNS_KEY)
        return returns
    elif trades is None:
        trades = self.get_trades(trades_type='inter')
    returns = pd.DataFrame(
        index=self.get_traded_dates(start=trades.date.min()).index,
        columns=trades.symbol.unique())
    investments = self.get_investments(trades=trades)
    holdings = self.get_holdings(trades=trades)
    close = self.get_symbol_eod_values(data='close')
    inter_trades = trades.query('trade_type == "inter"')
    for trade in inter_trades.itertuples():
        symbol = trade.symbol
        date = trade.date
        rate = trade.trade_rate
        total_qty = trade.total_qty
        previous_date, next_date = get_adjacent_dates(index=returns.index,
                                                      date=date)
        # Daily log return of the holding value from the trade date onwards.
        returns.loc[date:, symbol] = np.log(
            holdings.loc[date:, symbol] /
            holdings.loc[date:, symbol].shift(1))
        # NOTE(review): this reads `trade.type` while every other method
        # uses the `trade_type` column -- confirm the trades frame really
        # carries a separate `type` column.
        if trade.type == 'buy':
            # On a buy, the first day's return is value vs. amount invested.
            returns.loc[date, symbol] = np.log(
                holdings.loc[date, symbol] / investments.loc[date, symbol])
        elif previous_date is not None:
            # On a sell, the trade-date return is trade price vs. previous close.
            returns.loc[date, symbol] = np.log(
                rate / close.loc[previous_date, symbol])
        if total_qty == 0:
            # Position closed: no returns after the trade date.
            returns.loc[next_date:, symbol] = np.nan
    # BUG FIX: np.float was removed in NumPy 1.24; it was only an alias
    # of the builtin float.
    return returns.astype(float)
def get_traded_dates(self, start=None, end=None):
    'Generate Traded dates for NSE'
    # Build the traded-dates table first if it is not stored locally.
    if Market.__TRADED_DATES_KEY not in get_store_keys(Market.__Market_PATH):
        self.force_load_data('traded_dates')
    traded_dates = pd.read_hdf(Market.__Market_PATH,
                               Market.__TRADED_DATES_KEY)
    start = get_date(start, 'str', True)
    end = get_date(end, 'str', False)
    traded_dates = traded_dates[start:end]
    # 1-based running count of trading days inside the requested window.
    traded_dates['specific_date_count'] = list(
        range(1, len(traded_dates) + 1))
    return traded_dates
def get_quantity(self, trades=None):
    '''
    Get quantity of symbols based on trades.
    If trades is none, Local stored trades is used.
    '''
    if trades is None and Zerodha.QUANTITY_KEY in get_store_keys(
            Zerodha.TRADES_DATA_PATH):
        return pd.read_hdf(Zerodha.TRADES_DATA_PATH, Zerodha.QUANTITY_KEY)
    elif trades is None:
        trades = self.get_trades(trades_type='all')

    traded_dates = self.get_traded_dates(start=trades.date.min())
    quantity = pd.DataFrame(
        0, index=traded_dates.index, columns=trades.symbol.unique())
    # Each trade fixes the running position from its date onwards.
    for txn in trades.itertuples():
        quantity.loc[txn.date:, txn.symbol] = txn.total_qty
    return quantity
def get_risk_free_rate(self, returns=None, freq=None, start=None, end=None,
                       excess=False):
    '''
    Get risk free rate.

    returns: optional Series/DataFrame of returns; when None, a zero
        DataFrame over the traded dates in [start, end] is used.
    freq: 'daily'/'d' (default), 'monthly'/'m', or 'yearly'/'a'/'annual'/'y'.
    excess: when True, subtract the risk free rate from each returns
        column; otherwise each column is replaced by the rate itself.
    '''
    if Market.__RISK_FREE_RATE_KEY in get_store_keys(Market.__Market_PATH):
        risk_free_rate = pd.read_hdf(Market.__Market_PATH,
                                     Market.__RISK_FREE_RATE_KEY)
    else:
        self.force_load_data('rf')
        risk_free_rate = pd.read_hdf(Market.__Market_PATH,
                                     Market.__RISK_FREE_RATE_KEY)
    if returns is None:
        traded_dates = self.get_traded_dates(start, end)
        returns = pd.DataFrame(0, index=traded_dates.index,
                               columns=['returns'])
    elif isinstance(returns, pd.Series):
        returns = pd.DataFrame(returns)
    else:
        # Copy so the caller's frame is not mutated below.
        returns = returns.copy()
    if freq in ['daily', 'd', None]:
        risk_free_rate = risk_free_rate['rf_daily']
    elif freq in ['monthly', 'm']:
        risk_free_rate = risk_free_rate['rf_monthly']
    elif freq in ['yearly', 'a', 'annual', 'y']:
        risk_free_rate = risk_free_rate['rf_yearly']
    else:
        # BUG FIX: an unrecognised freq used to fall through silently,
        # leaving risk_free_rate as the full DataFrame and corrupting the
        # column arithmetic below. Warn and default to the daily rate.
        warnings.warn(
            'Invalid freq {0!r} requested. Using daily risk free rate'
            .format(freq)
        )
        risk_free_rate = risk_free_rate['rf_daily']
    for symbol in returns.columns:
        returns[symbol] = (returns[symbol] -
                           risk_free_rate) if excess else risk_free_rate
    return returns
def get_eod_meta(self, eod_data=None, eod_type='symbol'):
    '''
    Calculate meta data for EOD Data.

    eod_data: optional EOD DataFrame (with 'symbol' and 'date' columns).
        When None, a cached meta table is returned if present, else the
        stored EOD data is loaded; when neither exists an "empty" meta
        table with sentinel values is returned.
    eod_type: 'symbol' or 'index'; anything else raises KeyError.
    '''
    if eod_data is None:
        if eod_type == 'symbol':
            symbol_meta = self.get_symbol_meta()
            eod_data_meta = pd.DataFrame(
                index=symbol_meta.index.copy(),
            )
            if NSE.__SYMBOL_EOD_META_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
                # Cached meta table exists -- return it as-is.
                eod_data_meta = pd.read_hdf(NSE.__NSE_DATA_PATH,
                                            NSE.__SYMBOL_EOD_META_KEY)
                return eod_data_meta
            elif NSE.__SYMBOL_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
                eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH,
                                       NSE.__SYMBOL_EOD_DATA_KEY)
                eod_data = eod_data.reset_index()
            else:
                # No data at all: sentinel values force a full refresh later.
                eod_data_meta['from_date'] = pd.to_datetime('1994-01-01')
                eod_data_meta['to_date'] = pd.to_datetime('1994-01-01')
                eod_data_meta['row_count'] = 0
                eod_data_meta['missing_count'] = np.inf
                eod_data_meta['non_traded_dates'] = np.inf
                eod_data_meta['missing_dates'] = np.nan
                return eod_data_meta
        elif eod_type == 'index':
            index_meta = self.get_index_meta()
            eod_data_meta = pd.DataFrame(
                index=index_meta.index.copy(),
            )
            if NSE.__INDEX_EOD_META_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
                eod_data_meta = pd.read_hdf(NSE.__NSE_DATA_PATH,
                                            NSE.__INDEX_EOD_META_KEY)
                return eod_data_meta
            elif NSE.__INDEX_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
                eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH,
                                       NSE.__INDEX_EOD_DATA_KEY)
                eod_data = eod_data.reset_index()
            else:
                eod_data_meta['from_date'] = pd.to_datetime('1994-01-01')
                eod_data_meta['to_date'] = pd.to_datetime('1994-01-01')
                eod_data_meta['row_count'] = 0
                eod_data_meta['missing_count'] = np.inf
                eod_data_meta['non_traded_dates'] = np.inf
                eod_data_meta['missing_dates'] = np.nan
                return eod_data_meta
        else:
            raise KeyError(
                'Wrong eod_type'
            )
    else:
        if eod_type == 'symbol':
            symbol_meta = self.get_symbol_meta()
            eod_data_meta = pd.DataFrame(
                index=symbol_meta.index.copy(),
            )
        elif eod_type == 'index':
            index_meta = self.get_index_meta()
            eod_data_meta = pd.DataFrame(
                index=index_meta.index.copy(),
            )

    def counts(data):
        '''Calculate count data for one symbol's EOD rows.'''
        data = data.set_index('date')
        name = data.symbol.unique()[0]
        count_data = pd.Series(name=name)
        count_data['from_date'] = data.index.min()
        count_data['to_date'] = data.index.max()
        count_data['row_count'] = len(data)
        traded_dates = self.get_traded_dates(
            start=count_data['from_date'],
            end=count_data['to_date']
        )
        # Trading days inside [from_date, to_date] with no EOD row.
        missing_dates = traded_dates.index.difference(data.index)
        count_data['missing_count'] = len(traded_dates) - len(data)
        count_data['non_traded_dates'] = len(data.query('volume == 0'))
        count_data['missing_dates'] = missing_dates.tolist()
        return count_data
    count_data = eod_data.groupby('symbol').apply(counts)
    eod_data_meta = eod_data_meta.join(count_data)
    # Symbols without any EOD rows get the sentinel values.
    eod_data_meta['from_date'] = eod_data_meta['from_date'].fillna(
        datetime(1994, 1, 1))
    eod_data_meta['to_date'] = eod_data_meta['to_date'].fillna(
        datetime(1994, 1, 1))
    eod_data_meta['row_count'] = eod_data_meta['row_count'].fillna(
        0).astype(int)
    # BUG FIX: np.float was removed in NumPy 1.24; it was an alias of the
    # builtin float.
    eod_data_meta['missing_count'] = eod_data_meta['missing_count'].fillna(
        np.inf).astype(float)
    eod_data_meta['non_traded_dates'] = eod_data_meta[
        'non_traded_dates'].fillna(np.inf).astype(float)
    return eod_data_meta
def force_load_data(self, force_load, values=None):
    '''
    Force loading helper method for
    saving EOD data from NSE Website to local HDFStores.

    force_load: which dataset to (re)build -- 'symbol_eod_meta',
        'symbol_eod_data', 'symbol_eod_values', 'index_eod_meta',
        'index_eod_data', 'index_eod_values', or 'all'. Any other value
        is delegated to the parent class.
    values: EOD column name; used only by the *_eod_values loads.
    '''
    if force_load == 'symbol_eod_meta':
        print('Updating symbol eod metadata')
        if NSE.__SYMBOL_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
            eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH,
                                   NSE.__SYMBOL_EOD_DATA_KEY)
            eod_data = eod_data.reset_index()
        else:
            eod_data = None
        eod_data_meta = self.get_eod_meta(eod_data, eod_type='symbol')
        eod_data_meta.to_hdf(NSE.__NSE_DATA_PATH, NSE.__SYMBOL_EOD_META_KEY)
    elif force_load == 'symbol_eod_data':
        eod_data_meta = self.get_eod_meta(eod_type='symbol')
        # Refresh only symbols whose data is stale (>= 5 days old) or empty.
        date_diff = (TODAY - eod_data_meta.to_date).dt.days
        eod_data_meta = eod_data_meta[
            (date_diff >= 5) | (eod_data_meta.row_count == 0)
        ]
        # return if less symbols need to be refreshed
        if len(eod_data_meta) < 120:
            print(eod_data_meta.row_count)
            return
        if len(eod_data_meta) > 500:
            # BUG FIX: .ix was removed in pandas 1.0; use positional .iloc.
            # Fetch in batches of 200 (the method recurses below).
            eod_data_meta = eod_data_meta.iloc[0:200]
        print('Fetching Data from NSE website for {0} symbols'.format(
            len(eod_data_meta)))
        fresh_eod_data = pd.DataFrame()
        for symbol in eod_data_meta.itertuples():
            eod_data = self.fetch_eod_data(
                symbol=symbol.Index, start=symbol.to_date, index=False,
            )
            if eod_data.empty:
                continue
            else:
                eod_data = eod_data.reset_index()
                print(
                    'Recieved {0} records from NSE for {1} from {2} to {3}'.
                    format(len(eod_data), symbol.Index,
                           eod_data.date.min().date(),
                           eod_data.date.max().date())
                )
                # BUG FIX: DataFrame.append was removed in pandas 2.0;
                # pd.concat is the direct replacement.
                fresh_eod_data = pd.concat([fresh_eod_data, eod_data])
        if NSE.__SYMBOL_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
            old_eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH,
                                       NSE.__SYMBOL_EOD_DATA_KEY)
            old_eod_data = old_eod_data.reset_index()
        else:
            old_eod_data = pd.DataFrame()
        fresh_eod_data = pd.concat([fresh_eod_data, old_eod_data])
        del old_eod_data
        # NOTE(review): with fresh rows first and keep='last', a stored row
        # wins over a freshly fetched one for the same (symbol, date) --
        # confirm this precedence is intended.
        fresh_eod_data = fresh_eod_data.drop_duplicates(['symbol', 'date'],
                                                        keep='last')
        fresh_eod_data = fresh_eod_data.sort_values(['symbol', 'date'])
        eod_data_schema = [
            'symbol', 'date', 'prev_close', 'open', 'high', 'low', 'last',
            'close', 'vwap', 'trades', 'volume', 'turnover',
            'pct_deliverble', 'simple_returns', 'log_returns',
            'high_low_spread', 'open_close_spread'
        ]
        fresh_eod_data = fresh_eod_data.reset_index()[eod_data_schema]
        fresh_eod_data = fresh_eod_data.set_index(['symbol', 'date'])
        fresh_eod_data.to_hdf(NSE.__NSE_DATA_PATH, NSE.__SYMBOL_EOD_DATA_KEY)
        del fresh_eod_data
        self.force_load_data('symbol_eod_meta')
        self.force_load_data('traded_dates')
        # Recurse while a meaningful number of symbols is still stale.
        eod_data_meta = self.get_eod_meta(eod_type='symbol')
        date_diff = (TODAY - eod_data_meta.to_date).dt.days
        eod_data_meta = eod_data_meta[
            (date_diff >= 5) | (eod_data_meta.row_count == 0)
        ]
        if len(eod_data_meta) > 20:
            self.force_load_data('symbol_eod_data')
        # Rebuild the pivoted per-column value tables from the new data.
        eod_data_columns = [
            'open', 'high', 'low', 'close', 'vwap', 'simple_returns',
            'log_returns', 'high_low_spread', 'open_close_spread'
        ]
        for column in eod_data_columns:
            self.force_load_data(force_load='symbol_eod_values',
                                 values=column)
        # clean_file(NSE.__NSE_DATA_PATH)
    elif force_load == 'symbol_eod_values':
        print('Generating time series data for {0} from local data'.format(
            values))
        eod_data = self.get_symbol_eod_data(symbol_list='all')
        data = pd.pivot_table(data=eod_data, index='date',
                              columns='symbol', values=values)
        data.to_hdf(NSE.__NSE_DATA_PATH,
                    'symbol_eod_values_{0}'.format(values))
    elif force_load == 'index_eod_meta':
        print('Updating index eod metadata')
        if NSE.__INDEX_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
            eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH,
                                   NSE.__INDEX_EOD_DATA_KEY)
            eod_data = eod_data.reset_index()
        else:
            eod_data = None
        eod_data_meta = self.get_eod_meta(eod_data, eod_type='index')
        eod_data_meta.to_hdf(NSE.__NSE_DATA_PATH, NSE.__INDEX_EOD_META_KEY)
    elif force_load == 'index_eod_data':
        eod_data_meta = self.get_eod_meta(eod_type='index')
        index_meta = self.get_index_meta()
        eod_data_meta = eod_data_meta.join(index_meta.index_code)
        date_diff = (TODAY - eod_data_meta.to_date).dt.days
        eod_data_meta = eod_data_meta[
            (date_diff >= 5) | (eod_data_meta.row_count == 0)
        ]
        # Only indices with a known NSE index code can be fetched.
        eod_data_meta = eod_data_meta.dropna(subset=['index_code'])
        # return if less indices need to be refreshed
        if len(eod_data_meta) < 20:
            return
        print('Fetching Data from NSE website for {0} indices'.format(
            len(eod_data_meta)))
        fresh_eod_data = pd.DataFrame()
        for index in eod_data_meta.itertuples():
            eod_data = self.fetch_eod_data(
                symbol=index.index_code, start=index.to_date, index=True,
            )
            if eod_data.empty:
                continue
            else:
                eod_data = eod_data.reset_index()
                eod_data['symbol'] = [index.Index
                                      for i in range(len(eod_data))]
                print(
                    'Recieved {0} records from NSE for {1} from {2} to {3}'.
                    format(len(eod_data), index.Index,
                           eod_data.date.min().date(),
                           eod_data.date.max().date())
                )
                fresh_eod_data = pd.concat([fresh_eod_data, eod_data])
        if fresh_eod_data.empty:
            return
        if NSE.__INDEX_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
            old_eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH,
                                       NSE.__INDEX_EOD_DATA_KEY)
            old_eod_data = old_eod_data.reset_index()
        else:
            old_eod_data = pd.DataFrame()
        fresh_eod_data = pd.concat([fresh_eod_data, old_eod_data])
        del old_eod_data
        fresh_eod_data = fresh_eod_data.drop_duplicates(['symbol', 'date'],
                                                        keep='last')
        fresh_eod_data = fresh_eod_data.sort_values(['symbol', 'date'])
        eod_data_schema = [
            'symbol', 'date', 'open', 'high', 'low', 'close', 'volume',
            'turnover', 'simple_returns', 'log_returns', 'high_low_spread',
            'open_close_spread'
        ]
        fresh_eod_data = fresh_eod_data.reset_index()[eod_data_schema]
        fresh_eod_data = fresh_eod_data.set_index(['symbol', 'date'])
        fresh_eod_data.to_hdf(NSE.__NSE_DATA_PATH, NSE.__INDEX_EOD_DATA_KEY)
        del fresh_eod_data
        self.force_load_data('index_eod_meta')
        self.force_load_data('traded_dates')
        eod_data_meta = self.get_eod_meta(eod_type='index')
        date_diff = (TODAY - eod_data_meta.to_date).dt.days
        eod_data_meta = eod_data_meta[
            (date_diff >= 5) | (eod_data_meta.row_count == 0)
        ]
        if len(eod_data_meta) > 5:
            self.force_load_data('index_eod_data')
        eod_data_columns = [
            'open', 'high', 'low', 'close', 'simple_returns',
            'log_returns', 'high_low_spread', 'open_close_spread'
        ]
        for column in eod_data_columns:
            self.force_load_data(force_load='index_eod_values',
                                 values=column)
        clean_file(NSE.__NSE_DATA_PATH)
    elif force_load == 'index_eod_values':
        print('Generating time series data for {0} from local data'.format(
            values))
        eod_data = self.get_index_eod_data(index_list='all')
        data = pd.pivot_table(data=eod_data, index='date',
                              columns='symbol', values=values)
        data.to_hdf(NSE.__NSE_DATA_PATH,
                    'index_eod_values_{0}'.format(values))
    elif force_load == 'all':
        self.force_load_data('traded_dates')
        self.force_load_data('symbol_eod_data')
        self.force_load_data('index_eod_data')
    else:
        super().force_load_data(force_load)
def force_load_data(self, force_load, values=None):
    '''
    Force loading helper method for
    saving symbol data from NSE Website to local HDFStores
    '''
    if force_load == 'symbol':
        # Rebuild symbol metadata from the NSE website; creates the local
        # data directory on first run.
        print('Loading Symbol Meta data from NSE website')
        symbol_meta = self.fetch_symbol_meta()
        if not os.path.isdir(os.path.join(Market.__CURRENT_PATH, 'data')):
            os.mkdir(os.path.join(Market.__CURRENT_PATH, 'data'))
        symbol_meta.to_hdf(Market.__Market_PATH, Market.__SYMBOL_META_KEY)
    elif force_load == 'index':
        # Scrape index metadata and per-index component lists from the
        # NSE website, filling symbol industries along the way.
        print('Loading Index components data from NSE website')
        symbol_meta = self.get_symbol_meta()
        symbol_meta['industry'] = np.nan
        session = requests.session()
        url = 'https://www.nseindia.com/products/content/equities/indices/historical_index_data.htm'
        response = session.get(url)
        if response.status_code != 200:
            print('Unable to load base url data due to {0} status code'.
                  format(response.status_code))
            return
        soup = BeautifulSoup(response.text, 'html.parser')
        index_meta = pd.DataFrame(columns=[
            'index_code', 'index_name', 'index_type', 'url',
            'number_of_symbols'
        ])
        # Index types come grouped as <optgroup> elements in the page's
        # index-type dropdown.
        index_type_group = soup.find('select', {'id': 'indexType'})
        index_type_group = index_type_group.find_all('optgroup')
        for index_type in index_type_group:
            # Normalise the optgroup label (e.g. "Strategy Indices") into a
            # snake_case type key such as 'strategic'.
            index_type_label = index_type['label'].strip()
            index_type_label = index_type_label.split(' ')
            index_type_label = '_'.join(index_type_label[0:-1]).lower()
            index_type_label = index_type_label.replace(
                'strategy', 'strategic')
            index_list = index_type.find_all('option')
            for index in index_list:
                # Register every index of this type in index_meta, keyed by
                # a snake_case version of its NSE code.
                index_code = index['value'].strip()
                index_code_asindex = index_code.lower().replace(
                    ' ', '_').replace('%', '')
                index_name = index.text.strip()
                index_meta.loc[index_code_asindex] = [
                    index_code, index_name, index_type_label, np.nan, np.nan
                ]
            if index_type_label != 'broad_market':
                # Non-broad-market types publish their component CSVs on a
                # per-type listing page.
                index_components_data = pd.DataFrame(
                    index=symbol_meta.index)
                info_url = 'https://www.nseindia.com/products/content/equities/indices/{0}_indices.htm'
                response = session.get(info_url.format(index_type_label))
                if response.status_code != 200:
                    print(
                        'Unable to load url data for {0} index type due to {1} status code'
                        .format(index_type_label, response.status_code))
                    continue
                soup = BeautifulSoup(response.text, 'html.parser')
                content = soup.find('div', {'class': 'abt_equities_content'})
                download_links = content.find_all('a', {'class': 'download'})
                for link in download_links:
                    # Clean the link text down to the index name so it can be
                    # matched against index_meta.index_name.
                    text = link.text
                    text = re.sub(r'\r\n', ' ', text)
                    text = re.sub(' +', ' ', text)
                    text = text[text.find('NIFTY'):text.find('Index')]
                    if text[-3:] == 'csv':
                        text = text[text.find('NIFTY'):text.find('stocks')]
                    if text[-2:] == 'cs':
                        text = text[text.find('NIFTY'):text.find('Indices')]
                    text = text.lower().strip()
                    link = link['href']
                    link = 'https://www.nseindia.com' + link
                    if link[-3:] == 'csv':
                        try:
                            index = index_meta[index_meta.index_name.str.
                                               lower() == text].index[0]
                        # NOTE(review): bare except hides real errors; an
                        # IndexError is what the lookup actually raises.
                        except:
                            warnings.warn(
                                '{0} index not found in index_meta table'.
                                format(text))
                            continue
                        response = session.get(link)
                        if response.status_code != 200:
                            print(
                                'Unable to fetch csv data for {0} index due to {1} status code'
                                .format(index, response.status_code))
                            continue
                        index_components = pd.read_csv(StringIO(
                            response.text), index_col='Symbol')
                        index_components.index = index_components.index.str.lower()
                        # First CSV to mention a symbol wins its industry.
                        symbol_meta['industry'] = symbol_meta[
                            'industry'].fillna(
                                index_components['Industry'])
                        index_meta.loc[index, 'url'] = link
                        index_meta.loc[index, 'number_of_symbols'] = len(
                            index_components)
                        # Boolean membership column for this index.
                        index_components = pd.Series(
                            True, index=index_components.index, name=index)
                        index_components_data = index_components_data.join(
                            index_components)
                        print('Component data loaded successfully for {0}'.
                              format(index))
            elif index_type_label == 'broad_market':
                # Broad-market indices live on a dedicated page whose CSV
                # links sit one page deeper than the other types.
                index_components_data = pd.DataFrame(
                    index=symbol_meta.index)
                info_url = 'https://www.nseindia.com/products/content/equities/indices/broad_indices.htm'
                # NOTE(review): info_url has no '{0}' placeholder, so this
                # .format() call is a no-op.
                response = session.get(info_url.format(index_type_label))
                soup = BeautifulSoup(response.text, 'html.parser')
                content = soup.find('div', {'class': 'content'})
                download_links = content.find_all('a')
                for link in download_links:
                    text = link.text
                    text = re.sub(r'\r\n', ' ', text)
                    text = re.sub(' +', ' ', text)
                    text = text[text.find('NIFTY'):text.find('Index')]
                    if text[-3:] == 'csv':
                        text = text[text.find('NIFTY'):text.find('stocks')]
                    if text[-2:] == 'cs':
                        text = text[text.find('NIFTY'):text.find('Indices')]
                    text = text.lower().strip()
                    link = link['href']
                    link = 'https://www.nseindia.com/products/content/equities/indices/' + link
                    try:
                        index = index_meta[index_meta.index_name.str.lower(
                        ) == text].index[0]
                    # NOTE(review): bare except -- see note above.
                    except:
                        warnings.warn(
                            '{0} index not found in index_meta table'.
                            format(text))
                        continue
                    # Follow the per-index page to find its CSV download link.
                    response = session.get(link)
                    soup = BeautifulSoup(response.text, 'html.parser')
                    link_list = soup.find_all('a', {'class': 'download'})
                    for link in link_list:
                        link = link['href']
                        if link[-3:] == 'csv':
                            csv_link = 'https://www.nseindia.com' + link
                            break
                    # NOTE(review): if no csv link is found, csv_link keeps
                    # its value from a previous iteration (or is unbound on
                    # the first one) -- verify this cannot happen in practice.
                    response = session.get(csv_link)
                    if response.status_code != 200:
                        print(
                            'Unable to fetch csv data for {0} index due to {1} status code'
                            .format(index, response.status_code))
                        continue
                    index_components = pd.read_csv(StringIO(response.text),
                                                   index_col='Symbol')
                    index_components.index = index_components.index.str.lower()
                    symbol_meta['industry'] = symbol_meta[
                        'industry'].fillna(index_components['Industry'])
                    index_meta.loc[index, 'url'] = link
                    index_meta.loc[index, 'number_of_symbols'] = len(
                        index_components)
                    index_components = pd.Series(
                        True, index=index_components.index, name=index)
                    index_components_data = index_components_data.join(
                        index_components)
                    print('Component data loaded successfully for {0}'.
                          format(index))
            # Persist one boolean membership table per index type.
            index_components_data = index_components_data.fillna(
                False).astype(bool)
            hdf_key = index_type_label + '_components'
            index_components_data.to_hdf(Market.__Market_PATH, hdf_key)
        # Normalise symbol/index metadata before storing.
        symbol_meta['name_of_company'] = symbol_meta[
            'name_of_company'].astype(str)
        symbol_meta['isin_number'] = symbol_meta['isin_number'].astype(str)
        symbol_meta['industry'] = symbol_meta['industry'].fillna('unknown')
        symbol_meta['industry'] = symbol_meta['industry'].str.lower(
        ).str.replace(' ', '_')
        symbol_meta.to_hdf(Market.__Market_PATH, Market.__SYMBOL_META_KEY)
        index_meta = index_meta.astype(str)
        index_meta.to_hdf(Market.__Market_PATH, Market.__INDEX_META_KEY)
    elif force_load == 'traded_dates':
        # Build the traded-dates calendar from local EOD value tables when
        # available, otherwise from the shipped constants store.
        print('Updating traded dates')
        from nse import NSE
        if (('symbol_eod_values_close' in get_store_keys(
                NSE.NSE_DATA_PATH)) and
                ('index_eod_values_close' in get_store_keys(
                    NSE.NSE_DATA_PATH))):
            nse = NSE(symbol_list='infy', index='nifty_50')
            symbol_returns = nse.get_symbol_eod_values()
            index_returns = nse.get_index_eod_values()
            traded_dates_symbol = symbol_returns.index
            traded_dates_index = index_returns.index
            # Union of symbol and index trading days.
            traded_dates = traded_dates_symbol.union(traded_dates_index)
        else:
            traded_dates = pd.read_hdf(Market.__CONSTANTS_PATH,
                                       Market.__TRADED_DATES_KEY)
            traded_dates = traded_dates.index
        traded_dates = pd.DataFrame(index=traded_dates)
        traded_dates['date'] = traded_dates.index
        # 1-based running count over the whole calendar.
        traded_dates['date_count'] = [
            i + 1 for i in range(len(traded_dates))
        ]
        traded_dates['day'] = traded_dates.date.dt.day
        traded_dates['month'] = traded_dates.date.dt.month
        traded_dates['year'] = traded_dates.date.dt.year
        traded_dates['day_of_week'] = traded_dates.date.dt.dayofweek
        traded_dates['day_of_year'] = traded_dates.date.dt.dayofyear
        # NOTE(review): Series.dt.week was removed in pandas 1.1+; newer
        # pandas requires .dt.isocalendar().week.
        traded_dates['week_of_year'] = traded_dates.date.dt.week
        traded_dates = traded_dates.drop(['date'], axis=1)
        traded_dates.to_hdf(Market.__Market_PATH, Market.__TRADED_DATES_KEY)
    elif force_load == 'rf' or force_load == 'risk_free_rate':
        # Download daily/monthly/yearly risk-free rates from the IIM-A
        # Fama-French data library and align them on the traded dates.
        intervals = ['Daily', 'Monthly', 'Yearly']
        date_formats = ['%Y%m%d', '%Y%m', '%Y']
        month_end = TODAY + MonthEnd(-1)
        month_end = datetime.strftime(month_end, format='%Y%m%d')
        link = 'http://www.iimahd.ernet.in/~iffm/Indian-Fama-French-Momentum/DATA/{0}_FourFactors_and_Market_Returns_{1}.csv'
        session = requests.session()
        traded_dates = self.get_traded_dates()
        fama_french_rf = pd.DataFrame(index=traded_dates.index)
        for interval, date_format in zip(intervals, date_formats):
            interval_link = link.format(month_end, interval)
            response = session.get(interval_link)
            interval_fama_french = pd.read_csv(StringIO(response.text))
            rename_columns(interval_fama_french)
            # First column is the (unnamed) date column.
            interval_fama_french.columns.values[0] = 'date'
            interval_fama_french = interval_fama_french[[
                'date', 'rf_pct_'
            ]]
            interval_fama_french.columns = [
                'date', 'rf_{0}'.format(interval.lower())
            ]
            interval_fama_french.date = pd.to_datetime(
                interval_fama_french.date, format=date_format)
            # Convert percentages into fractions.
            interval_fama_french['rf_{0}'.format(
                interval.lower())] = interval_fama_french['rf_{0}'.format(
                    interval.lower())] / 100
            interval_fama_french = interval_fama_french.set_index('date')
            fama_french_rf = fama_french_rf.join(interval_fama_french,
                                                 how='outer')
        # Forward-fill so monthly/yearly rates cover every trading day.
        fama_french_rf = fama_french_rf.ffill()
        fama_french_rf.to_hdf(Market.__Market_PATH,
                              Market.__RISK_FREE_RATE_KEY)
    elif force_load == 'all':
        # Rebuild everything; values is passed through untouched.
        value = values
        self.force_load_data('symbol')
        self.force_load_data('index')
        self.force_load_data('traded_dates')
        self.force_load_data('rf')
        return value