class AShareStocksStorage(DataframeStorage):
    """Dataframe storage holding the A-share stock list.

    The list is requested through ``ts.Master().SecID`` and narrowed to the
    Shanghai and Shenzhen exchanges before it is kept on ``self.data``.
    """

    file_writer = FileWriter(gs.A_SHARE_STOCKS_PATH, DataframeFileSaver())

    def __init__(self, data_writer=file_writer, fields_map=ashare_stocks_fields_map):
        """Forward the writer and the field mapping to ``DataframeStorage``."""
        super(AShareStocksStorage, self).__init__(data_writer=data_writer,
                                                  fields_map=fields_map)

    def load_data(self):
        """Fetch the stock list and store the filtered frame in ``self.data``.

        The first key of ``self.fields_map`` is used as the ticker column and
        the third key as the exchange column, so the mapping is presumably
        ordered -- TODO confirm against ``ashare_stocks_fields_map``.
        """
        source_fields = list(self.fields_map.keys())

        # Fetch data from tushare.
        master = ts.Master()
        stocks = master.SecID(assetClass='E', field=','.join(source_fields))

        # Change dtype of ticker to string.
        ticker_column = source_fields[0]
        stocks[ticker_column] = stocks[ticker_column].astype('string')

        # Keep only stocks listed on the Shanghai or Shenzhen exchange.
        exchange_column = source_fields[2]
        kept_exchanges = [gs.SHANGHAI_EXCHANGE, gs.SHENZHEN_EXCHANGE]
        on_kept_exchange = stocks[exchange_column].isin(kept_exchanges)
        stocks = stocks[on_kept_exchange]

        # Renumber the index so that it runs from 1 to len(stocks).
        stocks.index = Series(range(1, len(stocks) + 1))

        self.data = stocks
class AShareDailyTradeWriter(object):
    """Fetch per-stock rows through ``ts.Market().MktEqud`` and write them out.

    The rows of every stock id reported by ``AShareStocks.get_all_sids`` are
    combined into one frame whose columns are renamed to the module-level
    ``*_FIELD`` names, then handed to a ``FileWriter`` bound to
    ``gs.A_SHARE_DAILY_TRADE_PATH``.
    """

    def __init__(self):
        """Create the file writer and the stock list used to enumerate sids."""
        self.file_writer = FileWriter(gs.A_SHARE_DAILY_TRADE_PATH,
                                      DataframeFileSaver())
        self.ashare_stocks = AShareStocks()

    def load_Internet_data(self):
        """Download the rows of every stock and return them as one frame.

        Returns:
            The combined frame with the fetched columns renamed, or ``None``
            when no stock returned any rows.
        """
        # Local import: this method is the only place that needs the
        # top-level pandas API.
        import pandas as pd

        market = ts.Market()
        requested_fields = ','.join(FETCH_FIELDS)  # invariant across the loop

        frames = []
        total_rows = 0
        for sid in self.ashare_stocks.get_all_sids():
            df = market.MktEqud(ticker=sid, field=requested_fields)
            if df is not None and not df.empty:
                # Overwrite the ticker column with the sid that was requested.
                df[FETCH_FIELDS[0]] = sid
                frames.append(df)
                total_rows += len(df)
            # Progress report; the running size is 0 until a stock has rows.
            print('sid: ' + str(sid) + ' completed, size ' + str(total_rows))

        if not frames:
            return None

        # A single frame is returned as-is (index untouched); several frames
        # are concatenated once instead of re-copying on every iteration.
        if len(frames) == 1:
            data = frames[0]
        else:
            data = pd.concat(frames, ignore_index=True)

        data.rename(columns={
            FETCH_FIELDS[0]: TICKER_FIELD,
            FETCH_FIELDS[1]: TRADE_DATE_FIELD,
            FETCH_FIELDS[2]: PRE_CLOSE_PRICE_FIELD,
            FETCH_FIELDS[3]: OPEN_PRICE_FIELD,
            FETCH_FIELDS[4]: HIGHEST_PRICE_FIELD,
            FETCH_FIELDS[5]: LOWEST_PRICE_FIELD,
            FETCH_FIELDS[6]: CLOSE_PRICE_FIELD,
            FETCH_FIELDS[7]: TURNOVER_VOL_FIELD,
            FETCH_FIELDS[8]: TURNOVER_VALUE_FIELD,
            FETCH_FIELDS[9]: DEAL_AMOUNT_FIELD,
            FETCH_FIELDS[10]: TURNOVER_RATE_FIELD,
            FETCH_FIELDS[11]: NEG_MARKET_VALUE_FIELD,
            FETCH_FIELDS[12]: MARKET_VALUE_FIELD,
            FETCH_FIELDS[13]: IS_OPEN_FIELD,
            FETCH_FIELDS[14]: PE_FIELD,
            FETCH_FIELDS[15]: SUPPOSED_PE_FIELD,
            FETCH_FIELDS[16]: SUPPOSED_PB_FIELD,
        }, inplace=True)
        return data

    def write(self):
        """Fetch the data and pass it to the file writer.

        The write is skipped when nothing was fetched, so ``None`` is never
        handed to the writer.
        """
        data = self.load_Internet_data()
        if data is None:
            return
        self.file_writer.write(data)
class AShareStocksAdjWriter(object):
    """Fetch per-stock rows through ``ts.Market().MktAdjf`` and write them out.

    The rows of every stock id reported by ``AShareStocks.get_all_sids`` are
    combined into one frame whose columns are renamed to the module-level
    ``*_FIELD`` names, then handed to a ``FileWriter`` bound to
    ``gs.A_SHARE_STOCKS_ADJUST_PATH``.
    """

    def __init__(self):
        """Create the file writer and the stock list used to enumerate sids."""
        self.file_writer = FileWriter(gs.A_SHARE_STOCKS_ADJUST_PATH,
                                      DataframeFileSaver())
        self.ashare_stocks = AShareStocks()

    def load_Internet_data(self):
        """Download the rows of every stock and return them as one frame.

        Returns:
            The combined frame with the fetched columns renamed, or ``None``
            when no stock returned any rows.
        """
        # Local import: this method is the only place that needs the
        # top-level pandas API.
        import pandas as pd

        market = ts.Market()
        requested_fields = ','.join(FETCH_FIELDS)  # invariant across the loop

        frames = []
        for sid in self.ashare_stocks.get_all_sids():
            df = market.MktAdjf(ticker=sid, field=requested_fields)
            if df is not None and not df.empty:
                # Overwrite the ticker column with the sid that was requested.
                df[FETCH_FIELDS[0]] = sid
                frames.append(df)

        if not frames:
            return None

        # A single frame is returned as-is (index untouched); several frames
        # are concatenated once instead of re-copying on every iteration.
        if len(frames) == 1:
            data = frames[0]
        else:
            data = pd.concat(frames, ignore_index=True)

        data.rename(columns={
            FETCH_FIELDS[0]: TICKER_FIELD,
            FETCH_FIELDS[1]: DIV_DATE_FIELD,
            FETCH_FIELDS[2]: PER_CASH_FIELD,
            FETCH_FIELDS[3]: PER_SHARE_DIV_FIELD,
            FETCH_FIELDS[4]: PER_SHARE_TRANS_FIELD,
            FETCH_FIELDS[5]: ALLOTMENT_RATIO_FIELD,
            FETCH_FIELDS[6]: ALLOTMENT_PRICE_FIELD,
            FETCH_FIELDS[7]: ADJ_FACTOR_FIELD,
            FETCH_FIELDS[8]: ACCUM_ADJ_FACTOR_FIELD,
        }, inplace=True)
        return data

    def write(self):
        """Fetch the data and pass it to the file writer.

        The write is skipped when nothing was fetched, so ``None`` is never
        handed to the writer.
        """
        data = self.load_Internet_data()
        if data is None:
            return
        self.file_writer.write(data)
class AShareStocksWriter(object):
    """Fetch the stock list through ``ts.Master().SecID`` and write it out.

    The result is handed to a ``FileWriter`` bound to
    ``gs.A_SHARE_STOCKS_PATH``.
    """

    def __init__(self):
        """Create the file writer used by :meth:`write`."""
        self.file_writer = FileWriter(gs.A_SHARE_STOCKS_PATH, DataframeFileSaver())

    def load_Internet_data(self):
        """Return the fetched stock list, renamed, filtered and re-indexed.

        Only rows whose exchange is ``gs.SHANGHAI_EXCHANGE`` or
        ``gs.SHENZHEN_EXCHANGE`` are kept; the index of the result runs from
        1 to the number of remaining rows.
        """
        master = ts.Master()
        requested_fields = ','.join(FETCH_FIELDS)
        stocks = master.SecID(assetClass='E', field=requested_fields)

        # Convert the ticker column to the 'string' dtype before renaming.
        ticker_source = FETCH_FIELDS[0]
        stocks[ticker_source] = stocks[ticker_source].astype('string')

        renamed_columns = {
            FETCH_FIELDS[0]: TICKER_FIELD,
            FETCH_FIELDS[1]: NAME_FIELD,
            FETCH_FIELDS[2]: EXCHANGE_FIELD,
            FETCH_FIELDS[3]: STATUS_FIELD,
            FETCH_FIELDS[4]: LIST_DATE_FIELD,
        }
        stocks.rename(columns=renamed_columns, inplace=True)

        kept_exchanges = [gs.SHANGHAI_EXCHANGE, gs.SHENZHEN_EXCHANGE]
        on_kept_exchange = stocks[EXCHANGE_FIELD].isin(kept_exchanges)
        stocks = stocks[on_kept_exchange]

        stocks.index = Series(range(1, len(stocks) + 1))
        return stocks

    def write(self):
        """Fetch the stock list and pass it to the file writer."""
        fetched = self.load_Internet_data()
        self.file_writer.write(fetched)
def analyze(self):
    """Build a frame of noon notices from ``self.noon_urls`` and save it.

    Each entry of ``self.noon_urls`` is read as ``(date, url, ...)``. The url
    is passed to ``self.analyze_noon_notices`` and the first four items of
    every tuple it returns become the ``sid``, ``name``, ``title`` and
    ``content`` columns of one row, prefixed with the entry's date. The frame
    is printed and written to ``conf.NOTICE_NOON_DATA_PATH``.
    """
    print(self.noon_urls)

    rows = []
    for raw_data in self.noon_urls:
        date = raw_data[0]
        url = raw_data[1]
        for item in self.analyze_noon_notices(url):
            rows.append([date, item[0], item[1], item[2], item[3]])

    # The column dtype is set on the frame itself; no separate dtype object
    # is needed.
    df = pd.DataFrame(data=rows,
                      columns=['date', 'sid', 'name', 'title', 'content'],
                      dtype='string')
    print(df)

    notice_writer = FileWriter(file_path=conf.NOTICE_NOON_DATA_PATH,
                               file_saver=DataframeFileSaver())
    notice_writer.write(df)
def __init__(self):
    """Set up the writer for ``gs.A_SHARE_STOCKS_ADJUST_PATH`` and the stock list."""
    frame_saver = DataframeFileSaver()
    self.file_writer = FileWriter(gs.A_SHARE_STOCKS_ADJUST_PATH, frame_saver)
    self.ashare_stocks = AShareStocks()
def __init__(self):
    """Set up the writer for ``gs.A_SHARE_DAILY_TRADE_PATH`` and the stock list."""
    frame_saver = DataframeFileSaver()
    self.file_writer = FileWriter(gs.A_SHARE_DAILY_TRADE_PATH, frame_saver)
    self.ashare_stocks = AShareStocks()
def __init__(self):
    """Set up the writer that targets ``gs.A_SHARE_STOCKS_PATH``."""
    frame_saver = DataframeFileSaver()
    self.file_writer = FileWriter(gs.A_SHARE_STOCKS_PATH, frame_saver)