def test_get_stock_fuquan_kdata():
    """Adjusted (fuquan) prices computed locally must match Sina's values.

    hfqClose should equal Sina's hfq close for the same day, and qfqClose
    should equal Sina's hfq close divided by the latest adjustment factor.
    """
    # kdata with current price plus qfq (forward) and hfq (backward) adjusted columns
    df = quote.get_kdata('600977', the_date='2016-08-09')
    # hfq (backward-adjusted) price as computed by Sina
    df_hfq = quote.get_kdata('600977', the_date='2016-08-09', fuquan='hfq', source='sina')
    # rounded to two decimals: our hfq close agrees with Sina's
    assert round(df.loc['2016-08-09', 'hfqClose'], 2) == round(df_hfq.loc['2016-08-09', 'close'], 2)
    # NOTE: the original fetched the same Sina hfq kdata a second time; reuse df_hfq instead
    latest_kdata = quote.get_kdata('600977', the_date='2018-03-29', fuquan='hfq', source='sina')
    # rounded to two decimals: our qfq close agrees with Sina's (hfq close / latest factor)
    assert round(df.loc['2016-08-09', 'qfqClose'], 2) == round(
        df_hfq.loc['2016-08-09', 'close'] / latest_kdata.loc['2018-03-29', 'factor'], 2)
def on_event(self, event_item):
    """Handle one realtime quote event.

    On the first event, or when the trading date rolls over, refresh the
    cached previous-day close; then compute the change percentage against
    that close and run the subscription check.

    event_item: dict-like with at least 'timestamp' and 'price' keys
    (schema assumed from usage -- TODO confirm against the event source).
    """
    self.logger.debug(event_item)
    # date rollover (or cold start): refresh the cached last-day kdata/close
    if not self.last_date or not is_same_date(self.last_date, self.current_time):
        # previous trading day approximated as calendar day minus one
        self.last_date = to_timestamp(
            event_item['timestamp']) - timedelta(days=1)
        self.last_kdata = get_kdata(self.security_item, the_date=to_time_str(self.last_date))
        if self.last_kdata is None:
            # local store has no kdata yet -- fetch for this exchange, then retry once
            fetch_kdata(exchange_str=self.security_item['exchange'])
            self.last_kdata = get_kdata(self.security_item, the_date=to_time_str(
                self.last_date))
        if self.last_kdata is not None:
            self.last_close = self.last_kdata.loc[
                to_time_str(self.last_date), 'close']
        else:
            # NOTE(review): logger.exception outside an except block logs a
            # bogus "NoneType: None" traceback; logger.error looks intended
            self.logger.exception("could not get last close for:{}".format(
                self.last_date))
        # reset per-day trigger bookkeeping for the new date
        self.update_today_triggered()
    # change vs. previous close; assumes last_close was successfully cached
    change_pct = (event_item['price'] - self.last_close) / self.last_close
    self.logger.info(
        "{} last day close is:{},now price is:{},the change_pct is:{}".
        format(self.security_item['id'], self.last_close, event_item['price'],
               change_pct))
    self.check_subscription(current_price=event_item['price'],
                            change_pct=change_pct)
def newhighergenerator(start_date, fuquan='qfq', source='163', period=20):
    """Mark, per trading day, which stocks closed at a new `period`-day high.

    Uses the SSE Composite index to define the trading-day calendar, scans a
    (currently hard-coded 600000-600030) stock range, and writes the per-day
    0/1 matrix to newhigher.csv.

    Parameters
    ----------
    start_date : first day of the observation window
    fuquan : adjustment mode passed through to quote.get_kdata
    source : kdata source for the base index
    period : look-back window (days) that defines a "new high"

    Returns True once the csv has been written.
    """
    baseindex = 'index_sh_000001'
    # the base index supplies the trading-day calendar
    df = quote.get_kdata(baseindex, start_date=start_date, source=source)
    # 'total' only for now; per-market totals could be added later
    dh = pd.DataFrame(0, index=df.index, columns=['total'])
    stocklist = quote.get_security_list(security_type='stock', start='600000',
                                        end='600030', mode='simple')
    for _, item in stocklist.iterrows():
        print("caculating {}".format(item.id))
        # hoisted out of the per-day loop: the stock's kdata, its timestamp
        # list and its listing date do not depend on ts (the original
        # refetched all three for every single day)
        ds = quote.get_kdata(item.id, fuquan=fuquan)
        indexlist = list(ds['timestamp'])
        listdate = datetime.datetime.strptime(item.listDate, '%Y-%m-%d')
        for ts in dh.index:
            if (ts - listdate).days < period:
                # not enough history since listing
                dh.at[ts, item.id] = 0
            else:
                tsstr = ts.strftime('%Y-%m-%d')
                if tsstr in indexlist:
                    pos = indexlist.index(tsstr)
                    # new high: close tops the trailing `period`-day window
                    if ds['close'][pos] >= max(ds['close'][pos - period + 1:pos + 1]):
                        dh.at[ts, item.id] = 1
                    else:
                        dh.at[ts, item.id] = 0
                else:
                    # stock did not trade on that calendar day
                    dh.at[ts, item.id] = 0
    df['total'] = dh.apply(lambda x: x.sum(), axis=1)
    df['index_c'] = df['close']
    dh.to_csv('newhigher.csv')
    return True
def on_tick(self, tick_item):
    """Accumulate today's ticks and, at 14:50, trade once on net money flow.

    Intraday-only, ultra-short-term: state resets on date rollover and at
    most one buy/sell decision is made per day.

    tick_item: dict-like with 'timestamp', 'price', 'turnover', 'direction'
    and 'securityId' keys (schema assumed from usage -- TODO confirm).
    """
    # date rollover: clear the traded flag and the accumulated tick frame
    if not is_same_date(self.current_time, tick_item['timestamp']):
        self.today_traded = False
        if not self.df.empty:
            self.df = pd.DataFrame()
    else:
        if not self.today_traded:
            # once the first row exists, switch the frame to a time index
            if not self.df.empty and self.df.index.size == 1:
                self.df = index_df_with_time(self.df)
            self.df = self.df.append(tick_item, ignore_index=True)
            # at 14:50, compute the day's money flow and act on it
            if ("14:50:" in tick_item['timestamp']):
                # signed net flow; direction presumably +/-1 buy/sell -- verify
                money_flow = (self.df['turnover'] * self.df['direction']).sum()
                # NOTE(review): money_all is computed but never used
                money_all = (self.df['turnover'] * abs(self.df['direction'])).sum()
                # net inflow and no position held -> buy
                if money_flow > 0 and not self.account_service.get_position(tick_item['securityId']):
                    # use the hfq (backward-adjusted) price via the daily factor
                    factor = \
                        get_kdata(tick_item['securityId'], the_date=pd.Timestamp(tick_item['timestamp']).date())[
                            'factor']
                    self.buy(tick_item['securityId'], current_price=tick_item['price'] * factor)
                # net outflow while holding -> sell
                elif money_flow < 0 and self.account_service.get_position(tick_item['securityId']):
                    # use the hfq (backward-adjusted) price via the daily factor
                    factor = \
                        get_kdata(tick_item['securityId'], the_date=pd.Timestamp(tick_item['timestamp']).date())[
                            'factor']
                    self.sell(tick_item['securityId'], current_price=tick_item['price'] * factor)
                self.today_traded = True
def test_get_cryptocurrency_kdata():
    """Crypto daily kdata covers 2017-09-14 and shows its >18% drop on both exchanges."""
    for security_id, exchange in (('BTC-USD', 'gdax'), ('BTC-JPY', 'kraken')):
        kdata = quote.get_kdata(security_id, exchange=exchange)
        assert not kdata.empty
        assert '2017-09-14' in kdata.index
        assert kdata.loc['2017-09-14', 'changePct'] < -0.18
def stock_kdata_to_es(start='000001', end='666666', force=False):
    """Bulk-index daily stock kdata into Elasticsearch.

    start/end bound the security code range; with force=False indexing
    resumes after the newest document already stored per security.
    """
    for _, security_item in get_security_list(start=start, end=end).iterrows():
        # ensure the per-type/exchange index and mapping exist
        index_name = get_es_kdata_index(security_item['type'], security_item['exchange'])
        es_index_mapping(index_name, StockKData)

        start_date = None
        if not force:
            # resume from the latest record already in ES for this security
            query = {"term": {"securityId": security_item["id"]}}
            latest_record = es_get_latest_record(index_name, query=query)
            logger.info("latest_record:{}".format(latest_record))
            if latest_record:
                start_date = latest_record['timestamp']

        actions = []
        for _, kdata_item in get_kdata(security_item, start_date=start_date).iterrows():
            # the resume day itself is already indexed
            if start_date and is_same_date(start_date, kdata_item['timestamp']):
                continue
            try:
                # renamed from `id` to avoid shadowing the builtin
                doc_id = '{}_{}'.format(kdata_item['securityId'], kdata_item['timestamp'])
                kdata = StockKData(meta={'id': doc_id}, id=doc_id)
                fill_doc_type(kdata, json.loads(kdata_item.to_json()))
                actions.append(kdata.to_dict(include_meta=True))
            except Exception as e:
                # stdlib logging takes lazy %-style args; the original passed
                # "{}" placeholders, which breaks the log-record formatting
                logger.warning("wrong KdataDay:%s,error:%s", kdata_item, e)
        if actions:
            resp = elasticsearch.helpers.bulk(es, actions)
            logger.info(resp)
def stock_kdata_to_es(start='000001', end='666666', codes=US_STOCK_CODES, force=False):
    """Bulk-index daily kdata into Elasticsearch for sh/sz/nasdaq securities.

    start/end bound the code range, codes is an explicit code list
    (defaults to the US stock universe); with force=False indexing resumes
    after the newest document already stored per security.
    """
    for _, security_item in get_security_list(start=start, end=end, exchanges=['sh', 'sz', 'nasdaq'],
                                              codes=codes).iterrows():
        # ensure the per-type/exchange index and mapping exist
        index_name = get_es_kdata_index(security_item['type'], security_item['exchange'])
        es_index_mapping(index_name, StockKData)

        start_date = None
        if not force:
            # resume from the latest record already in ES for this security
            query = {"term": {"securityId": security_item["id"]}}
            latest_record = es_get_latest_record(index_name, query=query)
            logger.info("latest_record:{}".format(latest_record))
            if latest_record:
                start_date = latest_record['timestamp']

        actions = []
        for _, kdata_item in get_kdata(security_item, start_date=start_date).iterrows():
            # the resume day itself is already indexed
            if start_date and is_same_date(start_date, kdata_item['timestamp']):
                continue
            try:
                # renamed from `id` to avoid shadowing the builtin
                doc_id = '{}_{}'.format(kdata_item['securityId'], kdata_item['timestamp'])
                kdata = StockKData(meta={'id': doc_id}, id=doc_id)
                kdata.meta['index'] = index_name
                fill_doc_type(kdata, json.loads(kdata_item.to_json()))
                actions.append(kdata.to_dict(include_meta=True))
            except Exception as e:
                # stdlib logging takes lazy %-style args; the original passed
                # "{}" placeholders, which breaks the log-record formatting
                logger.warning("wrong KdataDay:%s,error:%s", kdata_item, e)
        if actions:
            resp = elasticsearch.helpers.bulk(es, actions)
            logger.info(resp)
def stock_kdata_to_es(start='000001', end='666666', force=False):
    """Index daily stock kdata into Elasticsearch, one document save per row.

    start/end bound the security code range; with force=False indexing
    resumes after the newest record already stored in the per-security index.
    """
    for _, security_item in get_security_list(start=start, end=end).iterrows():
        # ensure the per-security index and mapping exist
        index_name = get_es_kdata_index(security_item['id'])
        es_index_mapping(index_name, StockKData)

        start_date = None
        if not force:
            latest_record = es_get_latest_record(index_name)
            logger.info("latest_record:{}".format(latest_record))
            if latest_record:
                start_date = latest_record['timestamp']
        for _, kdata_item in get_kdata(security_item, start_date=start_date).iterrows():
            # the resume day itself is already indexed
            if start_date and is_same_date(start_date, kdata_item['timestamp']):
                continue
            try:
                # renamed from `id` to avoid shadowing the builtin
                doc_id = '{}_{}'.format(kdata_item['securityId'], kdata_item['timestamp'])
                kdata = StockKData(meta={'id': doc_id}, id=doc_id)
                fill_doc_type(kdata, json.loads(kdata_item.to_json()))
                kdata.save(index=index_name)
            except Exception as e:
                # stdlib logging takes lazy %-style args; the original passed
                # "{}" placeholders, which breaks the log-record formatting
                logger.warning("wrong KdataDay:%s,error:%s", kdata_item, e)
def ema(security_id, start, end, level='day', fuquan='bfq', source='sina', window=12, col=None,
        pre_count=0):
    """Exponential moving average over the given kdata columns.

    Parameters
    ----------
    security_id : the security id/code passed to quote.get_kdata
    start, end : slice bounds of the result
    level, fuquan, source : passed through to quote.get_kdata
    window : EMA span
    col : list of columns to average; defaults to ['close']
    pre_count : number of extra rows to keep before `start`

    Returns a DataFrame with columns renamed to "<col>_ema<window>".
    """
    # None sentinel instead of a mutable default argument
    if col is None:
        col = ['close']
    df = quote.get_kdata(security_id, fuquan=fuquan, source=source, level=level)
    # extend the slice backwards so the EMA is warmed up by `start`
    df1 = df[:start]
    df2 = df1[-window - pre_count + 1:]
    adjust_start = df2.index[0]
    df_result = df.loc[adjust_start:end, col].ewm(span=window, adjust=False).mean()
    if pre_count == 0:
        df_result = df_result[start:end]
    else:
        df_result = df_result[window - 1:]
    df_result.columns = ["{}_ema{}".format(item, window) for item in col]
    return df_result
def save_account(self, timestamp, trading_close=False):
    """Snapshot the account at `timestamp`: revalue positions at the day's
    close, drop empty positions, and persist the account document.

    trading_close=True marks an end-of-day save (T+1 holdings become
    available again).
    """
    self.account_lock.acquire()
    # try/finally so the lock is released even if revaluation/save raises
    try:
        self.account = self.account.copy_for_save(trading_close=trading_close)
        self.account.timestamp = timestamp
        self.account.allValue = 0
        # iterate over a copy: removing from the list while iterating it
        # directly would silently skip the element after each removal
        for position in list(self.account.positions):
            # for T+1 securities, holdings become available next trading day
            if trading_close and position.tradingT == 1:
                position.availableAmount = position.amount
            df = get_kdata(position.securityId, pd.Timestamp(timestamp).date(),
                           source='sina', fuquan=self.stock_fuquan)
            if len(df) > 0:
                position.value = position.amount * df['close']
                self.account.allValue += position.value
            if position.amount == 0:
                self.account.positions.remove(position)
        self.account.allValue += self.account.cash
        self.account.save(index='account')
    finally:
        self.account_lock.release()
def crawl_index_quote():
    """Crawl daily kdata for every index, then backfill market-summary
    fields (turnoverRate/tCap/mCap/pe) for the major market indices."""
    for _, security_item in get_security_list(security_type='index').iterrows():
        # crawl daily kdata from 163
        logger.info("{} get index kdata start".format(security_item['code']))
        start_date = get_latest_download_trading_date(security_item, source='163')
        end_date = pd.Timestamp.today()
        if start_date > end_date:
            # already up to date, nothing to crawl
            logger.info("{} kdata is ok".format(security_item['code']))
        else:
            process_crawl(StockKdataSpider163, {"security_item": security_item,
                                                "start_date": start_date,
                                                "end_date": end_date})
        logger.info("{} get index kdata from 163 end".format(security_item['code']))

        # market overview data for [Shanghai, Shenzhen composite, SME board, ChiNext]
        if security_item['id'] in ['index_sh_000001', 'index_sz_399106', 'index_sz_399005',
                                   'index_sz_399006']:
            # if security_item['id'] in ['index_sz_399106', 'index_sz_399005', 'index_sz_399006']:
            df = get_kdata(security_item=security_item)
            # keep only the rows still missing any summary column -- those
            # are the dates that need a (re-)crawl
            df = df[df['turnoverRate'].isna() | df['tCap'].isna() | df['mCap'].isna() | df[
                'pe'].isna()]
            if not df.empty:
                dates = df.index.strftime('%Y-%m-%d').tolist()
                # if security_item['id'] == 'index_sz_399106':
                #     dates = [the_date for the_date in dates if
                #              pd.Timestamp(the_date).date().year >= 2018]
                if dates:
                    process_crawl(StockSummarySpider, {"security_item": security_item,
                                                       "the_dates": dates})
def ema(security_item, start_date, end_date, level='day', fuquan='qfq', source='163', window=12,
        col=None, return_all=False, return_col=True):
    """
    calculate ema.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code
    start_date : TimeStamp str or TimeStamp
        start date
    end_date : TimeStamp str or TimeStamp
        end date
    fuquan : str
        {"qfq","hfq","bfq"},default:"qfq"
    source : str
        the data source,{'163','sina'},default: '163'
    level : str or int
        the kdata level,{1,5,15,30,60,'day','week','month'},default : 'day'
    window : int
        the ma window,default : 12
    col : list
        the column for calculating,['close', 'volume', 'turnover'],default:['close']
    return_all : bool
        whether return all the kdata values,default:False
    return_col : bool
        whether return the calculating col too,default:True

    Returns
    -------
    DataFrame
    """
    # None sentinel instead of a mutable default argument
    if col is None:
        col = ['close']
    df = quote.get_kdata(security_item, fuquan=fuquan, start_date=start_date, end_date=end_date,
                         source=source, level=level)
    df_col = df.loc[:, col]
    df_result = df_col.ewm(span=window, adjust=False, min_periods=window).mean()
    df_result.columns = ["{}_ema{}".format(item, window) for item in col]

    if return_all:
        df_result = pd.concat([df, df_result], axis=1)
    elif return_col:
        df_result = pd.concat([df_col, df_result], axis=1)
    return df_result
def download_tick(self, response):
    """Scrapy callback: persist a day's tick data from Sina.

    A real excel response is saved as-is; the "no data for the day" page
    triggers a synthetic tick generated from that day's kdata. Anything
    else is logged as an error.
    """
    content_type_header = response.headers.get('content-type', None)
    if content_type_header.decode(
            "utf-8"
    ) == 'application/vnd.ms-excel' or "当天没有数据" in response.body.decode(
            'GB2312'):
        trading_date = response.meta['trading_date']
        security_item = response.meta['item']
        # None until we have something to write -- the original left this
        # unbound (UnboundLocalError) when kdata had no row for the date
        content = None
        if content_type_header.decode(
                "utf-8") == 'application/vnd.ms-excel':
            content = response.body
        else:
            kdata_df = get_kdata(security_item, the_date=trading_date)
            if trading_date in kdata_df.index:
                json_data = kdata_df.loc[trading_date, :]
                content = kdata_to_tick(json_data)
                self.logger.info(
                    "{} {} generate tick from kdata {}".format(
                        security_item['code'], trading_date, content))
                content = content.encode('GB2312')
        if content is not None:
            sina_tick_to_csv(security_item, io.BytesIO(content), trading_date)
        else:
            # no excel payload and no kdata row either: skip instead of crashing
            self.logger.warning("no kdata to generate tick for {} {}".format(
                security_item['code'], trading_date))
    else:
        self.logger.exception(
            "get tick error:url={} content type={} body={}".format(
                response.url, content_type_header, response.body))
def test_get_stock_kdata():
    """Daily kdata for 600977 supports default, single-date and ranged queries."""
    kdata = quote.get_kdata('600977')
    assert len(kdata.index) > 0

    kdata = quote.get_kdata('600977', the_date='2018-03-29')
    assert '2018-03-29' in kdata.index

    # ranged query accepts mixed date formats and carries the fuquan factor
    kdata = quote.get_kdata('600977', start_date='2016-08-09', end_date='20180329')
    assert '2016-08-09' in kdata.index
    assert '20180329' in kdata.index
    assert kdata.loc['2016-08-09', 'factor'] == 1
    assert kdata.loc['20180329', 'factor'] > 1

    # US symbols work through the same API
    kdata = quote.get_kdata('AAPL', start_date='2016-08-09', end_date='20180329')
    assert not kdata.empty
def kdata_to_kafka(fuquan):
    """Publish every security's daily kdata rows to its per-security Kafka
    topic, stamping each message with the kdata row's own timestamp.

    fuquan: adjustment mode of the kdata being published ('bfq'/'qfq'/'hfq').
    """
    for _, security_item in get_security_list().iterrows():
        # topic depends only on the security, so compute it once per security
        # instead of once per kdata row (hoisted loop invariant)
        topic = get_kafka_kdata_topic(security_item['id'], fuquan)
        for _, kdata_item in get_kdata(security_item, source='sina', fuquan=fuquan).iterrows():
            the_json = kdata_item.to_json(force_ascii=False)
            producer.send(topic,
                          bytes(the_json, encoding='utf8'),
                          timestamp_ms=int(datetime.datetime.strptime(kdata_item['timestamp'],
                                                                      TIME_FORMAT_DAY).timestamp()))
            logger.debug("kdata_to_kafka {}".format(the_json))
def _kdata_to_kafka(security_item, fuquan='hfq'):
    """Publish one security's daily kdata rows to its Kafka topic.

    security_item: security item, id or code (normalized via to_security_item);
    fuquan: adjustment mode, default 'hfq'.
    """
    security_item = to_security_item(security_item)
    # topic depends only on the security; hoisted out of the per-row loop
    topic = get_kafka_kdata_topic(security_item['id'], fuquan)
    for _, kdata_item in get_kdata(security_item, fuquan=fuquan).iterrows():
        the_json = kdata_item.to_json(force_ascii=False)
        producer.send(topic,
                      bytes(the_json, encoding='utf8'),
                      timestamp_ms=int(datetime.datetime.strptime(kdata_item['timestamp'],
                                                                  TIME_FORMAT_DAY).timestamp()))
        logger.debug("kdata_to_kafka {}".format(the_json))
def ma(security_id, start, end, level='day', fuquan='bfq', source='sina', window=5, col=None):
    """Simple moving average over the given kdata columns.

    Parameters
    ----------
    security_id : the security id/code passed to quote.get_kdata
    start, end : slice bounds of the result
    level, fuquan, source : passed through to quote.get_kdata
    window : MA window size
    col : list of columns to average; defaults to ['close', 'volume', 'turnover']

    Returns a DataFrame with columns renamed to "<col>_ma<window>".
    """
    # None sentinel instead of a mutable default argument
    if col is None:
        col = ['close', 'volume', 'turnover']
    df = quote.get_kdata(security_id, fuquan=fuquan, source=source, level=level)
    # extend the slice backwards so the first MA value at `start` is warmed up
    df1 = df[:start]
    df2 = df1[-window + 1:]
    adjust_start = df2.index[0]
    df_result = df.loc[adjust_start:end, col].rolling(window=window, min_periods=0).mean()[start:end]
    df_result.columns = ["{}_ma{}".format(item, window) for item in col]
    return df_result
def kdata_to_es(start=None, end=None, security_type='stock', exchanges=None, force=False):
    """Bulk-index daily kdata into Elasticsearch for the chosen security type.

    security_type selects the doc type ('stock', 'index' or
    'cryptocurrency'); exchanges defaults to ['sh', 'sz']; with force=False
    indexing resumes after the newest document already stored per security.
    """
    # None sentinel instead of a mutable default argument
    if exchanges is None:
        exchanges = ['sh', 'sz']
    if security_type == 'stock':
        doc_type = StockKData
    elif security_type == 'index':
        doc_type = IndexKData
    elif security_type == 'cryptocurrency':
        doc_type = CryptoCurrencyKData
    else:
        # original fell through and crashed later with NameError on doc_type
        raise ValueError("unsupported security_type:{}".format(security_type))

    for _, security_item in get_security_list(security_type=security_type, exchanges=exchanges,
                                              start=start, end=end).iterrows():
        # ensure the per-type/exchange index and mapping exist
        index_name = get_es_kdata_index(security_item['type'], security_item['exchange'])
        es_index_mapping(index_name, doc_type)

        start_date = None
        if not force:
            # resume from the latest record already in ES for this security
            query = {"term": {"securityId": security_item["id"]}}
            latest_record = es_get_latest_record(index_name, query=query)
            logger.info("latest_record:{}".format(latest_record))
            if latest_record:
                start_date = latest_record['timestamp']

        actions = []
        df_kdata = get_kdata(security_item, start_date=start_date)
        for _, kdata_item in df_kdata.iterrows():
            # the resume day itself is already indexed
            if start_date and is_same_date(start_date, kdata_item['timestamp']):
                continue
            try:
                # renamed from `id` to avoid shadowing the builtin
                doc_id = '{}_{}'.format(kdata_item['securityId'], kdata_item['timestamp'])
                kdata = doc_type(meta={'id': doc_id}, id=doc_id)
                kdata.meta['index'] = index_name
                kdata_json = json.loads(kdata_item.to_json())
                fill_doc_type(kdata, kdata_json)
                actions.append(kdata.to_dict(include_meta=True))
            except Exception as e:
                # stdlib logging takes lazy %-style args; the original passed
                # "{}" placeholders, which breaks the log-record formatting
                logger.warning("wrong KdataDay:%s,error:%s", kdata_item, e)
        if actions:
            resp = elasticsearch.helpers.bulk(es_client, actions)
            logger.info(resp)
def on_time_elapsed(self):
    """Per-period strategy hook: go all-in long when MACD turns positive,
    exit fully when it turns negative, using the day's Sina hfq close."""
    super().on_time_elapsed()

    current_kdata = quote.get_kdata(security_item=self.security_code, the_date=self.current_time,
                                    fuquan='hfq', source='sina')
    # only act on days that actually have kdata (skip non-trading days)
    if len(current_kdata) > 0:
        # MACD positive and flat -> buy with the whole account
        if self.macd.loc[self.current_time, 'macd'] > 0 and not self.account_service.get_position(
                self.security_code):
            # NOTE(review): current_kdata['close'] is a column, not a scalar --
            # presumably downstream accepts it; confirm against buy()
            self.buy(security_id=self.security_code, current_price=current_kdata['close'], pct=1.0)
        # MACD negative while holding -> sell everything
        elif self.macd.loc[self.current_time, 'macd'] < 0 and self.account_service.get_position(self.security_code):
            self.sell(security_id=self.security_code, current_price=current_kdata['close'], pct=1.0)
def download_tick(self, response):
    """Scrapy callback: persist a day's tick data from Sina.

    A real excel response is saved as-is; the "no data for the day" page
    triggers a synthetic tick generated from that day's kdata. Anything
    else is logged as an error.
    """
    content_type_header = response.headers.get('content-type', None)
    if content_type_header.decode("utf-8") == 'application/vnd.ms-excel' or "当天没有数据" in response.body.decode(
            'GB2312'):
        trading_date = response.meta['trading_date']
        security_item = response.meta['item']
        if content_type_header.decode("utf-8") == 'application/vnd.ms-excel':
            content = response.body
        else:
            kdata_json = get_kdata(security_item, trading_date).to_json()
            content = kdata_to_tick(kdata_json)
            # stdlib logging uses lazy %-style args; the original passed "{}"
            # placeholders with positional args, which breaks record formatting
            self.logger.info("%s %s generate tick from kdata %s",
                             security_item['code'], trading_date, content)
            content = content.encode('GB2312')
        sina_tick_to_csv(security_item, io.BytesIO(content), trading_date)
    else:
        self.logger.error(
            "get tick error:url={} content type={} body={}".format(response.url,
                                                                   content_type_header,
                                                                   response.body))
def start_requests(self):
    """Dispatch one market-summary request per configured date.

    The summary source depends on which major index this spider was
    configured with: SSE daily summary for the SSE Composite
    (index_sh_000001), SZSE excel reports (different TABKEYs) for the
    Shenzhen Composite, SME board and ChiNext indices.
    """
    self.security_item = self.settings.get("security_item")
    # full kdata since listing; presumably merged with the crawled summary
    # elsewhere -- the consumer of current_df is outside this view
    self.current_df = get_kdata(security_item=self.security_item,
                                start_date=self.security_item['listDate'])

    the_dates = self.settings.get("the_dates")
    # Shanghai market overview is stored under the SSE Composite index
    if self.security_item['id'] == 'index_sh_000001':
        for the_date in the_dates:
            yield Request(
                url=
                'http://query.sse.com.cn/marketdata/tradedata/queryTradingByProdTypeData.do?jsonCallBack=jsonpCallback30731&searchDate={}&prodType=gp&_=1515717065511'
                .format(the_date),
                headers=DEFAULT_SH_SUMMARY_HEADER,
                meta={'search_date': the_date},
                callback=self.download_sh_summary)
    # Shenzhen market overview is stored under the SZSE Composite index
    elif self.security_item['id'] == 'index_sz_399106':
        for the_date in the_dates:
            # SZSE report only has data from 2005 onwards
            if pd.Timestamp(the_date).date().year >= 2005:
                yield Request(
                    url=
                    'http://www.szse.cn/szseWeb/ShowReport.szse?SHOWTYPE=excel&CATALOGID=1803&txtQueryDate={}&ENCODE=1&TABKEY=tab1'
                    .format(the_date),
                    meta={'search_date': the_date},
                    callback=self.download_sz_summary)
    # SME board (tab3 of the same SZSE report)
    elif self.security_item['id'] == 'index_sz_399005':
        for the_date in the_dates:
            yield Request(
                url=
                'http://www.szse.cn/szseWeb/ShowReport.szse?SHOWTYPE=excel&CATALOGID=1803&txtQueryDate={}&ENCODE=1&TABKEY=tab3'
                .format(the_date),
                meta={'search_date': the_date},
                callback=self.download_sz_summary)
    # ChiNext (tab4 of the same SZSE report)
    elif self.security_item['id'] == 'index_sz_399006':
        for the_date in the_dates:
            yield Request(
                url=
                'http://www.szse.cn/szseWeb/ShowReport.szse?SHOWTYPE=excel&CATALOGID=1803&txtQueryDate={}&ENCODE=1&TABKEY=tab4'
                .format(the_date),
                meta={'search_date': the_date},
                callback=self.download_sz_summary)
def start_requests(self):
    """Dispatch one market-summary request per configured date.

    The summary source depends on which major index this spider was
    configured with: SSE daily summary for the SSE Composite
    (index_sh_000001), SZSE excel reports (different TABKEYs) for the
    Shenzhen Composite, SME board and ChiNext indices.
    """
    self.security_item = self.settings.get("security_item")
    # existing kdata; presumably merged with the crawled summary elsewhere
    # -- the consumer of current_df is outside this view
    self.current_df = get_kdata(security_item=self.security_item)

    the_dates = self.settings.get("the_dates")
    # Shanghai market overview is stored under the SSE Composite index
    if self.security_item['id'] == 'index_sh_000001':
        for the_date in the_dates:
            yield Request(
                url='http://query.sse.com.cn/marketdata/tradedata/queryTradingByProdTypeData.do?jsonCallBack=jsonpCallback30731&searchDate={}&prodType=gp&_=1515717065511'.format(
                    the_date),
                headers=DEFAULT_SH_SUMMARY_HEADER,
                meta={'search_date': the_date},
                callback=self.download_sh_summary)
    # Shenzhen market overview is stored under the SZSE Composite index
    elif self.security_item['id'] == 'index_sz_399106':
        for the_date in the_dates:
            # SZSE report only has data from 2005 onwards
            if pd.Timestamp(the_date).date().year >= 2005:
                yield Request(
                    url='http://www.szse.cn/szseWeb/ShowReport.szse?SHOWTYPE=excel&CATALOGID=1803&txtQueryDate={}&ENCODE=1&TABKEY=tab1'.format(
                        the_date),
                    meta={'search_date': the_date},
                    callback=self.download_sz_summary)
    # SME board (tab3 of the same SZSE report)
    elif self.security_item['id'] == 'index_sz_399005':
        for the_date in the_dates:
            yield Request(
                url='http://www.szse.cn/szseWeb/ShowReport.szse?SHOWTYPE=excel&CATALOGID=1803&txtQueryDate={}&ENCODE=1&TABKEY=tab3'.format(
                    the_date),
                meta={'search_date': the_date},
                callback=self.download_sz_summary)
    # ChiNext (tab4 of the same SZSE report)
    elif self.security_item['id'] == 'index_sz_399006':
        for the_date in the_dates:
            yield Request(
                url='http://www.szse.cn/szseWeb/ShowReport.szse?SHOWTYPE=excel&CATALOGID=1803&txtQueryDate={}&ENCODE=1&TABKEY=tab4'.format(
                    the_date),
                meta={'search_date': the_date},
                callback=self.download_sz_summary)
def test_get_future_kdata():
    """Futures kdata for rb1605 from 2015-05-15 onward must include 2016-05-16."""
    kdata = quote.get_kdata('rb1605', start_date='2015-05-15')
    assert not kdata.empty
    assert '20160516' in kdata.index