Example no. 1
 def start_requests(self):
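     # Iterate over weekdays since 2006-06-30 and request the CFFEX daily
     # k-data CSVs (or per-variety inventory CSVs) not yet cached locally.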
     if self.dataType is None or self.dataType == 'dayk':
         daterange = pd.date_range(start='2006-06-30',
                                   end=pd.Timestamp.today())
         daterange = daterange[daterange.dayofweek < 5]
         for i in daterange:
             the_dir = get_exchange_cache_path(
                 security_type='future',
                 exchange='cffex',
                 data_type='day_kdata',
                 the_date=to_timestamp(i)) + ".csv"
             if not os.path.exists(the_dir):
                 yield Request(
                     url="http://www.cffex.com.cn/sj/hqsj/rtj/" +
                     i.strftime("%Y%m/%d/%Y%m%d") + "_1.csv",
                     callback=self.download_cffex_history_data_file,
                     meta={'filename': the_dir})
     elif self.dataType == 'inventory':
         daterange = pd.date_range(start='2006-06-30',
                                   end=pd.Timestamp.today())
         k = ['IF', 'IC', 'IH', 'T', 'TF']
         daterange = daterange[daterange.dayofweek < 5]
         for i in daterange:
             for j in k:
                 the_dir = get_exchange_cache_path(
                     security_type='future',
                     exchange='cffex',
                     data_type='inventory',
                     the_date=to_timestamp(i)) + j + ".csv"
                 if not os.path.exists(the_dir):
                     yield Request(
                         url="http://www.cffex.com.cn/sj/ccpm/" +
                         i.strftime("%Y%m/%d/") + j + "_1.csv",
                         callback=self.download_cffex_history_data_file,
                         meta={'filename': the_dir})
Example no. 2
 def start_requests(self):
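     # Dispatch on the configured data type: CZCE daily k-data files,
     # the full-history page, or daily inventory files.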
     self.dataType = self.settings.get("dataType")
     if self.dataType is None or self.dataType == 'day_kdata':
         today = pd.Timestamp.today()
         for date in pd.date_range(start='20200101', end=today):
             the_dir = get_exchange_cache_path(
                 security_type='future',
                 exchange='czce',
                 the_date=to_timestamp(date),
                 data_type='day_kdata') + '.xls'
             if date.dayofweek < 5 and not os.path.exists(the_dir):
                 yield Request(
                     url="http://www.czce.com.cn/cn/DFSStaticFiles/Future/"
                     + date.strftime("%Y/%Y%m%d") + "/FutureDataDaily.xls",
                     callback=self.download_czce_kline_data,
                     meta={'filename': the_dir})
     elif self.dataType == 'historyk':
         yield Request(
             url="http://www.czce.com.cn/cn/jysj/lshqxz/H770319index_1.htm",
             callback=self.download_czce_history_data)
     elif self.dataType == 'inventory':
         today = pd.Timestamp.today()
         for date in pd.date_range(start='20200101', end=today):
             the_dir = get_exchange_cache_path(
                 security_type='future',
                 exchange='czce',
                 the_date=to_timestamp(date),
                 data_type='inventory') + '.xls'
             if date.dayofweek < 5 and not os.path.exists(the_dir):
                 yield Request(
                     url="http://www.czce.com.cn/cn/DFSStaticFiles/Future/"
                     + date.strftime("%Y/%Y%m%d") +
                     "/FutureDataHolding.xls",
                     callback=self.download_czce_kline_data,
                     meta={'filename': the_dir})
Example no. 3
    def start_requests(self):
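        # SHFE: per-day inventory JSON, per-day k-data, or the yearly
        # statistics archives, skipping anything already cached.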
        self.dataType = self.settings.get("dataType")
        if self.dataType == 'inventory':
            today = pd.Timestamp.today()
            for date in pd.date_range(start=today.date() -
                                      pd.Timedelta(weeks=520),
                                      end=today):
                the_dir = get_exchange_cache_path(
                    security_type='future',
                    exchange='shfe',
                    the_date=to_timestamp(date),
                    data_type='inventory') + '.json'
                if date.dayofweek < 5 and not os.path.exists(the_dir):
                    yield Request(url=self.get_day_inventory_url(
                        the_date=date.strftime('%Y%m%d')),
                                  meta={
                                      'the_date': date,
                                      'the_path': the_dir
                                  },
                                  callback=self.download_shfe_data_by_date)

        elif self.dataType == 'day_kdata':

            daterange = pd.date_range(start='2020-01-01',
                                      end=pd.Timestamp.today())
            daterange = daterange[daterange.dayofweek < 5]
            # per-day data
            for the_date in daterange:
                the_path = get_exchange_cache_path(
                    security_type='future',
                    exchange='shfe',
                    the_date=to_timestamp(the_date),
                    data_type='day_kdata')

                if not os.path.exists(the_path):
                    yield Request(url=self.get_day_kdata_url(
                        the_date=the_date.strftime('%Y%m%d')),
                                  meta={
                                      'the_date': the_date,
                                      'the_path': the_path
                                  },
                                  callback=self.download_shfe_data_by_date)
        else:
            # fetch the yearly statistics directly
            for the_year in range(2009, datetime.today().year):
                the_dir = get_exchange_cache_dir(security_type='future',
                                                 exchange='shfe')
                the_path = os.path.join(
                    the_dir, "{}_shfe_history_data.zip".format(the_year))

                if not os.path.exists(the_path):
                    yield Request(
                        url=self.get_year_k_data_url(the_year=the_year),
                        meta={
                            'the_year': the_year,
                            'the_path': the_path
                        },
                        callback=self.download_shfe_history_data)
Example no. 4
    def consume_topic_with_func(self, topic, func):
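        # Replay a kafka topic from the offset matching self.start_timestamp,
        # dispatching every message to the handler named by `func`.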
        consumer = KafkaConsumer(
            topic,
            client_id='fooltrader',
            group_id=self.bot_name,
            value_deserializer=lambda m: json.loads(m.decode('utf8')),
            bootstrap_servers=[KAFKA_HOST])
        topic_partition = TopicPartition(topic=topic, partition=0)

        if self.start_timestamp:
            start_timestamp = int(self.start_timestamp.timestamp() * 1000)

            end_offset = consumer.end_offsets([topic_partition
                                               ])[topic_partition]
            if end_offset == 0:
                self.logger.warning("topic:{} end offset:{}".format(
                    topic, end_offset))
                self.logger.error(
                    "the topic:{} has no data, but you want to backtest".format(
                        topic))
                return

            # find the offset from start_timestamp
            offset_and_timestamp = consumer.offsets_for_times(
                {topic_partition: start_timestamp})

            if offset_and_timestamp:
                offset_and_timestamp = offset_and_timestamp[topic_partition]

                if offset_and_timestamp:
                    # partitions are assigned after poll, so we can seek
                    consumer.poll(5, 1)
                    # move to the offset
                    consumer.seek(topic_partition, offset_and_timestamp.offset)

                    for message in consumer:
                        if 'timestamp' in message.value:
                            message_time = to_timestamp(
                                message.value['timestamp'])
                        else:
                            message_time = to_timestamp(message.timestamp)

                        if self.end_timestamp and (message_time >
                                                   self.end_timestamp):
                            consumer.close()
                            break

                        getattr(self, func)(message.value)

                else:
                    latest_timestamp, _ = get_latest_timestamp_order_from_topic(
                        topic)
                    self.logger.warning(
                        "start:{} is after the last record:{}".format(
                            self.start_timestamp, latest_timestamp))
Example no. 5
    def start_requests(self):
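        # With explicit trading dates, fetch those days' k-data; otherwise
        # fall back to the yearly SHFE history archives.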
        self.trading_dates = self.settings.get("trading_dates")

        if self.trading_dates:
            # per-day data
            for the_date in self.trading_dates:
                the_path = get_exchange_cache_path(
                    security_type='future',
                    exchange='shfe',
                    the_date=to_timestamp(the_date),
                    data_type='day_kdata')

                yield Request(url=self.get_day_kdata_url(the_date=the_date),
                              meta={
                                  'the_date': the_date,
                                  'the_path': the_path
                              },
                              callback=self.download_shfe_data_by_date)
        else:
            # fetch the yearly statistics directly
            for the_year in range(2009, datetime.today().year):
                the_dir = get_exchange_cache_dir(security_type='future',
                                                 exchange='shfe')
                the_path = os.path.join(
                    the_dir, "{}_shfe_history_data.zip".format(the_year))

                if not os.path.exists(the_path):
                    yield Request(
                        url=self.get_year_k_data_url(the_year=the_year),
                        meta={
                            'the_year': the_year,
                            'the_path': the_path
                        },
                        callback=self.download_shfe_history_data)
Example no. 6
    def on_event(self, event_item):
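        # On the first event of a new day, refresh the previous close; then
        # compute the intraday change and check alert subscriptions.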
        self.logger.debug(event_item)
        if not self.last_date or not is_same_date(self.last_date,
                                                  self.current_time):
            self.last_date = to_timestamp(
                event_item['timestamp']) - timedelta(days=1)
            self.last_kdata = get_kdata(self.security_item,
                                        the_date=to_time_str(self.last_date))

            if self.last_kdata is None:
                fetch_kdata(exchange_str=self.security_item['exchange'])
                self.last_kdata = get_kdata(self.security_item,
                                            the_date=to_time_str(
                                                self.last_date))

            if self.last_kdata is not None:
                self.last_close = self.last_kdata.loc[
                    to_time_str(self.last_date), 'close']
            else:
                # logger.exception is only meaningful inside an except block
                self.logger.error("could not get last close for:{}".format(
                    self.last_date))

            self.update_today_triggered()

        change_pct = (event_item['price'] - self.last_close) / self.last_close

        self.logger.info(
            "{} last day close is:{},now price is:{},the change_pct is:{}".
            format(self.security_item['id'], self.last_close,
                   event_item['price'], change_pct))
        self.check_subscription(current_price=event_item['price'],
                                change_pct=change_pct)
Example no. 7
    def init_new_computing_interval(self, event_timestamp):
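        # Truncate the event time to its minute boundary and cache the
        # day-level and microsecond-level time strings for the interval.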
        self.last_timestamp = to_timestamp(event_timestamp)
        self.kdata_timestamp = self.last_timestamp + timedelta(seconds=-self.last_timestamp.second,
                                                               microseconds=-self.last_timestamp.microsecond)

        self.last_day_time_str = to_time_str(self.kdata_timestamp)
        self.last_mirco_time_str = to_time_str(self.kdata_timestamp, time_fmt=TIME_FORMAT_MICRO)
Example no. 8
    def on_init(self):
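        # Resume computation from the latest kdata and statistic records
        # stored in Elasticsearch for this security.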
        super().on_init()
        self.security_id = 'cryptocurrency_contract_RAM-EOS'

        query = {
            "term": {"securityId": ""}
        }
        query["term"]["securityId"] = self.security_id

        # get latest kdata timestamp
        latest_kdata_timestamp = es_get_latest_timestamp(index=kdata_index_name, query=query)

        # get latest eos statistic timestamp
        latest_statistic_record = es_get_latest_record(index=statistic_index_name,
                                                       query=query, time_field='updateTimestamp')
        if latest_statistic_record:
            self.latest_statistic_record = CommonStatistic(
                meta={'id': latest_statistic_record['id'], 'index': statistic_index_name},
                **latest_statistic_record)
            if not is_same_time(latest_kdata_timestamp, self.latest_statistic_record['updateTimestamp']):
                self.logger.warning(
                    "latest_kdata_timestamp:{},latest_statistic_timestamp:{}".format(latest_kdata_timestamp,
                                                                                     self.latest_statistic_record[
                                                                                         'updateTimestamp']))
        else:
            self.latest_statistic_record = None

        if latest_kdata_timestamp and self.latest_statistic_record:
            self.start_timestamp = min(latest_kdata_timestamp,
                                       to_timestamp(self.latest_statistic_record['updateTimestamp']))
Example no. 9
    def on_event(self, event_item):
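        # Buffer ticks; when the minute rolls over, build a DataFrame from
        # the buffer, generate statistics and bulk-index the ES actions.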
        if not self.computing_start:
            self.computing_start = datetime.now()
        if not self.last_timestamp:
            self.init_new_computing_interval(event_item['timestamp'])

        current_timestamp = to_timestamp(event_item['timestamp'])

        # calculating last minute
        if current_timestamp.minute != self.last_timestamp.minute:
            self.df = pd.DataFrame(self.item_list)

            self.generate_user_statistic()

            if self.es_actions:
                resp = elasticsearch.helpers.bulk(es_client, self.es_actions)
                self.logger.info("index success:{} failed:{}".format(resp[0], len(resp[1])))
                if resp[1]:
                    self.logger.error("error:{}".format(resp[1]))

            self.init_new_computing_interval(event_item['timestamp'])
            self.es_actions = []
            self.item_list = []

            self.logger.info("using computing time:{}".format(datetime.now() - self.computing_start))
            self.computing_start = datetime.now()

        self.item_list.append(event_item)
Example no. 10
    def on_init(self):
        super().on_init()
        self.security_id = 'cryptocurrency_contract_RAM-EOS'

        query = {
            "term": {"securityId": ""}
        }
        query["term"]["securityId"] = self.security_id

        # get latest user statistic timestamp
        latest_eos_user_statistic_record = es_get_latest_record(index=user_statistic_index_name,
                                                                query=query, time_field='updateTimestamp')

        if latest_eos_user_statistic_record:
            self.latest_eos_user_statistic_record = EosUserStatistic(
                meta={'id': latest_eos_user_statistic_record['id'], 'index': user_statistic_index_name},
                **latest_eos_user_statistic_record)

        else:
            self.latest_eos_user_statistic_record = None

        if self.latest_eos_user_statistic_record:
            self.start_timestamp = to_timestamp(self.latest_eos_user_statistic_record['updateTimestamp'])

        self.user_map_latest_user_statistic = {}
        self.user_map_latest_user_daily_statistic = {}
        self.es_actions = []
Example no. 11
    def update_user_statistic(self, user_id, record, update_timestamp):
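        # Load (or lazily create) the per-user statistic document, skipping
        # intervals that have already been computed.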
        latest_user_statistic = self.user_map_latest_user_statistic.get(user_id)
        if not latest_user_statistic:
            doc_id = '{}_{}'.format(user_id, self.security_id)

            the_record = es_get_user_statistic(user_id=user_id)
            if the_record:
                latest_user_statistic = EosUserStatistic(meta={'id': doc_id, 'index': user_statistic_index_name},
                                                         **the_record)
                self.user_map_latest_user_statistic[user_id] = latest_user_statistic
        # ignore the user statistic has computed before
        if latest_user_statistic and self.kdata_timestamp <= to_timestamp(
                latest_user_statistic['updateTimestamp']):
            return

        if not latest_user_statistic:
            latest_user_statistic = EosUserStatistic(meta={'id': doc_id, 'index': user_statistic_index_name},
                                                     id=doc_id,
                                                     userId=user_id,
                                                     timestamp=self.last_day_time_str,
                                                     securityId=self.security_id,
                                                     code=self.security_item['code'],
                                                     name=self.security_item['name'])
            self.user_map_latest_user_statistic[user_id] = latest_user_statistic

        # update user  statistic
        self.update_statistic_doc(latest_user_statistic, record, update_timestamp)
Example no. 12
 def start_requests(self):
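     # Request the weekly chinaclear pages, one per week since 2015-05-22.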
     startDate = to_timestamp('2015-05-22')
     today = pd.Timestamp.today()
     for date in pd.date_range(start=startDate, end=today, freq='W'):
         yield Request(
             url=
             "http://www.chinaclear.cn/cms-search/view.action?action=china&dateStr="
             + date.strftime('%Y.%m.%d'),
             meta={'the_date': date.strftime('%Y%m%d')},
             callback=self.download_chinaclear_data_by_date)
Example no. 13
 def start_requests(self):
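     # Older CZCE variant: same flow as Example no. 2 but against the
     # /portal/ URLs, with k-data limited to the current year.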
     if self.dataType is None:
         today = pd.Timestamp.today()
         for date in pd.date_range(start=today.date() -
                                   pd.Timedelta(days=today.dayofyear - 1),
                                   end=today):
             the_dir = get_exchange_cache_path(
                 security_type='future',
                 exchange='czce',
                 the_date=to_timestamp(date),
                 data_type='day_kdata') + '.xls'
             if date.dayofweek < 5 and not os.path.exists(the_dir):
                 yield Request(
                     url=
                     "http://www.czce.com.cn/portal/DFSStaticFiles/Future/"
                     + date.strftime("%Y/%Y%m%d") + "/FutureDataDaily.xls",
                     callback=self.download_czce_kline_data,
                     meta={'filename': the_dir})
     elif self.dataType == 'historyk':
         yield Request(
             url=
             "http://www.czce.com.cn/portal/jysj/qhjysj/lshqxz/A09112017index_1.htm",
             callback=self.download_czce_history_data)
     elif self.dataType == 'inventory':
         today = pd.Timestamp.today()
         for date in pd.date_range(start=today.date() -
                                   pd.Timedelta(weeks=450),
                                   end=today):
             the_dir = get_exchange_cache_path(
                 security_type='future',
                 exchange='czce',
                 the_date=to_timestamp(date),
                 data_type='inventory') + '.xls'
             if date.dayofweek < 5 and not os.path.exists(the_dir):
                 yield Request(
                     url=
                     "http://www.czce.com.cn/portal/DFSStaticFiles/Future/"
                     + date.strftime("%Y/%Y%m%d") +
                     "/FutureDataHolding.xls",
                     callback=self.download_czce_kline_data,
                     meta={'filename': the_dir})
Example no. 14
    def after_init(self):
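        # Default the computing start to the security's list date and reset
        # the per-interval state.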
        super().after_init()
        if not self.start_timestamp:
            self.start_timestamp = to_timestamp(self.security_item['listDate'])
        # the last timestamp for the computing interval
        self.last_timestamp = None
        self.last_day_time_str = None
        self.last_mirco_time_str = None

        self.df = pd.DataFrame()
        self.item_list = []

        self.computing_start = None
Example no. 15
    def generate_eos_daily_statistic(self):
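        # Aggregate the day's buffered ticks into volume, turnover and
        # size-bucketed money-flow figures, then update the ES statistic doc.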
        # ignore the statistic has computed before
        if self.latest_statistic_record and self.kdata_timestamp <= to_timestamp(
                self.latest_statistic_record['updateTimestamp']):
            return

        # update the statistic
        if (not self.latest_statistic_record) or (not is_same_date(self.latest_statistic_record['timestamp'],
                                                                   self.df['timestamp'][0])):
            doc_id = "{}_{}".format(self.security_id, self.last_day_time_str)
            self.latest_statistic_record = CommonStatistic(meta={'id': doc_id, 'index': statistic_index_name},
                                                           id=doc_id,
                                                           timestamp=self.last_day_time_str,
                                                           securityId=self.security_id,
                                                           code=self.security_item['code'],
                                                           name=self.security_item['name'])

        volume = self.df['volume'].sum()
        turnover = self.df['turnover'].sum()
        flow = (self.df['turnover'] * self.df['direction']).sum()

        flowIn = self.df[self.df['direction'] == 1]['turnover'].sum()
        flowOut = self.df[self.df['direction'] == -1]['turnover'].sum()

        bigFlowIn = self.df[(self.df['direction'] == 1) & (self.df['turnover'] >= self.BIG_ORDER)]['turnover'].sum()
        middleFlowIn = self.df[(self.df['direction'] == 1) & (self.df['turnover'] >= self.MIDDLE_ORDER) & (
                self.df['turnover'] < self.BIG_ORDER)]['turnover'].sum()
        smallFlowIn = self.df[(self.df['direction'] == 1) & (self.df['turnover'] < self.MIDDLE_ORDER)]['turnover'].sum()

        bigFlowOut = self.df[(self.df['direction'] == -1) & (self.df['turnover'] >= self.BIG_ORDER)]['turnover'].sum()
        middleFlowOut = self.df[(self.df['direction'] == -1) & (self.df['turnover'] >= self.MIDDLE_ORDER) & (
                self.df['turnover'] < self.BIG_ORDER)]['turnover'].sum()
        smallFlowOut = self.df[(self.df['direction'] == -1) & (self.df['turnover'] < self.MIDDLE_ORDER)][
            'turnover'].sum()

        self.update_statistic_doc(self.latest_statistic_record, {'volume': volume,
                                                                 'turnover': turnover,
                                                                 'flow': flow,
                                                                 'flowIn': flowIn,
                                                                 'flowOut': flowOut,
                                                                 'bigFlowIn': bigFlowIn,
                                                                 'middleFlowIn': middleFlowIn,
                                                                 'smallFlowIn': smallFlowIn,
                                                                 'bigFlowOut': bigFlowOut,
                                                                 'middleFlowOut': middleFlowOut,
                                                                 'smallFlowOut': smallFlowOut
                                                                 }, updateTimestamp=self.last_mirco_time_str)
Example no. 16
def crawl_rollYield_And_Spread():
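    # Walk the futures trading calendar up to today and cache the spot
    # price table for every date that is not cached yet.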
    cache_dir = get_exchange_cache_dir(security_type='future',
                                       exchange='shfe',
                                       data_type="day_kdata")
    today = pd.Timestamp.today()
    calendar = fushare.cons.get_calendar()
    filteredCalendar = list(
        filter(lambda x: datetime.strptime(x, '%Y%m%d') <= today, calendar))
    for date in filteredCalendar:
        the_dir = get_exchange_cache_path(security_type='future',
                                          exchange='shfe',
                                          the_date=to_timestamp(date),
                                          data_type='misc')
        if not os.path.exists(the_dir):
            # rydf = fushare.get_rollYield_bar(type="var", date=date)
            # rydf.to_csv(the_dir + 'rollYeild' + date + '.csv')
            try:
                spdf = fushare.get_spotPrice(date)
                spdf.to_csv(the_dir + 'spotPrice' + date + '.csv')
            except BaseException as e:
                print("not downloaded for " + date + ": " + str(e))
Example no. 17
    def on_event(self, event_item):
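        # Buffer ticks; when the minute changes, build the 1-minute kdata
        # and the daily statistic from the buffered items.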
        if not self.computing_start:
            self.computing_start = datetime.now()
        if not self.last_timestamp:
            self.init_new_computing_interval(event_item['timestamp'])

        current_timestamp = to_timestamp(event_item['timestamp'])

        # calculating last minute
        if current_timestamp.minute != self.last_timestamp.minute:
            self.df = pd.DataFrame(self.item_list)

            self.generate_1min_kdata()
            self.generate_eos_daily_statistic()

            self.init_new_computing_interval(event_item['timestamp'])
            self.item_list = []

            self.logger.info("using computing time:{}".format(datetime.now() - self.computing_start))
            self.computing_start = datetime.now()

        self.item_list.append(event_item)
Example no. 18
def get_latest_timestamp_order_from_topic(topic):
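    # Peek at the last message of partition 0 and return its timestamp and
    # order id (if any); (None, None) when the topic is empty.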
    consumer = KafkaConsumer(
        topic,
        # client_id='fooltrader',
        # group_id='fooltrader',
        value_deserializer=lambda m: json.loads(m.decode('utf8')),
        bootstrap_servers=[KAFKA_HOST])
    topic_partition = TopicPartition(topic=topic, partition=0)
    end_offset = consumer.end_offsets([topic_partition])[topic_partition]
    if end_offset > 0:
        # partitions are assigned after poll, so we can seek
        consumer.poll(5, 1)

        consumer.seek(topic_partition, end_offset - 1)
        message = consumer.poll(10000, 500)
        # the partition key may be absent if the poll returned nothing
        msgs = message.get(topic_partition, [])
        if len(msgs) > 0:
            record = msgs[-1]
            timestamp = to_timestamp(record.value['timestamp'])
            order = None
            if 'order' in record.value:
                order = record.value['order']
            return timestamp, order
    return None, None
Example no. 19
def eos_ram_to_kafka():
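    # Stream RAM trades from MongoDB into the kafka tick topic, resuming
    # after the latest record already published for this security.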
    ram_trade = db.ram_trade

    logger.info("collection:{}".format(ram_trade))

    earliest_record = ram_trade.find_one({
        "$query": {},
        "$orderby": {
            "global_seq": 1
        }
    })
    latest_record = ram_trade.find_one({
        "$query": {},
        "$orderby": {
            "global_seq": -1
        }
    })

    logger.info("earliest_record:{},latest_record:{}".format(
        earliest_record, latest_record))

    security_id = 'cryptocurrency_contract_RAM-EOS'

    latest_timestamp, latest_order = get_latest_timestamp_order(security_id)

    topic = get_kafka_tick_topic(security_id)

    if not latest_timestamp:
        latest_timestamp = earliest_record['block_time']

    start_date, end_date = evaluate_time_range(latest_timestamp)

    while True:
        if latest_order and start_date and end_date:
            condition = {
                "block_time": {
                    "$gte": start_date,
                    "$lt": end_date
                },
                "global_seq": {
                    "$gt": latest_order
                }
            }
        elif start_date and end_date:
            condition = {"block_time": {"$gte": start_date, "$lt": end_date}}
        elif latest_order:
            condition = {"global_seq": {"$gt": latest_order}}

        logger.info("start_date:{},end_date:{},order:{}".format(
            start_date, end_date, latest_order))

        # end_date may be None once we have caught up; keep the previous
        # cursor in that case so the timedelta check below stays valid
        if end_date:
            latest_timestamp = end_date

        for item in ram_trade.find(condition):
            tick = to_tick(item)

            record_meta = producer.send(
                topic,
                bytes(json.dumps(tick, ensure_ascii=False), encoding='utf8'),
                key=bytes(security_id, encoding='utf8'),
                timestamp_ms=int(item['block_time'].timestamp() * 1000))
            record = record_meta.get(10)

            latest_timestamp = to_timestamp(record.timestamp)

            latest_order = tick['order']

            logger.debug("tick_to_kafka {}".format(tick))

        if datetime.now() - latest_timestamp < timedelta(minutes=5):
            time.sleep(2)
            logger.info("record latest_timestamp:{},now is:{}".format(
                latest_timestamp, datetime.now()))
            start_date = None
            end_date = None
        else:
            start_date, end_date = evaluate_time_range(latest_timestamp)
Example no. 20
 def request_inventory_data(self):
     # Collect DCE member position batch-export requests for every weekday
     # since 2020-01-01 whose zip is not cached yet.
     today = pd.Timestamp.today()
     requests = []
     for date in pd.date_range(start='20200101', end=today):
         the_dir = get_exchange_cache_path(security_type='future',
                                           exchange='dce',
                                           the_date=to_timestamp(date),
                                           data_type="day_inventory") + '.zip'
         if date.dayofweek < 5 and not os.path.exists(the_dir):
             requests.append(
                 FormRequest(
                     url="http://www.dce.com.cn/publicweb/quotesdata/exportMemberDealPosiQuotesBatchData.html",
                     formdata={
                         'batchExportFlag': 'batch',
                         'contract.contract_id': 'all',
                         'contract.variety_id': 'a',
                         'year': str(date.year),
                         # the DCE form appears to expect a zero-based month
                         'month': str(date.month - 1),
                         'day': str(date.day),
                         'memberDealPosiQuotes.trade_type': '0',
                         'memberDealPosiQuotes.variety': 'all'
                     },
                     callback=self.download_dce_kline_data,
                     meta={'filename': the_dir}))
     return requests
Example no. 21
    def consume_topic_with_func(self, topic, func):
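        # With no topic, drive on_timer on simulated time; otherwise replay
        # the topic from self.start_date, honoring self.end_date if set.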
        if not topic:
            while True:
                self.on_timer({"timestamp": self.current_time})

                if is_same_date(self.current_time, pd.Timestamp.now()):
                    time.sleep(self.time_step.total_seconds())

                self.current_time += self.time_step

        consumer = KafkaConsumer(
            topic,
            # client_id='fooltrader',
            # group_id=self.bot_name,
            value_deserializer=lambda m: json.loads(m.decode('utf8')),
            bootstrap_servers=[KAFKA_HOST])
        topic_partition = TopicPartition(topic=topic, partition=0)
        # offsets_for_times expects the timestamp in epoch milliseconds
        start_timestamp = int(self.start_date.timestamp() * 1000)

        end_offset = consumer.end_offsets([topic_partition])[topic_partition]
        if end_offset == 0:
            self.logger.warning("topic:{} end offset:{}".format(
                topic, end_offset))
            # wait until the topic has data before going further
            for message in consumer:
                self.logger.info("first message:{} to topic:{}".format(
                    message, topic))
                break
            consumer.poll(5, 1)
            consumer.seek(topic_partition, 0)

        # find the offset corresponding to start_timestamp
        partition_map_offset_and_timestamp = consumer.offsets_for_times(
            {topic_partition: start_timestamp})

        if partition_map_offset_and_timestamp:
            offset_and_timestamp = partition_map_offset_and_timestamp[
                topic_partition]

            if offset_and_timestamp:
                # partitions are assigned after poll, so we can seek
                consumer.poll(5, 1)
                # move to the offset
                consumer.seek(topic_partition, offset_and_timestamp.offset)
                # the current end offset
                end_offset = consumer.end_offsets([topic_partition
                                                   ])[topic_partition]
                for message in consumer:
                    if 'timestamp' in message.value:
                        message_time = to_timestamp(message.value['timestamp'])
                    else:
                        message_time = to_timestamp(message.timestamp)

                    # if an end date is set, stop once it is reached or
                    # kafka has no more data
                    if self.end_date and (message_time > self.end_date
                                          or message.offset + 1 == end_offset):
                        consumer.close()
                        break

                    self.current_time = message_time

                    # after-close account settlement (disabled here)
                    if False:
                        self.account_service.calculate_closing_account(
                            self.current_time)

                    # self.on_event(message.value)
                    getattr(self, func)(message.value)

            else:
                consumer.poll(5, 1)
                consumer.seek(
                    topic_partition,
                    consumer.end_offsets([topic_partition])[topic_partition] -
                    1)
                message = consumer.poll(5000, 1)
                # message.timestamp is in epoch milliseconds
                kafka_end_date = datetime.fromtimestamp(
                    message[topic_partition][0].timestamp / 1000).strftime(
                        TIME_FORMAT_DAY)
                self.logger.warning(
                    "start:{} is after the last record:{}".format(
                        self.start_date, kafka_end_date))
Example no. 22
 def request_currentyear_kdata(self):
     # Collect DCE daily-quotes Excel export requests for every weekday
     # since 2020-01-01 whose file is not cached yet.
     today = pd.Timestamp.today()
     requests = []
     for date in pd.date_range(start='20200101', end=today):
         the_dir = get_exchange_cache_path(security_type='future',
                                           exchange='dce',
                                           the_date=to_timestamp(date),
                                           data_type="day_kdata") + '.xls'
         if date.dayofweek < 5 and not os.path.exists(the_dir):
             requests.append(
                 FormRequest(
                     url="http://www.dce.com.cn/publicweb/quotesdata/exportDayQuotesChData.html",
                     formdata={
                         'year': str(date.year),
                         # the DCE form appears to expect a zero-based month
                         'month': str(date.month - 1),
                         'day': str(date.day),
                         'dayQuotes.trade_type': '0',
                         'dayQuotes.variety': 'all',
                         'exportType': 'excel'
                     },
                     callback=self.download_dce_kline_data,
                     meta={'filename': the_dir}))
     return requests
Example no. 23
def es_get_latest_timestamp(index, time_field='timestamp', query=None):
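    # Return the timestamp of the newest record in the index, or None when
    # the index has no matching records.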
    latest_record = es_get_latest_record(index, time_field, query)
    if latest_record:
        return to_timestamp(latest_record[time_field])
    return None