Пример #1
0
 def start_requests(self):
     """Yield the initial CZCE requests for the configured data type.

     ``dataType`` is read from the spider settings and stored on
     ``self.dataType``:

     * ``None`` or ``'day_kdata'`` -- one request per weekday from
       2020-01-01 through today for ``FutureDataDaily.xls``, skipping
       dates whose ``.xls`` cache file already exists.
     * ``'historyk'`` -- a single request for the history index page.
     * ``'inventory'`` -- the same date walk, but for
       ``FutureDataHolding.xls``.

     Any other value yields nothing.
     """
     self.dataType = self.settings.get("dataType")
     mode = self.dataType

     if mode == 'historyk':
         yield Request(
             url="http://www.czce.com.cn/cn/jysj/lshqxz/H770319index_1.htm",
             callback=self.download_czce_history_data)
         return

     # Both per-day modes differ only in the cache sub-type and the
     # remote file name, so resolve those first and share one loop.
     if mode is None or mode == 'day_kdata':
         cache_type, remote_name = 'day_kdata', "/FutureDataDaily.xls"
     elif mode == 'inventory':
         cache_type, remote_name = 'inventory', "/FutureDataHolding.xls"
     else:
         return

     url_root = "http://www.czce.com.cn/cn/DFSStaticFiles/Future/"
     now = pd.Timestamp.today()
     for day in pd.date_range(start='20200101', end=now):
         target = get_exchange_cache_path(
             security_type='future',
             exchange='czce',
             the_date=to_timestamp(day),
             data_type=cache_type) + '.xls'
         # Skip Saturdays/Sundays and anything already cached.
         if day.dayofweek >= 5 or os.path.exists(target):
             continue
         yield Request(
             url=url_root + day.strftime("%Y/%Y%m%d") + remote_name,
             callback=self.download_czce_kline_data,
             meta={'filename': target})
Пример #2
0
 def start_requests(self):
     """Yield the initial CFFEX requests selected by ``self.dataType``.

     * ``None`` or ``'dayk'`` -- one CSV request per weekday from
       2006-06-30 through today, skipping dates whose cache file exists.
     * ``'inventory'`` -- for each such weekday, one CSV request per
       product code in ``IF, IC, IH, T, TF``, skipping cached files.

     Any other value yields nothing.
     """

     def weekdays():
         # Monday..Friday only (dayofweek 0-4) over the full history span.
         span = pd.date_range(start='2006-06-30',
                              end=pd.Timestamp.today())
         return span[span.dayofweek < 5]

     mode = self.dataType
     if mode is None or mode == 'dayk':
         for day in weekdays():
             target = get_exchange_cache_path(
                 security_type='future',
                 exchange='cffex',
                 data_type='day_kdata',
                 the_date=to_timestamp(day)) + ".csv"
             if os.path.exists(target):
                 continue
             yield Request(
                 url="http://www.cffex.com.cn/sj/hqsj/rtj/" +
                 day.strftime("%Y%m/%d/%Y%m%d") + "_1.csv",
                 callback=self.download_cffex_history_data_file,
                 meta={'filename': target})
     elif mode == 'inventory':
         products = ['IF', 'IC', 'IH', 'T', 'TF']
         for day in weekdays():
             for code in products:
                 # Cache file name is the per-date path with the product
                 # code appended before the extension.
                 target = get_exchange_cache_path(
                     security_type='future',
                     exchange='cffex',
                     data_type='inventory',
                     the_date=to_timestamp(day)) + code + ".csv"
                 if os.path.exists(target):
                     continue
                 yield Request(
                     url="http://www.cffex.com.cn/sj/ccpm/" +
                     day.strftime("%Y%m/%d/") + code + "_1.csv",
                     callback=self.download_cffex_history_data_file,
                     meta={'filename': target})
Пример #3
0
    def start_requests(self):
        """Yield the initial SHFE requests for the configured data type.

        ``dataType`` is read from the spider settings and stored on
        ``self.dataType``. Exactly one of the following branches runs:

        * ``'inventory'`` -- one request per weekday over the last 520
          weeks for which no ``.json`` cache file exists.
        * ``'day_kdata'`` -- one request per weekday from 2020-01-01
          through today for which no cache file exists.
        * anything else -- one request per year from 2009 up to (but not
          including) the current year for each yearly archive that is not
          yet on disk.

        The branches form a single ``if / elif / else`` chain so that an
        ``'inventory'`` run does not additionally fall through to the
        yearly-archive branch.
        """
        self.dataType = self.settings.get("dataType")

        if self.dataType == 'inventory':
            today = pd.Timestamp.today()
            for date in pd.date_range(start=today.date() -
                                      pd.Timedelta(weeks=520),
                                      end=today):
                the_dir = get_exchange_cache_path(
                    security_type='future',
                    exchange='shfe',
                    the_date=to_timestamp(date),
                    data_type='inventory') + '.json'
                # Weekdays only, and only when not already cached.
                if date.dayofweek < 5 and not os.path.exists(the_dir):
                    yield Request(
                        url=self.get_day_inventory_url(
                            the_date=date.strftime('%Y%m%d')),
                        meta={
                            'the_date': date,
                            'the_path': the_dir
                        },
                        callback=self.download_shfe_data_by_date)

        elif self.dataType == 'day_kdata':
            daterange = pd.date_range(start='2020-01-01',
                                      end=pd.Timestamp.today())
            daterange = daterange[daterange.dayofweek < 5]
            # Per-day data.
            for the_date in daterange:
                the_path = get_exchange_cache_path(
                    security_type='future',
                    exchange='shfe',
                    the_date=to_timestamp(the_date),
                    data_type='day_kdata')

                if not os.path.exists(the_path):
                    yield Request(
                        url=self.get_day_kdata_url(
                            the_date=the_date.strftime('%Y%m%d')),
                        meta={
                            'the_date': the_date,
                            'the_path': the_path
                        },
                        callback=self.download_shfe_data_by_date)

        else:
            # Fetch the yearly statistics archives directly.
            for the_year in range(2009, datetime.today().year):
                the_dir = get_exchange_cache_dir(security_type='future',
                                                 exchange='shfe')
                the_path = os.path.join(
                    the_dir, "{}_shfe_history_data.zip".format(the_year))

                if not os.path.exists(the_path):
                    yield Request(
                        url=self.get_year_k_data_url(the_year=the_year),
                        meta={
                            'the_year': the_year,
                            'the_path': the_path
                        },
                        callback=self.download_shfe_history_data)
Пример #4
0
    def start_requests(self):
        """Yield SHFE requests for explicit trading dates or yearly archives.

        ``trading_dates`` is read from the spider settings and stored on
        ``self.trading_dates``. When it is falsy, one request is yielded
        for each year from 2009 up to (but not including) the current year
        whose archive zip is not on disk. Otherwise one daily k-data
        request is yielded for every listed date, with no cache check.
        """
        self.trading_dates = self.settings.get("trading_dates")

        if not self.trading_dates:
            # Fetch the yearly statistics archives directly.
            for year in range(2009, datetime.today().year):
                archive_dir = get_exchange_cache_dir(security_type='future',
                                                     exchange='shfe')
                archive = os.path.join(
                    archive_dir, "{}_shfe_history_data.zip".format(year))
                if os.path.exists(archive):
                    continue
                request_meta = {'the_year': year, 'the_path': archive}
                yield Request(url=self.get_year_k_data_url(the_year=year),
                              meta=request_meta,
                              callback=self.download_shfe_history_data)
            return

        # Per-day data for each requested trading date.
        for day in self.trading_dates:
            day_path = get_exchange_cache_path(security_type='future',
                                               exchange='shfe',
                                               the_date=to_timestamp(day),
                                               data_type='day_kdata')
            request_meta = {'the_date': day, 'the_path': day_path}
            yield Request(url=self.get_day_kdata_url(the_date=day),
                          meta=request_meta,
                          callback=self.download_shfe_data_by_date)
Пример #5
0
 def start_requests(self):
     """Yield the initial CZCE (portal URLs) requests per ``self.dataType``.

     * ``None`` -- one ``FutureDataDaily.xls`` request per weekday from
       1 January of the current year through today, skipping dates whose
       ``.xls`` cache file already exists.
     * ``'historyk'`` -- a single request for the history index page.
     * ``'inventory'`` -- one ``FutureDataHolding.xls`` request per weekday
       over the last 450 weeks, skipping cached dates.

     Any other value yields nothing.
     """
     mode = self.dataType

     if mode == 'historyk':
         yield Request(
             url=
             "http://www.czce.com.cn/portal/jysj/qhjysj/lshqxz/A09112017index_1.htm",
             callback=self.download_czce_history_data)
         return

     if mode is None:
         now = pd.Timestamp.today()
         # dayofyear - 1 days back from today lands on 1 January.
         first_day = now.date() - pd.Timedelta(days=now.dayofyear - 1)
         cache_type, remote_name = 'day_kdata', "/FutureDataDaily.xls"
     elif mode == 'inventory':
         now = pd.Timestamp.today()
         first_day = now.date() - pd.Timedelta(weeks=450)
         cache_type, remote_name = 'inventory', "/FutureDataHolding.xls"
     else:
         return

     url_root = "http://www.czce.com.cn/portal/DFSStaticFiles/Future/"
     for day in pd.date_range(start=first_day, end=now):
         target = get_exchange_cache_path(
             security_type='future',
             exchange='czce',
             the_date=to_timestamp(day),
             data_type=cache_type) + '.xls'
         # Skip Saturdays/Sundays and anything already cached.
         if day.dayofweek >= 5 or os.path.exists(target):
             continue
         yield Request(
             url=url_root + day.strftime("%Y/%Y%m%d") + remote_name,
             callback=self.download_czce_kline_data,
             meta={'filename': target})
Пример #6
0
 def request_currentyear_kdata(self):
     """Build DCE daily-quote export requests for uncached weekdays.

     Walks every date from 2020-01-01 through today and, for each weekday
     whose ``.xls`` cache file does not exist, creates a ``FormRequest``
     against the day-quotes export endpoint.

     Returns:
         list: the ``FormRequest`` objects, in date order.
     """
     export_url = ("http://www.dce.com.cn/publicweb/quotesdata/"
                   "exportDayQuotesChData.html")
     now = pd.Timestamp.today()
     pending = []
     for day in pd.date_range(start='20200101', end=now):
         target = get_exchange_cache_path(
             security_type='future',
             exchange='dce',
             the_date=to_timestamp(day),
             data_type="day_kdata") + '.xls'
         if day.dayofweek >= 5 or os.path.exists(target):
             continue
         form = {
             'year': str(day.year),
             # The form is sent month - 1; presumably the endpoint uses
             # zero-based months -- TODO confirm.
             'month': str(day.month - 1),
             'day': str(day.day),
             'dayQuotes.trade_type': '0',
             'dayQuotes.variety': 'all',
             'exportType': 'excel',
         }
         pending.append(
             FormRequest(url=export_url,
                         formdata=form,
                         callback=self.download_dce_kline_data,
                         meta={'filename': target}))
     return pending
Пример #7
0
 def request_inventory_data(self):
     """Build DCE member-position batch-export requests for uncached weekdays.

     Walks every date from 2020-01-01 through today and, for each weekday
     whose ``.zip`` cache file does not exist, creates a ``FormRequest``
     against the batch export endpoint.

     Returns:
         list: the ``FormRequest`` objects, in date order.
     """
     export_url = ("http://www.dce.com.cn/publicweb/quotesdata/"
                   "exportMemberDealPosiQuotesBatchData.html")
     now = pd.Timestamp.today()
     pending = []
     for day in pd.date_range(start='20200101', end=now):
         target = get_exchange_cache_path(
             security_type='future',
             exchange='dce',
             the_date=to_timestamp(day),
             data_type="day_inventory") + '.zip'
         if day.dayofweek >= 5 or os.path.exists(target):
             continue
         form = {
             'batchExportFlag': 'batch',
             'contract.contract_id': 'all',
             'contract.variety_id': 'a',
             'year': str(day.year),
             # The form is sent month - 1; presumably the endpoint uses
             # zero-based months -- TODO confirm.
             'month': str(day.month - 1),
             'day': str(day.day),
             'memberDealPosiQuotes.trade_type': '0',
             'memberDealPosiQuotes.variety': 'all',
         }
         pending.append(
             FormRequest(url=export_url,
                         formdata=form,
                         callback=self.download_dce_kline_data,
                         meta={'filename': target}))
     return pending
def crawl_rollYield_And_Spread():
    """Download fushare spot-price tables for every calendar date up to today.

    For each date string (``%Y%m%d``) in the fushare calendar that is not
    in the future, the spot-price frame is fetched and written to
    ``<cache path> + 'spotPrice' + <date> + '.csv'``. Dates whose CSV is
    already on disk are skipped. A failure for one date is reported on
    stdout and does not stop the remaining dates.
    """
    # Return value is unused; the call is retained in case it has a side
    # effect such as creating the cache directory -- TODO confirm.
    get_exchange_cache_dir(security_type='future',
                           exchange='shfe',
                           data_type="day_kdata")
    today = pd.Timestamp.today()
    past_dates = [
        day for day in fushare.cons.get_calendar()
        if datetime.strptime(day, '%Y%m%d') <= today
    ]
    for date in past_dates:
        the_dir = get_exchange_cache_path(security_type='future',
                                          exchange='shfe',
                                          the_date=to_timestamp(date),
                                          data_type='misc')
        target = the_dir + 'spotPrice' + date + '.csv'
        # Check the file that is actually written, not the bare prefix,
        # so completed dates are not fetched again on the next run.
        if os.path.exists(target):
            continue
        try:
            spdf = fushare.get_spotPrice(date)
            spdf.to_csv(target)
        except Exception as e:
            # Best effort per date. Exception (not BaseException) so that
            # KeyboardInterrupt / SystemExit still abort the crawl.
            print("not downloaded for " + date + ": " + repr(e))