示例#1
0
def test_get_china_stock_list():
    """Smoke-test quote.get_security_list: exchange filters, inclusive
    code-range filter, explicit code list, and 'es' mode output shape."""
    print(settings.FOOLTRADER_STORE_PATH)

    # both exchanges together: the Shenzhen code 000001 must be present
    listing = quote.get_security_list('stock', exchanges=['sh', 'sz'])
    assert '000001' in listing.index
    assert listing.loc['000001', 'sinaIndustry'] == '金融行业'

    # Shanghai only
    listing = quote.get_security_list('stock', exchanges=['sh'])
    assert '600000' in listing.index
    assert listing.loc['600000', 'sinaIndustry'] == '金融行业'

    # code range is inclusive at both ends
    listing = quote.get_security_list('stock',
                                      exchanges=['sh', 'sz'],
                                      start='000338',
                                      end='600388')
    assert '000338' in listing.index
    assert '600388' in listing.index
    assert '600389' not in listing.index

    # explicit code list yields exactly those securities
    listing = quote.get_security_list('stock',
                                      exchanges=['sh', 'sz'],
                                      codes=['300027', '000002'])
    assert len(listing.index) == 2

    # 'es' mode: the area column becomes a list of region names
    listing = quote.get_security_list('stock', exchanges=['sh', 'sz'], mode='es')
    assert type(listing.loc['600004', 'sinaArea']) == list
    assert '广州' in listing.loc['600004', 'sinaArea']
    assert '广东' in listing.loc['600004', 'sinaArea']
示例#2
0
def init_env():
    """Verify FOOLTRADER_STORE_PATH exists and build the stock/index/future
    directory layout beneath it.

    Prints a hint (instead of raising) when the store path is missing.
    """
    if not os.path.exists(FOOLTRADER_STORE_PATH):
        # BUG FIX: the message contained a '{}' placeholder but was never
        # passed through .format(), so it printed the literal braces.
        print("{} is a wrong path".format(FOOLTRADER_STORE_PATH))
        print("please set env FOOLTRADER_STORE_PATH to working path or set it in settings.py")
    else:
        # create one folder per stock
        for _, item in get_security_list(exchanges=EXCHANGE_LIST_COL).iterrows():
            mkdir_for_stock(item)

        # create the kdata folder for each index
        for _, item in get_security_list(security_type='index', exchanges=['sh', 'sz', 'nasdaq']).iterrows():
            kdata_dir = get_kdata_dir(item)
            if not os.path.exists(kdata_dir):
                os.makedirs(kdata_dir)
        # create the futures cache/data folders per exchange
        for exchange in ['shfe', 'dce', 'zce']:
            exchange_cache_dir = get_exchange_cache_dir(security_type='future', exchange=exchange)
            if not os.path.exists(exchange_cache_dir):
                os.makedirs(exchange_cache_dir)

            # NOTE(review): exchange='shfe' is hard-coded although this runs
            # inside the loop over all future exchanges — presumably it should
            # be exchange=exchange; confirm intent before changing.
            exchange_cache_dir = get_exchange_cache_dir(security_type='future', exchange='shfe',
                                                        the_year=datetime.today().year,
                                                        data_type="day_kdata")
            if not os.path.exists(exchange_cache_dir):
                os.makedirs(exchange_cache_dir)

            exchange_dir = get_exchange_dir(security_type='future', exchange=exchange)
            if not os.path.exists(exchange_dir):
                os.makedirs(exchange_dir)
示例#3
0
def init_env():
    """Prepare the on-disk store: the FILES_STORE root plus per-security
    folders and per-index kdata folders."""
    if not os.path.exists(settings.FILES_STORE):
        os.makedirs(settings.FILES_STORE)

    # one folder per stock security
    for _, security in get_security_list(exchanges=EXCHANGE_LIST_COL).iterrows():
        mkdir_for_security(security)

    # one kdata folder per index security
    index_list = get_security_list(security_type='index', exchanges=['sh', 'sz', 'nasdaq'])
    for _, security in index_list.iterrows():
        kdata_path = get_kdata_dir(security)
        if not os.path.exists(kdata_path):
            os.makedirs(kdata_path)
示例#4
0
def init_env():
    """Create the FILES_STORE root, a folder per security and a kdata
    folder per index, if any of them are missing."""
    if not os.path.exists(settings.FILES_STORE):
        os.makedirs(settings.FILES_STORE)

    # stock folders
    for _, security in get_security_list().iterrows():
        mkdir_for_security(security)

    # index kdata folders
    for _, security in get_security_list(security_type='index').iterrows():
        kdata_path = get_kdata_dir(security)
        if not os.path.exists(kdata_path):
            os.makedirs(kdata_path)
示例#5
0
def init_env():
    """Create the FOOLTRADER_STORE_PATH root plus per-security and
    per-index kdata directories when they do not yet exist."""
    store_root = settings.FOOLTRADER_STORE_PATH
    if not os.path.exists(store_root):
        os.makedirs(store_root)

    # one directory per stock
    for _, security in get_security_list(exchanges=EXCHANGE_LIST_COL).iterrows():
        mkdir_for_security(security)

    # one kdata directory per index
    indices = get_security_list(security_type='index',
                                exchanges=['sh', 'sz', 'nasdaq'])
    for _, security in indices.iterrows():
        kdata_path = get_kdata_dir(security)
        if not os.path.exists(kdata_path):
            os.makedirs(kdata_path)
示例#6
0
def newhighergenerator(start_date, fuquan='qfq', source='163', period=20):
    """Count, for each trading day since `start_date`, how many stocks in the
    sampled code range closed at a new `period`-day high, and dump the
    per-stock 0/1 matrix to 'newhigher.csv'.

    :param start_date: first trading day to evaluate
    :param fuquan: price-adjustment mode passed to quote.get_kdata
    :param source: kdata source for the base index
    :param period: look-back window (trading rows) defining a "new high"
    :return: True on completion
    """
    baseindex = 'index_sh_000001'
    # the base index supplies the trading-day calendar (dh row index)
    df = quote.get_kdata(baseindex, start_date=start_date, source=source)
    dh = pd.DataFrame(0, index=df.index, columns=['total'])  # TODO: add per-market totals
    # sampled code range; widen to the full list when needed
    stocklist = quote.get_security_list(security_type='stock', start='600000', end='600030', mode='simple')
    for _, item in stocklist.iterrows():
        print("caculating {}".format(item.id))
        # PERF FIX: these three values are invariant over the per-day loop;
        # the original re-fetched the kdata and rebuilt the timestamp list
        # for every single trading day.
        ds = quote.get_kdata(item.id, fuquan=fuquan)
        indexlist = list(ds['timestamp'])
        list_date = datetime.datetime.strptime(item.listDate, '%Y-%m-%d')
        for ts in dh.index:
            if (ts - list_date).days < period:
                # too close to the listing date for a full look-back window
                dh.at[ts, item.id] = 0
            else:
                tsstr = ts.strftime('%Y-%m-%d')
                if tsstr in indexlist:
                    pos = indexlist.index(tsstr)
                    # new high: today's close tops the trailing window
                    if ds['close'][pos] >= max(ds['close'][pos - period + 1:pos + 1]):
                        dh.at[ts, item.id] = 1
                    else:
                        dh.at[ts, item.id] = 0
                else:
                    # stock did not trade that day
                    dh.at[ts, item.id] = 0

    df['total'] = dh.apply(lambda x: x.sum(), axis=1)
    df['index_c'] = df['close']
    dh.to_csv('newhigher.csv')
    return True
示例#7
0
def cash_flow_statement_to_es(force=False):
    """Index every security's cash-flow statements into Elasticsearch.

    :param force: when True, re-index from scratch instead of resuming
                  after the latest indexed reportDate
    """
    es_index_mapping('cash_flow_statement', CashFlowStatement)

    for _, security_item in get_security_list().iterrows():
        try:
            start_date = None
            if not force:
                # resume after the newest reportDate already in the index
                query = {"term": {"securityId": security_item["id"]}}
                latest_record = es_get_latest_record(
                    index='cash_flow_statement',
                    time_field='reportDate',
                    query=query)
                logger.info("latest_record:{}".format(latest_record))
                if latest_record:
                    start_date = latest_record['reportDate']

            for json_object in get_cash_flow_statement_items(
                    security_item, start_date=start_date):
                # the latest record itself is already indexed — skip it
                if start_date and is_same_date(start_date,
                                               json_object['reportDate']):
                    continue

                cash_flow_statement = CashFlowStatement(
                    meta={'id': json_object['id']})
                fill_doc_type(cash_flow_statement, json_object)
                cash_flow_statement.save()
        except Exception as e:
            # BUG FIX: logger.warn is deprecated and logging never interpolates
            # '{}' placeholders — use lazy %s-style arguments instead.
            logger.warning("wrong CashFlowStatement:%s,error:%s", security_item, e)
示例#8
0
def security_meta_to_es(security_type='cryptocurrency', force=False):
    """Bulk-index security metadata into the 'crypto_currency_meta' index.

    :param security_type: only 'cryptocurrency' is supported
    :param force: when True, re-index everything instead of resuming
                  after the latest indexed listDate
    :raises ValueError: for an unsupported security_type
    """
    if security_type == 'cryptocurrency':
        doc_type = CryptoCurrencyMeta
    else:
        # BUG FIX: doc_type was left unbound for any other type, producing a
        # confusing NameError below — fail fast with a clear message.
        raise ValueError("unsupported security_type: {}".format(security_type))

    es_index_mapping('crypto_currency_meta', doc_type)
    start_date = None
    if not force:
        # resume after the newest listDate already in the index
        latest_record = es_get_latest_record('crypto_currency_meta',
                                             time_field='listDate')
        logger.info("latest_record:{}".format(latest_record))
        if latest_record and 'listDate' in latest_record:
            start_date = latest_record['listDate']

    actions = []
    for _, item in get_security_list(security_type=security_type).iterrows():
        # the latest record itself is already indexed — skip it
        if start_date and item['listDate'] and is_same_date(
                start_date, item['listDate']):
            continue
        try:
            security_meta = doc_type(meta={'id': item['id']})
            fill_doc_type(security_meta, json.loads(item.to_json()))
            actions.append(security_meta.to_dict(include_meta=True))
        except Exception as e:
            # BUG FIX: logging never interpolates '{}' — lazy %s args
            logger.warning("wrong SecurityItem:%s,error:%s", item, e)
    if actions:
        resp = elasticsearch.helpers.bulk(es_client, actions)
        logger.info(resp)
示例#9
0
def stock_kdata_to_es(start='000001', end='666666', force=False):
    """Index daily kdata for securities in code range [start, end] into ES.

    :param force: when True, ignore the latest indexed timestamp and
                  re-index from the beginning
    """
    for _, security_item in get_security_list(start=start, end=end).iterrows():
        # create the per-security index if needed
        index_name = get_es_kdata_index(security_item['id'])
        es_index_mapping(index_name, StockKData)

        start_date = None
        if not force:
            # resume from the newest record already in the index
            latest_record = es_get_latest_record(index_name)
            logger.info("latest_record:{}".format(latest_record))
            if latest_record:
                start_date = latest_record['timestamp']

        for _, kdata_item in get_kdata(security_item,
                                       start_date=start_date).iterrows():
            # the latest record itself is already indexed — skip it
            if start_date and is_same_date(start_date,
                                           kdata_item['timestamp']):
                continue

            try:
                # renamed from `id`, which shadowed the builtin
                doc_id = '{}_{}'.format(kdata_item['securityId'],
                                        kdata_item['timestamp'])
                kdata = StockKData(meta={'id': doc_id}, id=doc_id)
                fill_doc_type(kdata, json.loads(kdata_item.to_json()))
                kdata.save(index=index_name)
            except Exception as e:
                # BUG FIX: logging never interpolates '{}' — lazy %s args
                logger.warning("wrong KdataDay:%s,error:%s", kdata_item, e)
示例#10
0
def crawl_index_quote():
    """Crawl daily kdata for every index security, then, for the four main
    market indices, re-crawl summary fields that are still missing.
    """
    for _, security_item in get_security_list(security_type='index').iterrows():
        # crawl the daily k-line data
        logger.info("{} get index kdata start".format(security_item['code']))

        # resume from the day after the last downloaded trading date
        start_date = get_latest_download_trading_date(security_item, source='163')
        end_date = pd.Timestamp.today()
        if start_date > end_date:
            logger.info("{} kdata is ok".format(security_item['code']))
        else:
            process_crawl(StockKdataSpider163, {"security_item": security_item,
                                                "start_date": start_date,
                                                "end_date": end_date})

        logger.info("{} get index kdata from 163 end".format(security_item['code']))

        # fetch market summary data [Shanghai, Shenzhen, SME board, ChiNext]
        if security_item['id'] in ['index_sh_000001', 'index_sz_399106', 'index_sz_399005', 'index_sz_399006']:
            # if security_item['id'] in ['index_sz_399106', 'index_sz_399005', 'index_sz_399006']:
            # keep only the rows whose summary columns are still missing;
            # those dates need the summary spider run again
            df = get_kdata(security_item=security_item)
            df = df[df['turnoverRate'].isna() | df['tCap'].isna() | df['mCap'].isna() | df[
                'pe'].isna()]
            if not df.empty:
                dates = df.index.strftime('%Y-%m-%d').tolist()
                # if security_item['id'] == 'index_sz_399106':
                # dates = [the_date for the_date in dates if
                #          pd.Timestamp(the_date).date().year >= 2018]
                if dates:
                    process_crawl(StockSummarySpider, {"security_item": security_item,
                                                       "the_dates": dates})
示例#11
0
def crawl_usa_stock_data():
    """Refresh the American stock list, then crawl daily kdata for the
    configured US stock codes."""
    # stock list first — the kdata crawl iterates over it
    process_crawl(AmericaListSpider, {})

    usa_stocks = get_security_list(security_type='stock', exchanges=['nasdaq'],
                                   codes=US_STOCK_CODES)
    for _, security_item in usa_stocks.iterrows():
        process_crawl(AmericaStockKdataSpider, {"security_item": security_item})
示例#12
0
def legacy_kdata_to_csv():
    """Convert legacy per-period json kdata files into the csv layout.

    For each security and fuquan variant, every json file (except the 'all'
    aggregate) that does not already have a csv counterpart is read and
    written as csv with the appropriate column set.
    """
    for index, security_item in get_security_list().iterrows():
        for fuquan in (True, False):
            # renamed from `dir`, which shadowed the builtin
            legacy_dir = get_kdata_dir_old(security_item, fuquan)
            if not os.path.exists(legacy_dir):
                continue

            files = [os.path.join(legacy_dir, f) for f in os.listdir(legacy_dir) if
                     ('all' not in f and 'json' in f and os.path.isfile(os.path.join(legacy_dir, f)))]

            for f in files:
                # file name encodes <year>_<quarter-or-period>
                tmp = os.path.basename(f).split('_')
                # the two original branches differed only in the flag and
                # the column handling — deduplicated here
                fuquan_flag = 'hfq' if fuquan else 'bfq'
                csv_path = get_kdata_path(security_item, tmp[0], tmp[1], fuquan_flag)
                if os.path.exists(csv_path):
                    continue

                df = pd.read_json(f, dtype={'code': str})
                logger.info("{} to {}".format(f, csv_path))

                if fuquan:
                    df = df.loc[:,
                         ['timestamp', 'code', 'low', 'open', 'close', 'high', 'volume', 'turnover',
                          'securityId',
                          'fuquan']]
                    df.columns = KDATA_COLUMN_FQ
                else:
                    df = df.loc[:, KDATA_COLUMN]

                df.to_csv(csv_path, index=False)
示例#13
0
def stock_kdata_to_es(start='000001', end='666666', codes=US_STOCK_CODES, force=False):
    """Bulk-index daily kdata for CN + NASDAQ securities into per-(type,
    exchange) Elasticsearch indices.

    :param force: when True, ignore the latest indexed timestamp and
                  re-index from the beginning
    """
    for _, security_item in get_security_list(start=start, end=end, exchanges=['sh', 'sz', 'nasdaq'],
                                              codes=codes).iterrows():
        # create the index for this security type/exchange if needed
        index_name = get_es_kdata_index(security_item['type'], security_item['exchange'])
        es_index_mapping(index_name, StockKData)

        start_date = None
        if not force:
            # resume from the newest timestamp already in the index
            query = {"term": {"securityId": security_item["id"]}}
            latest_record = es_get_latest_record(index_name, query=query)
            logger.info("latest_record:{}".format(latest_record))
            if latest_record:
                start_date = latest_record['timestamp']
        actions = []
        for _, kdata_item in get_kdata(security_item, start_date=start_date).iterrows():
            # the latest record itself is already indexed — skip it
            if start_date and is_same_date(start_date, kdata_item['timestamp']):
                continue

            try:
                # renamed from `id`, which shadowed the builtin
                doc_id = '{}_{}'.format(kdata_item['securityId'], kdata_item['timestamp'])
                kdata = StockKData(meta={'id': doc_id}, id=doc_id)
                kdata.meta['index'] = index_name
                fill_doc_type(kdata, json.loads(kdata_item.to_json()))
                actions.append(kdata.to_dict(include_meta=True))
            except Exception as e:
                # BUG FIX: logging never interpolates '{}' — lazy %s args
                logger.warning("wrong KdataDay:%s,error:%s", kdata_item, e)
        if actions:
            resp = elasticsearch.helpers.bulk(es, actions)
            logger.info(resp)
    def start_requests(self):
        """Yield one finance-csv download request per target security:
        the security injected via spider settings, or every NASDAQ
        security when none is configured."""
        security_item = self.settings.get("security_item")

        if security_item is not None:
            targets = [security_item]
        else:
            # no explicit target — crawl the whole NASDAQ listing
            targets = [item for _, item in get_security_list(exchanges=['nasdaq']).iterrows()]

        for item in targets:
            yield Request(url=self.get_finance_url(item['code']),
                          meta={
                              'path': get_finance_path(item),
                              'item': item
                          },
                          callback=self.download_finance_csv)
示例#15
0
def income_statement_to_es(force=False):
    """Bulk-index income statements for every security into Elasticsearch.

    :param force: when True, re-index from scratch instead of resuming
                  after the latest indexed reportDate
    """
    es_index_mapping('income_statement', IncomeStatement)

    for _, security_item in get_security_list().iterrows():
        try:
            start_date = None
            if not force:
                # resume after the newest reportDate already in the index
                query = {"term": {"securityId": security_item["id"]}}
                latest_record = es_get_latest_record(index='income_statement',
                                                     time_field='reportDate',
                                                     query=query)
                logger.info("latest_record:{}".format(latest_record))
                if latest_record:
                    start_date = latest_record['reportDate']
            actions = []
            for json_object in get_income_statement_items(
                    security_item, start_date=start_date):
                # the latest record itself is already indexed — skip it
                if start_date and is_same_date(start_date,
                                               json_object['reportDate']):
                    continue

                income_statement = IncomeStatement(
                    meta={'id': json_object['id']})
                fill_doc_type(income_statement, json_object)
                actions.append(income_statement.to_dict(include_meta=True))
            if actions:
                resp = elasticsearch.helpers.bulk(es, actions)
                logger.info(resp)
        except Exception as e:
            # BUG FIX: logging never interpolates '{}' — lazy %s args
            logger.warning("wrong IncomeStatement:%s,error:%s", security_item, e)
示例#16
0
def check_convert_result():
    """Verify that the per-period csv kdata files, merged together, equal
    the consolidated day-k csv for each security/fuquan combination."""
    for index, security_item in get_security_list().iterrows():
        for fuquan in ('bfq', 'hfq'):
            dayk_path = get_kdata_path(security_item, fuquan=fuquan)
            if os.path.exists(dayk_path):
                df_result = pd.read_csv(dayk_path)

                if fuquan == 'hfq':
                    df = pd.DataFrame(columns=data_contract.KDATA_COLUMN_FQ)
                else:
                    df = pd.DataFrame(columns=data_contract.KDATA_COLUMN)

                # renamed from `dir`, which shadowed the builtin
                kdata_dir = get_kdata_dir(security_item, fuquan=fuquan)

                if os.path.exists(kdata_dir):
                    files = [
                        os.path.join(kdata_dir, f) for f in os.listdir(kdata_dir)
                        if ('day' not in f and 'csv' in f
                            and os.path.isfile(os.path.join(kdata_dir, f)))
                    ]
                    for f in files:
                        # BUG FIX: DataFrame.append was deprecated in pandas 1.4
                        # and removed in 2.0 — use pd.concat instead
                        df = pd.concat([df, pd.read_csv(f)], ignore_index=True)
                    assert_df(df, df_result)
                    logger.info("{} merge as one ok".format(
                        security_item['code']))
示例#17
0
def cash_flow_statement_to_es(force=False):
    """Bulk-index cash-flow statements for every security into Elasticsearch.

    :param force: when True, re-index from scratch instead of resuming
                  after the latest indexed reportDate
    """
    es_index_mapping('cash_flow_statement', CashFlowStatement)

    for _, security_item in get_security_list().iterrows():
        try:
            start_date = None
            if not force:
                # resume after the newest reportDate already in the index
                query = {"term": {"securityId": security_item["id"]}}
                latest_record = es_get_latest_record(index='cash_flow_statement', time_field='reportDate', query=query)
                logger.info("latest_record:{}".format(latest_record))
                if latest_record:
                    start_date = latest_record['reportDate']
            actions = []
            for json_object in get_cash_flow_statement_items(security_item, start_date=start_date):
                # the latest record itself is already indexed — skip it
                if start_date and is_same_date(start_date, json_object['reportDate']):
                    continue

                cash_flow_statement = CashFlowStatement(meta={'id': json_object['id']})
                fill_doc_type(cash_flow_statement, json_object)
                actions.append(cash_flow_statement.to_dict(include_meta=True))
            if actions:
                resp = elasticsearch.helpers.bulk(es, actions)
                logger.info(resp)
        except Exception as e:
            # BUG FIX: logging never interpolates '{}' — lazy %s args
            logger.warning("wrong CashFlowStatement:%s,error:%s", security_item, e)
示例#18
0
def usa_stock_finance_to_es(force=False):
    """Bulk-index finance summaries for the configured US stocks into ES.

    :param force: when True, re-index from scratch instead of resuming
                  after the latest indexed reportDate
    """
    es_index_mapping('finance_summary', FinanceSummary)

    for _, security_item in get_security_list(exchanges=['nasdaq'], codes=US_STOCK_CODES).iterrows():
        try:
            start_date = None
            if not force:
                # resume after the newest reportDate already in the index
                query = {"term": {"securityId": security_item["id"]}}
                latest_record = es_get_latest_record(index='finance_summary', time_field='reportDate', query=query)
                logger.info("latest_record:{}".format(latest_record))
                if latest_record:
                    start_date = latest_record['reportDate']
            actions = []
            for _, json_object in get_finance_summary_items(security_item, start_date=start_date).iterrows():
                # the latest record itself is already indexed — skip it
                if start_date and is_same_date(start_date, json_object['reportDate']):
                    continue

                finance_summary = FinanceSummary(meta={'id': json_object['id']})
                fill_doc_type(finance_summary, json_object.to_dict())
                actions.append(finance_summary.to_dict(include_meta=True))
            if actions:
                resp = elasticsearch.helpers.bulk(es, actions)
                logger.info(resp)
        except Exception as e:
            # BUG FIX: logging never interpolates '{}' — lazy %s args
            logger.warning("wrong FinanceSummary:%s,error:%s", security_item, e)
示例#19
0
def stock_meta_to_es(force=False):
    """Bulk-index stock metadata into the 'stock_meta' Elasticsearch index.

    :param force: when True, re-index everything instead of resuming
                  after the latest indexed listDate
    """
    es_index_mapping('stock_meta', StockMeta)
    start_date = None
    if not force:
        # resume after the newest listDate already in the index
        latest_record = es_get_latest_record('stock_meta',
                                             time_field='listDate')
        logger.info("latest_record:{}".format(latest_record))
        if latest_record:
            start_date = latest_record['listDate']

    actions = []
    for _, item in get_security_list(mode='es',
                                     start_date=start_date,
                                     exchanges=EXCHANGE_LIST_COL).iterrows():
        # the latest record itself is already indexed — skip it
        if start_date and is_same_date(start_date, item['listDate']):
            continue
        try:
            stock_meta = StockMeta(meta={'id': item['id']})
            fill_doc_type(stock_meta, json.loads(item.to_json()))
            actions.append(stock_meta.to_dict(include_meta=True))
        except Exception as e:
            # BUG FIX: logging never interpolates '{}' — lazy %s args
            logger.warning("wrong SecurityItem:%s,error:%s", item, e)
    if actions:
        resp = elasticsearch.helpers.bulk(es, actions)
        logger.info(resp)
示例#20
0
def usa_stock_finance_to_es(force=False):
    """Bulk-index finance summaries for the configured US stocks into ES.

    :param force: when True, re-index from scratch instead of resuming
                  after the latest indexed reportDate
    """
    es_index_mapping('finance_summary', FinanceSummary)

    for _, security_item in get_security_list(exchanges=['nasdaq'],
                                              codes=US_STOCK_CODES).iterrows():
        try:
            start_date = None
            if not force:
                # resume after the newest reportDate already in the index
                query = {"term": {"securityId": security_item["id"]}}
                latest_record = es_get_latest_record(index='finance_summary',
                                                     time_field='reportDate',
                                                     query=query)
                logger.info("latest_record:{}".format(latest_record))
                if latest_record:
                    start_date = latest_record['reportDate']
            actions = []
            for _, json_object in get_finance_summary_items(
                    security_item, start_date=start_date).iterrows():
                # the latest record itself is already indexed — skip it
                if start_date and is_same_date(start_date,
                                               json_object['reportDate']):
                    continue

                finance_summary = FinanceSummary(
                    meta={'id': json_object['id']})
                fill_doc_type(finance_summary, json_object.to_dict())
                actions.append(finance_summary.to_dict(include_meta=True))
            if actions:
                resp = elasticsearch.helpers.bulk(es, actions)
                logger.info(resp)
        except Exception as e:
            # BUG FIX: logging never interpolates '{}' — lazy %s args
            logger.warning("wrong FinanceSummary:%s,error:%s", security_item, e)
示例#21
0
def stock_kdata_to_es(start='000001', end='666666', force=False):
    """Bulk-index daily kdata for securities in code range [start, end]
    into per-(type, exchange) Elasticsearch indices.

    :param force: when True, ignore the latest indexed timestamp and
                  re-index from the beginning
    """
    for _, security_item in get_security_list(start=start, end=end).iterrows():
        # create the index for this security type/exchange if needed
        index_name = get_es_kdata_index(security_item['type'],
                                        security_item['exchange'])
        es_index_mapping(index_name, StockKData)

        start_date = None
        if not force:
            # resume from the newest timestamp already in the index
            query = {"term": {"securityId": security_item["id"]}}
            latest_record = es_get_latest_record(index_name, query=query)
            logger.info("latest_record:{}".format(latest_record))
            if latest_record:
                start_date = latest_record['timestamp']
        actions = []
        for _, kdata_item in get_kdata(security_item,
                                       start_date=start_date).iterrows():
            # the latest record itself is already indexed — skip it
            if start_date and is_same_date(start_date,
                                           kdata_item['timestamp']):
                continue

            try:
                # renamed from `id`, which shadowed the builtin
                doc_id = '{}_{}'.format(kdata_item['securityId'],
                                        kdata_item['timestamp'])
                kdata = StockKData(meta={'id': doc_id}, id=doc_id)
                fill_doc_type(kdata, json.loads(kdata_item.to_json()))
                actions.append(kdata.to_dict(include_meta=True))
            except Exception as e:
                # BUG FIX: logging never interpolates '{}' — lazy %s args
                logger.warning("wrong KdataDay:%s,error:%s", kdata_item, e)
        if actions:
            resp = elasticsearch.helpers.bulk(es, actions)
            logger.info(resp)
示例#22
0
def remove_old_kdata():
    """Delete the legacy fuquan kdata directory of every security."""
    for index, security_item in get_security_list().iterrows():
        for fuquan in (True, False):
            legacy_dir = get_kdata_dir_old(security_item, fuquan)
            # only the fuquan=True variant of the legacy layout is removed
            if os.path.exists(legacy_dir) and fuquan:
                logger.info("remove {}".format(legacy_dir))
                shutil.rmtree(legacy_dir)
示例#23
0
def remove_old_kdata():
    """Remove each security's legacy fuquan kdata directory, if present."""
    for index, security_item in get_security_list().iterrows():
        for fuquan in (True, False):
            old_path = get_kdata_dir_old(security_item, fuquan)
            # the non-fuquan legacy directory is deliberately kept
            if fuquan and os.path.exists(old_path):
                logger.info("remove {}".format(old_path))
                shutil.rmtree(old_path)
 def start_requests(self):
     """Spider entry point: yield requests for the security configured in
     the spider settings, or for every security when none is configured."""
     item = self.settings.get("security_item")
     if item is not None:
         # a single security was injected via spider settings
         for request in self.yield_request(item):
             yield request
     else:
         # no target configured — crawl the whole security list
         for _, item in get_security_list().iterrows():
             for request in self.yield_request(item):
                 yield request
示例#25
0
def kdata_to_kafka(fuquan):
    """Publish every security's sina kdata rows to its kafka kdata topic,
    stamped with the row's trading-day timestamp."""
    for _, security_item in get_security_list().iterrows():
        for _, kdata_item in get_kdata(security_item, source='sina', fuquan=fuquan).iterrows():
            payload = kdata_item.to_json(force_ascii=False)
            row_time = datetime.datetime.strptime(kdata_item['timestamp'], TIME_FORMAT_DAY)
            producer.send(get_kafka_kdata_topic(security_item['id'], fuquan),
                          bytes(payload, encoding='utf8'),
                          timestamp_ms=int(row_time.timestamp()))
            logger.debug("kdata_to_kafka {}".format(payload))
示例#26
0
 def start_requests(self):
     """Spider entry point: yield requests for the configured security,
     or for every security in the list when none is configured."""
     security_item = self.settings.get("security_item")
     if security_item is not None:
         # a single security was injected via spider settings
         for request in self.yield_request(security_item):
             yield request
     else:
         # no target configured — crawl the whole security list
         for _, item in get_security_list().iterrows():
             for request in self.yield_request(item):
                 yield request
示例#27
0
 def start_requests(self):
     """Spider entry point: yield one forecast-data request per security
     in the list."""
     for _, item in get_security_list().iterrows():
         url = self.get_forecast_url(item['code'])
         yield Request(url=url,
                       headers=DEFAULT_KDATA_HEADER,
                       meta={
                           'item': item,
                       },
                       callback=self.download_forecast_data)
示例#28
0
def remove_old_tick():
    """Delete downloaded xls tick files for every security, keeping any
    lock/error marker files in place."""
    for index, security_item in get_security_list().iterrows():
        tick_dir = get_tick_dir(security_item)
        if not os.path.exists(tick_dir):
            continue
        for name in os.listdir(tick_dir):
            path = os.path.join(tick_dir, name)
            if 'xls' in name and 'lock' not in name and 'error' not in name and os.path.isfile(path):
                logger.info("remove {}".format(path))
                os.remove(path)
示例#29
0
 def start_requests(self):
     """Spider entry point: yield requests for the configured security and
     its trading dates, or for every security when none is configured."""
     item = self.settings.get("security_item")
     trading_dates = self.settings.get("trading_dates")
     if item is not None:
         for request in self.yield_request(item, trading_dates):
             yield request
     else:
         # NOTE(review): trading_dates is not forwarded in this branch —
         # presumably yield_request has a default for it; confirm intent.
         for _, item in get_security_list().iterrows():
             for request in self.yield_request(item):
                 yield request
 def start_requests(self):
     """Spider entry point: yield requests for the configured security, or
     for the [STOCK_START_CODE, STOCK_END_CODE] code range when none is
     configured."""
     item = self.settings.get("security_item")
     if item is not None:
         # a single security was injected via spider settings
         for request in self.yield_request(item):
             yield request
     else:
         for _, item in get_security_list(start=STOCK_START_CODE,
                                          end=STOCK_END_CODE).iterrows():
             for request in self.yield_request(item):
                 yield request
示例#31
0
 def start_requests(self):
     """Spider entry point: yield requests for the configured security and
     report type, or for every security when none is configured."""
     security_item = self.settings.get("security_item")
     finance_type = self.settings.get("report_type")
     if security_item is not None:
         for request in self.yield_request(security_item, finance_type):
             yield request
     else:
         # NOTE(review): finance_type is not forwarded in this branch —
         # presumably yield_request has a default for it; confirm intent.
         for _, item in get_security_list().iterrows():
             for request in self.yield_request(item):
                 yield request
示例#32
0
def init_env():
    """Verify FOOLTRADER_STORE_PATH exists and create per-security and
    per-index directories beneath it.

    Prints a hint (instead of raising) when the store path is missing.
    """
    if not os.path.exists(FOOLTRADER_STORE_PATH):
        # BUG FIX: the message contained a '{}' placeholder but was never
        # passed through .format(), so it printed the literal braces.
        print("{} is a wrong path".format(FOOLTRADER_STORE_PATH))
        print(
            "please set env FOOLTRADER_STORE_PATH to working path or set it in settings.py"
        )
    else:
        # create one folder per stock
        for _, item in get_security_list(
                exchanges=EXCHANGE_LIST_COL).iterrows():
            mkdir_for_security(item)

        # create the kdata folder for each index
        for _, item in get_security_list(security_type='index',
                                         exchanges=['sh', 'sz',
                                                    'nasdaq']).iterrows():
            kdata_dir = get_kdata_dir(item)
            if not os.path.exists(kdata_dir):
                os.makedirs(kdata_dir)
示例#33
0
def tick_to_kafka():
    """Publish every security's tick rows to its kafka tick topic, stamped
    with each tick's second-resolution timestamp."""
    for _, security_item in get_security_list().iterrows():
        for tick_df in get_ticks(security_item):
            for _, tick_item in tick_df.iterrows():
                payload = tick_item.to_json(force_ascii=False)
                tick_time = datetime.datetime.strptime(tick_item['timestamp'], TIME_FORMAT_SEC)
                producer.send(get_kafka_tick_topic(security_item['id']),
                              bytes(payload, encoding='utf8'),
                              timestamp_ms=int(tick_time.timestamp()))
                logger.debug("tick_to_kafka {}".format(payload))
示例#34
0
def _crawl_report_if_needed(security_item, current_report_date,
                            get_path, get_items, report_type):
    """Crawl one finance report type when its file is missing or stale.

    If the report file does not exist yet, crawl it unconditionally.
    Otherwise inspect only the first stored item: when its reportDate is
    not the current report period AND the corresponding finance-report
    event has already been published, re-crawl.
    """
    path = get_path(security_item)
    if not os.path.exists(path):
        process_crawl(StockFinanceSpider, {"security_item": security_item,
                                           "report_type": report_type})
        return

    for report_item in get_items(security_item):
        # Current report period not fetched yet?
        if report_item['reportDate'] != current_report_date:
            # Only re-crawl once the report event shows the report is out.
            df = event.get_finance_report_event(security_item,
                                                index='reportDate')
            if current_report_date in df.index:
                process_crawl(StockFinanceSpider,
                              {"security_item": security_item,
                               "report_type": report_type})
        # Only the first (latest) item is inspected, as in the original.
        break


def crawl_finance_data(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE):
    """Crawl finance reports (balance sheet, income, cash flow) per security.

    Crawls the finance-report events first, since the staleness check of
    the three statements depends on them.  Errors for one security are
    logged and do not stop the overall crawl.
    """
    for _, security_item in get_security_list(start=start_code,
                                              end=end_code).iterrows():
        try:
            # Events first: later report checks rely on them.
            process_crawl(StockFinanceReportEventSpider,
                          {"security_item": security_item})

            current_report_date = get_report_date()

            # The three report sections were identical except for the
            # path/item helpers and report_type (and the second and third
            # misleadingly reused the name 'balance_sheet_item').
            _crawl_report_if_needed(security_item, current_report_date,
                                    get_balance_sheet_path,
                                    get_balance_sheet_items,
                                    "balance_sheet")
            _crawl_report_if_needed(security_item, current_report_date,
                                    get_income_statement_path,
                                    get_income_statement_items,
                                    "income_statement")
            _crawl_report_if_needed(security_item, current_report_date,
                                    get_cash_flow_statement_path,
                                    get_cash_flow_statement_items,
                                    "cash_flow")
        except Exception as e:
            logger.error(e)
 def start_requests(self):
     """Yield requests for a configured security/date window, or for all securities."""
     security = self.settings.get("security_item")
     begin = self.settings.get("start_date")
     finish = self.settings.get("end_date")
     if security is None:
         # Nothing configured: crawl the whole security list.
         for _, security in get_security_list().iterrows():
             yield from self.yield_request(security)
     else:
         yield from self.yield_request(security, begin, finish)
示例#36
0
def kdata_to_es(start=None,
                end=None,
                security_type='stock',
                exchanges=None,
                force=False):
    """Bulk-index k-data into Elasticsearch for the selected securities.

    Args:
        start/end: optional security-code range passed to get_security_list.
        security_type: 'stock', 'index' or 'cryptocurrency'.
        exchanges: exchanges to include; defaults to ['sh', 'sz'].
        force: when False, resume after the latest indexed timestamp.

    Raises:
        ValueError: for an unsupported security_type (previously this left
            doc_type unbound and failed later with a NameError).
    """
    if exchanges is None:
        # Avoid a mutable default argument; effective default is unchanged.
        exchanges = ['sh', 'sz']

    if security_type == 'stock':
        doc_type = StockKData
    elif security_type == 'index':
        doc_type = IndexKData
    elif security_type == 'cryptocurrency':
        doc_type = CryptoCurrencyKData
    else:
        raise ValueError(
            "unsupported security_type: {}".format(security_type))

    for _, security_item in get_security_list(security_type=security_type,
                                              exchanges=exchanges,
                                              start=start,
                                              end=end).iterrows():
        # Create the index mapping first.
        index_name = get_es_kdata_index(security_item['type'],
                                        security_item['exchange'])
        es_index_mapping(index_name, doc_type)

        start_date = None

        if not force:
            # Incremental mode: resume after the latest indexed record.
            query = {"term": {"securityId": security_item["id"]}}
            latest_record = es_get_latest_record(index_name, query=query)
            logger.info("latest_record:{}".format(latest_record))
            if latest_record:
                start_date = latest_record['timestamp']
        actions = []

        df_kdata = get_kdata(security_item, start_date=start_date)

        for _, kdata_item in df_kdata.iterrows():
            # The latest indexed day is included in df_kdata; skip it.
            if start_date and is_same_date(start_date,
                                           kdata_item['timestamp']):
                continue

            try:
                id = '{}_{}'.format(kdata_item['securityId'],
                                    kdata_item['timestamp'])
                kdata = doc_type(meta={'id': id}, id=id)
                kdata.meta['index'] = index_name
                kdata_json = json.loads(kdata_item.to_json())

                fill_doc_type(kdata, kdata_json)
                actions.append(kdata.to_dict(include_meta=True))
            except Exception as e:
                # Fix: stdlib logging uses %-style lazy args; the original
                # '{}' message never interpolated kdata_item/e.
                logger.warning("wrong KdataDay:%s,error:%s", kdata_item, e)
        if actions:
            resp = elasticsearch.helpers.bulk(es_client, actions)
            logger.info(resp)
示例#37
0
def remove_old_tick():
    """Delete leftover .xls tick files (skipping lock/error files) for every security."""
    for _, security_item in get_security_list().iterrows():
        tick_dir = get_tick_dir(security_item)
        if not os.path.exists(tick_dir):
            continue
        for name in os.listdir(tick_dir):
            full_path = os.path.join(tick_dir, name)
            # Same filter as the original: xls files that are neither
            # lock nor error markers, and are regular files.
            if ('xls' in name and 'lock' not in name and 'error' not in name
                    and os.path.isfile(full_path)):
                logger.info("remove {}".format(full_path))
                os.remove(full_path)
示例#38
0
def tick_to_kafka():
    """Stream all stored ticks into Kafka, one message per tick row."""
    for _, security_item in get_security_list().iterrows():
        for frame in get_ticks(security_item):
            for _, row in frame.iterrows():
                message = row.to_json(force_ascii=False)
                # NOTE(review): .timestamp() is epoch seconds, passed as
                # timestamp_ms; kept as-is to preserve behavior.
                epoch = int(
                    datetime.datetime.strptime(
                        row['timestamp'], TIME_FORMAT_SEC).timestamp())
                producer.send(get_kafka_tick_topic(security_item['id']),
                              bytes(message, encoding='utf8'),
                              timestamp_ms=epoch)
                logger.debug("tick_to_kafka {}".format(message))
示例#39
0
def forecast_event_to_csv():
    """Convert legacy JSON forecast events to the finance-forecast CSV layout.

    For each security with a forecast-event JSON file: rename
    reportDate -> timestamp, keep only EVENT_STOCK_FINANCE_FORECAST_COL,
    write the CSV, then delete the old JSON file.
    """
    for _, security_item in get_security_list().iterrows():
        the_path = get_forecast_event_path(security_item)
        if os.path.exists(the_path):
            # Fix: reuse the already-computed path instead of calling
            # get_forecast_event_path a second time.
            df = pd.read_json(the_path)
            df = df.rename(columns={'reportDate': 'timestamp'})
            df = df.loc[:, EVENT_STOCK_FINANCE_FORECAST_COL]
            df.to_csv(get_finance_forecast_event_path(security_item),
                      index=False)
            logger.info("transform {} forecast event".format(
                security_item['code']))
            os.remove(the_path)
示例#40
0
    def start_requests(self):
        """Kick off the Sina category page download (industry/concept/area)."""
        self.category_type = self.settings.get("category_type")

        self.sh_df = get_security_list(exchanges=['sh'])
        self.sz_df = get_security_list(exchanges=['sz'])
        self.file_lock = threading.RLock()

        # Drop any previously stored values for this category.
        self.sh_df[self.category_type] = None
        self.sz_df[self.category_type] = None

        # Dispatch table replaces the original if/elif chain.
        category_urls = {
            'sinaIndustry': 'http://vip.stock.finance.sina.com.cn/q/view/newSinaHy.php',
            'sinaConcept': 'http://money.finance.sina.com.cn/q/view/newFLJK.php?param=class',
            'sinaArea': 'http://money.finance.sina.com.cn/q/view/newFLJK.php?param=area',
        }
        url = category_urls.get(self.category_type)
        if url is None:
            # Unknown category: nothing to crawl.
            return

        yield Request(url=url, callback=self.download_sina_category)
示例#41
0
def kdata_to_kafka(fuquan):
    """Publish each security's sina day-k rows to its per-security Kafka topic."""
    for _, security_item in get_security_list().iterrows():
        day_k = get_kdata(security_item, source='sina', fuquan=fuquan)
        for _, row in day_k.iterrows():
            payload = row.to_json(force_ascii=False)
            # NOTE(review): .timestamp() is epoch seconds, passed as
            # timestamp_ms; kept as-is to preserve behavior.
            epoch = int(
                datetime.datetime.strptime(row['timestamp'],
                                           TIME_FORMAT_DAY).timestamp())
            producer.send(get_kafka_kdata_topic(security_item['id'], fuquan),
                          bytes(payload, encoding='utf8'),
                          timestamp_ms=epoch)
            logger.debug("kdata_to_kafka {}".format(payload))
示例#42
0
def check_result():
    """Log the directory of every security with missing k-data or tick files.

    A security is reported when either of its day-k files (bfq/hfq) is
    absent, or its tick directory exists but contains no csv file.
    """
    for _, security_item in get_security_list().iterrows():
        # Both the bfq (unadjusted) and hfq (post-adjusted) files must exist.
        for fuquan in ('bfq', 'hfq'):
            dayk_path = get_kdata_path(security_item, fuquan=fuquan)
            if not os.path.exists(dayk_path):
                # logger.warn is a deprecated alias of warning.
                logger.warning(get_security_dir(security_item))

        # Renamed from 'dir': don't shadow the builtin.
        tick_dir = get_tick_dir(security_item)
        if os.path.exists(tick_dir):
            csv_files = [
                os.path.join(tick_dir, f) for f in os.listdir(tick_dir)
                if ('csv' in f and os.path.isfile(os.path.join(tick_dir, f)))
            ]
            if not csv_files:
                logger.warning(get_security_dir(security_item))
示例#43
0
def forecast_event_to_es():
    """Index every security's forecast events into Elasticsearch.

    Creates the per-security index mapping first, then saves each forecast
    item individually; failures are logged and skipped.
    """
    for _, security_item in get_security_list().iterrows():
        # Create the per-security index mapping.
        index_name = get_es_forecast_event_index(security_item['id'])
        es_index_mapping(index_name, ForecastEvent)

        for json_object in get_forecast_items(security_item):
            try:
                forecast_event = ForecastEvent(meta={'id': json_object['id']})
                fill_doc_type(forecast_event, json_object)
                forecast_event.save()
            except Exception as e:
                # Fix: stdlib logging uses %-style lazy args; the original
                # '{}' message never interpolated json_object/e.
                logger.warning("wrong ForecastEvent:%s,error:%s",
                               json_object, e)
示例#44
0
 def start_requests(self):
     """Yield tick-download requests.

     Two modes, kept from the original:
     1) no security_item / trading_dates configured -> full download;
     2) both configured -> repair a specific security's trading dates.
     """
     target = self.settings.get("security_item")
     trading_dates = self.settings.get("trading_dates")
     fuquan = self.settings.get("fuquan")
     if target is None:
         for _, row in get_security_list().iterrows():
             yield from self.yield_request(row)
     else:
         yield from self.yield_request(target, trading_dates, fuquan)
示例#45
0
def handle_error_tick():
    """Retry converting tick files previously marked 'error' or 'fatal'.

    A conversion is attempted only when the target csv is still missing;
    any new failure is logged and the source file renamed with a '.fatal'
    suffix.
    """
    for _, security_item in get_security_list().iterrows():
        # Renamed from 'dir': don't shadow the builtin.
        tick_dir = get_tick_dir(security_item)
        if not os.path.exists(tick_dir):
            continue
        failed_files = [
            os.path.join(tick_dir, f) for f in os.listdir(tick_dir)
            if (('fatal' in f or 'error' in f)
                and os.path.isfile(os.path.join(tick_dir, f)))
        ]
        for f in failed_files:
            try:
                the_date = get_file_name(f)
                csv_path = get_tick_path(security_item, the_date)
                if not os.path.exists(csv_path):
                    logger.info("{} to {}".format(f, csv_path))
                    sina_tick_to_csv(security_item, f, the_date)
            except Exception as e:
                # logger.warn is a deprecated alias of warning.
                logger.warning(e)
                os.rename(f, f + ".fatal")
示例#46
0
    def start_requests(self):
        """Request THS day-k data for every security, both hfq and bfq.

        hfq (post-adjusted) uses flag 2, bfq (unadjusted) flag 0.
        """
        # HACK preserved from the original ('... or True'): force keeps the
        # condition always true, so files are re-downloaded unconditionally
        # and the 'kdata existed' branch never runs.
        force = True
        for _, item in get_security_list().iterrows():
            for fuquan in ['hfq', 'bfq']:
                data_path = get_kdata_path(item, fuquan=fuquan, source='ths')
                data_exist = os.path.isfile(data_path)
                if not data_exist or force:
                    flag = 2 if fuquan == 'hfq' else 0
                    url = self.get_k_data_url(item['code'], flag)
                    yield Request(url=url,
                                  headers=TONGHUASHUN_KDATA_HEADER,
                                  meta={'path': data_path, 'item': item,
                                        'fuquan': fuquan},
                                  callback=self.download_day_k_data)
                else:
                    self.logger.info("{} kdata existed".format(item['code']))
    def start_requests(self):
        """Request finance CSVs for one configured security, or all NASDAQ ones."""
        configured = self.settings.get("security_item")

        # Single configured item and the full NASDAQ list share one code
        # path; the original duplicated the request-building block.
        if configured is not None:
            items = [configured]
        else:
            items = (row for _, row
                     in get_security_list(exchanges=['nasdaq']).iterrows())

        for item in items:
            data_url = self.get_finance_url(item['code'])
            data_path = get_finance_path(item)

            yield Request(url=data_url,
                          meta={'path': data_path,
                                'item': item},
                          callback=self.download_finance_csv)
    def start_requests(self):
        """Yield requests for a specific security/year range, else for all US codes."""
        item = self.settings.get("security_item")
        start_date = self.settings.get("start_date")
        end_date = self.settings.get("end_date")

        today = pd.Timestamp.today()

        the_years = None
        # Only build a year range when the window starts more than ~5 days
        # ago; a very recent start date needs no year-by-year backfill.
        if start_date and end_date and (today - start_date).days > 5:
            the_years = range(start_date.year, end_date.year + 1)

        # De Morgan of the original 'item is not None and the_years'.
        if item is None or not the_years:
            # Fall back to the configured US stock universe.
            for _, row in get_security_list(exchanges=['nasdaq'],
                                            codes=US_STOCK_CODES).iterrows():
                yield from self.yield_request(row)
        else:
            yield from self.yield_request(item, the_years)
示例#49
0
def stock_meta_to_es(force=False):
    """Bulk-index the stock meta list into the 'stock_meta' ES index.

    Args:
        force: when False, resume after the latest indexed listDate;
            when True, re-index everything.
    """
    es_index_mapping('stock_meta', StockMeta)
    start_date = None
    if not force:
        # Incremental mode: continue from the newest indexed listDate.
        latest_record = es_get_latest_record('stock_meta',
                                             time_field='listDate')
        logger.info("latest_record:{}".format(latest_record))
        if latest_record:
            start_date = latest_record['listDate']

    actions = []
    for _, item in get_security_list(mode='es', start_date=start_date,
                                     exchanges=EXCHANGE_LIST_COL).iterrows():
        # Skip the record matching start_date — it is already indexed.
        if start_date and is_same_date(start_date, item['listDate']):
            continue
        try:
            stock_meta = StockMeta(meta={'id': item['id']})
            fill_doc_type(stock_meta, json.loads(item.to_json()))
            actions.append(stock_meta.to_dict(include_meta=True))
        except Exception as e:
            # Fix: stdlib logging uses %-style lazy args; the original
            # '{}' message never interpolated item/e.
            logger.warning("wrong SecurityItem:%s,error:%s", item, e)
    if actions:
        resp = elasticsearch.helpers.bulk(es, actions)
        logger.info(resp)
示例#50
0
def check_convert_result():
    """Verify per-period k-data csvs merge into the same frame as the day file.

    For each security and fuquan variant, concatenates every non-day csv
    in the k-data directory and asserts the result equals the merged
    day-k file.
    """
    for _, security_item in get_security_list().iterrows():
        for fuquan in ('bfq', 'hfq'):
            dayk_path = get_kdata_path(security_item, fuquan=fuquan)
            if not os.path.exists(dayk_path):
                continue
            df_result = pd.read_csv(dayk_path)

            # hfq (post-adjusted) data carries the extended column set.
            if fuquan == 'hfq':
                columns = data_contract.KDATA_COLUMN_FQ
            else:
                columns = data_contract.KDATA_COLUMN
            df = pd.DataFrame(columns=columns)

            # Renamed from 'dir': don't shadow the builtin.
            kdata_dir = get_kdata_dir(security_item, fuquan=fuquan)

            if os.path.exists(kdata_dir):
                files = [
                    os.path.join(kdata_dir, f) for f in os.listdir(kdata_dir)
                    if ('day' not in f and 'csv' in f
                        and os.path.isfile(os.path.join(kdata_dir, f)))
                ]
                # Fix: DataFrame.append was removed in pandas 2.0; a single
                # concat also avoids the quadratic copy of looped appends.
                if files:
                    df = pd.concat([df] + [pd.read_csv(f) for f in files],
                                   ignore_index=True)
                assert_df(df, df_result)
                logger.info(
                    "{} merge as one ok".format(security_item['code']))
示例#51
0
def remove_old_163_trading_dates():
    """Delete the legacy 163 trading-dates file for every security, if present."""
    for _, security_item in get_security_list().iterrows():
        stale_path = get_trading_dates_path_163(security_item)
        if not os.path.exists(stale_path):
            continue
        logger.info("remove {}".format(stale_path))
        os.remove(stale_path)
示例#52
0
 def start_requests(self):
     """Issue one forecast-page request per listed security."""
     for _, security in get_security_list().iterrows():
         forecast_url = self.get_forecast_url(security['code'])
         yield Request(url=forecast_url,
                       headers=DEFAULT_KDATA_HEADER,
                       meta={'item': security, },
                       callback=self.download_forecast_data)
示例#53
0
 def __init__(self, name=None, **kwargs):
     """Initialise the spider: load the sh/sz security lists and a write lock."""
     super().__init__(name, **kwargs)
     # Both exchange lists are loaded eagerly at construction time; the
     # RLock presumably guards concurrent writes to the list files — TODO
     # confirm against the download callbacks that use it.
     self.sh_df = get_security_list(exchanges=['sh'])
     self.sz_df = get_security_list(exchanges=['sz'])
     self.file_lock = threading.RLock()