def income_statement_to_es(force=False):
    """Bulk-index the income statements of every security into Elasticsearch.

    For each row yielded by ``get_security_list().iterrows()`` the income
    statement items are turned into ``IncomeStatement`` documents and sent to
    the ``income_statement`` index with a single ``elasticsearch.helpers.bulk``
    call per security.

    :param force: when False (the default) the latest indexed record of the
        security is looked up by ``reportDate``; its date is passed as
        ``start_date`` to ``get_income_statement_items`` and an item whose
        ``reportDate`` is that same date is skipped. When True no start date
        is used.
    """
    es_index_mapping('income_statement', IncomeStatement)

    for _, security_item in get_security_list().iterrows():
        try:
            start_date = None
            if not force:
                query = {"term": {"securityId": security_item["id"]}}
                latest_record = es_get_latest_record(index='income_statement',
                                                     time_field='reportDate',
                                                     query=query)
                logger.info("latest_record:%s", latest_record)
                if latest_record:
                    start_date = latest_record['reportDate']

            actions = []
            for json_object in get_income_statement_items(security_item, start_date=start_date):
                # The record at start_date itself is already indexed.
                if start_date and is_same_date(start_date, json_object['reportDate']):
                    continue

                income_statement = IncomeStatement(meta={'id': json_object['id']})
                fill_doc_type(income_statement, json_object)
                actions.append(income_statement.to_dict(include_meta=True))

            if actions:
                resp = elasticsearch.helpers.bulk(es, actions)
                logger.info(resp)
        except Exception as e:
            # Best effort: a failure for one security must not stop the others.
            # logging interpolates with %-style placeholders; the previous
            # "{}" placeholders made the handler fail to format the message.
            logger.warning("wrong IncomeStatement:%s,error:%s", security_item, e)
def income_statement_to_es(force=False):
    """Index the income statements of every security into Elasticsearch.

    For each row yielded by ``get_security_list().iterrows()`` the income
    statement items are turned into ``IncomeStatement`` documents and saved
    one by one with ``save()``.

    :param force: when False (the default) the latest indexed record of the
        security is looked up by ``reportDate``; its date is passed as
        ``start_date`` to ``get_income_statement_items`` and an item whose
        ``reportDate`` is that same date is skipped. When True no start date
        is used.
    """
    es_index_mapping('income_statement', IncomeStatement)

    for _, security_item in get_security_list().iterrows():
        try:
            start_date = None
            if not force:
                query = {"term": {"securityId": security_item["id"]}}
                latest_record = es_get_latest_record(index='income_statement',
                                                     time_field='reportDate',
                                                     query=query)
                logger.info("latest_record:%s", latest_record)
                if latest_record:
                    start_date = latest_record['reportDate']

            for json_object in get_income_statement_items(security_item, start_date=start_date):
                # The record at start_date itself is already indexed.
                if start_date and is_same_date(start_date, json_object['reportDate']):
                    continue

                income_statement = IncomeStatement(meta={'id': json_object['id']})
                fill_doc_type(income_statement, json_object)
                income_statement.save()
        except Exception as e:
            # Best effort: a failure for one security must not stop the others.
            # logging interpolates with %-style placeholders; the previous
            # "{}" placeholders made the handler fail to format the message.
            logger.warning("wrong IncomeStatement:%s,error:%s", security_item, e)
def check_net_profit(security_item):
    """Print whether each income statement's net profit can be re-derived.

    For every item returned by ``get_income_statement_items`` the value
    ``totalProfits - incomeTaxExpense`` is compared with the reported
    ``netProfit``; a difference of 1 or more (in absolute value) is printed
    as "not pass", anything else as "pass".

    :param security_item: forwarded to ``get_income_statement_items``.
    """
    for statement in get_income_statement_items(security_item=security_item):
        calculated = statement["totalProfits"] - statement["incomeTaxExpense"]
        reported = statement["netProfit"]

        if abs(calculated - reported) >= 1:
            message = "{} net profit calculating not pass,calculating result:{},report result:{}".format(
                statement['id'], calculated, reported)
        else:
            message = "{} net profit calculating pass".format(statement['id'])
        print(message)
def _crawl_sheet_if_outdated(security_item, report_type, path, get_items, current_report_date):
    """Crawl one finance sheet when its file is missing or lacks the current report.

    :param security_item: the security row being processed.
    :param report_type: value sent to the spider as ``report_type``.
    :param path: local file path of the sheet for this security.
    :param get_items: callable returning the stored items of the sheet.
    :param current_report_date: report date compared against ``reportDate``.
    """
    crawl_args = {"security_item": security_item, "report_type": report_type}

    if not os.path.exists(path):
        process_crawl(StockFinanceSpider, crawl_args)
        return

    for sheet_item in get_items(security_item):
        # The current report period has not been crawled yet.
        if sheet_item['reportDate'] != current_report_date:
            # Re-crawl only when the report has actually been published.
            df = event.get_finance_report_event(security_item, index='reportDate')
            if current_report_date in df.index:
                process_crawl(StockFinanceSpider, crawl_args)
        # Only the first stored item is inspected.
        # NOTE(review): the nesting level of this break was not recoverable
        # from the original formatting - confirm it belongs to the loop body.
        break


def crawl_finance_data(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE):
    """Crawl report events and the three finance sheets for a range of securities.

    For every security returned by ``get_security_list(start=..., end=...)``
    the report events are crawled first, then the balance sheet, income
    statement and cash flow statement are each crawled when their local file
    does not exist or does not yet contain the current report date.

    :param start_code: passed to ``get_security_list`` as ``start``.
    :param end_code: passed to ``get_security_list`` as ``end``.
    """
    # (report_type sent to the spider, path getter, stored-items getter)
    sheets = (
        ("balance_sheet", get_balance_sheet_path, get_balance_sheet_items),
        ("income_statement", get_income_statement_path, get_income_statement_items),
        ("cash_flow", get_cash_flow_statement_path, get_cash_flow_statement_items),
    )

    for _, security_item in get_security_list(start=start_code, end=end_code).iterrows():
        try:
            # Crawl the events first; some of the later crawls depend on them.
            process_crawl(StockFinanceReportEventSpider, {"security_item": security_item})

            current_report_date = get_report_date()

            for report_type, get_path, get_items in sheets:
                _crawl_sheet_if_outdated(security_item, report_type,
                                         get_path(security_item), get_items,
                                         current_report_date)
        except Exception as e:
            # Best effort per security; keep the traceback for diagnosis.
            logger.exception(e)
def finance_sheet_to_es(sheet_type='balance_sheet', force=False):
    """Bulk-index one kind of finance sheet of every security into Elasticsearch.

    :param sheet_type: one of ``'balance_sheet'``, ``'income_statement'`` or
        ``'cash_flow_statement'``; also used as the index name.
    :param force: when False (the default) the latest indexed record of the
        security is looked up by ``reportDate``; its date is passed as
        ``start_date`` to the items getter and an item whose ``reportDate`` is
        that same date is skipped. When True no start date is used.
    :raises ValueError: if ``sheet_type`` is not one of the supported values.
    """
    supported = ('balance_sheet', 'income_statement', 'cash_flow_statement')
    if sheet_type not in supported:
        # Previously an unknown type left doc_type unbound and failed later
        # with an UnboundLocalError.
        raise ValueError(
            "unsupported sheet_type: {!r}, expected one of {}".format(sheet_type, supported))

    if sheet_type == 'balance_sheet':
        doc_type, get_items = BalanceSheet, get_balance_sheet_items
    elif sheet_type == 'income_statement':
        doc_type, get_items = IncomeStatement, get_income_statement_items
    else:
        doc_type, get_items = CashFlowStatement, get_cash_flow_statement_items

    es_index_mapping(sheet_type, doc_type)

    for _, security_item in get_security_list().iterrows():
        try:
            start_date = None
            if not force:
                query = {"term": {"securityId": security_item["id"]}}
                latest_record = es_get_latest_record(index=sheet_type,
                                                     time_field='reportDate',
                                                     query=query)
                logger.info("latest_record:%s", latest_record)
                if latest_record:
                    start_date = latest_record['reportDate']

            actions = []
            for json_object in get_items(security_item, start_date=start_date):
                # The record at start_date itself is already indexed.
                if start_date and is_same_date(start_date, json_object['reportDate']):
                    continue

                the_doc = doc_type(meta={'id': json_object['id']})
                fill_doc_type(the_doc, json_object)
                actions.append(the_doc.to_dict(include_meta=True))

            if actions:
                resp = elasticsearch.helpers.bulk(es_client, actions)
                logger.info(resp)
        except Exception as e:
            # Best effort: a failure for one security must not stop the others.
            # logging interpolates with %-style placeholders; the previous
            # "{}" placeholders made the handler fail to format the message.
            logger.warning("%s wrong %s,error:%s", security_item, sheet_type, e)
def check_eps(security_item):
    """Print whether each income statement's EPS can be re-derived.

    For every item returned by ``get_income_statement_items`` the balance
    sheet of the same ``reportDate`` is fetched; the value
    ``(netProfit - minorityInterestIncome) / totalShareCapital`` is compared
    with the reported ``EPS``. A difference of 0.01 or more (in absolute
    value) is printed as "not pass", anything else as "pass". Items without
    a balance sheet, or with a zero ``totalShareCapital``, are skipped.

    :param security_item: forwarded to the item getters.
    """
    for statement in get_income_statement_items(security_item=security_item):
        sheet = get_balance_sheet_items(security_item=security_item,
                                        report_period=statement['reportDate'])
        # Nothing to divide by: no balance sheet or no share capital.
        if not sheet or sheet['totalShareCapital'] == 0:
            continue

        attributable_profit = statement["netProfit"] - statement["minorityInterestIncome"]
        calculated = attributable_profit / (sheet['totalShareCapital'])
        reported = statement["EPS"]

        if abs(calculated - reported) >= 0.01:
            message = "{} EPS calculating not pass,calculating result:{},report result:{}".format(
                statement['id'], calculated, reported)
        else:
            message = "{} EPS calculating pass".format(statement['id'])
        print(message)
def check_operating_profit(security_item):
    """Print whether each income statement's operating profit can be re-derived.

    For every item returned by ``get_income_statement_items`` the operating
    profit is recomputed from revenue, the cost/expense fields and the two
    income fields, then compared with the reported ``operatingProfit``. A
    difference of 1 or more (in absolute value) is printed as "not pass",
    anything else as "pass".

    :param security_item: forwarded to ``get_income_statement_items``.
    """
    for statement in get_income_statement_items(security_item=security_item):
        # Evaluation order is kept left to right so results stay identical.
        calculated = (
            statement["operatingRevenue"]
            - statement["operatingCosts"]
            - statement["businessTaxesAndSurcharges"]
            - statement["sellingExpenses"]
            - statement["ManagingCosts"]
            - statement["financingExpenses"]
            - statement["assetsDevaluation"]
            + statement["incomeFromChangesInFairValue"]
            + statement["investmentIncome"]
        )
        reported = statement["operatingProfit"]

        if abs(calculated - reported) >= 1:
            message = "{} operating profit calculating not pass,calculating result:{},report result:{}".format(
                statement['id'], calculated, reported)
        else:
            message = "{} operating profit calculating pass".format(statement['id'])
        print(message)
def test_get_income_statement_items():
    """Check the income statement items returned for security '600977'.

    The result must be non-empty, and every item must have a positive
    ``operatingRevenue`` and a ``reportEventDate`` greater than its
    ``reportPeriod``.
    """
    statements = finance.get_income_statement_items('600977')
    assert len(statements) > 0

    for statement in statements:
        revenue = statement['operatingRevenue']
        assert revenue > 0
        assert statement['reportEventDate'] > statement['reportPeriod']