def yield_request(self, item=None, finance_type=None):
    """Yield one download request per finance sheet selected by ``finance_type``.

    ``finance_type`` may be ``'balance_sheet'``, ``'income_statement'`` or
    ``'cash_flow'`` to request that single sheet.  Any other value, including
    the default ``None``, requests all three sheets in that order.

    Every request carries the target file path and ``item`` in ``meta`` and is
    handled by ``self.download_finance_sheet``.
    """

    def balance_sheet():
        return self.get_balance_sheet_url(item['code']), get_balance_sheet_path(item)

    def income_statement():
        return self.get_income_statement_url(item['code']), get_income_statement_path(item)

    def cash_flow():
        return self.get_cash_flow_statement_url(item['code']), get_cash_flow_statement_path(item)

    # Ordered so that the "all sheets" fallback keeps the original sequence.
    resolvers = (
        ('balance_sheet', balance_sheet),
        ('income_statement', income_statement),
        ('cash_flow', cash_flow),
    )

    chosen = [resolve for kind, resolve in resolvers if finance_type == kind][:1]
    if not chosen:
        chosen = [resolve for _, resolve in resolvers]

    # All (url, path) pairs are resolved before the first request is yielded.
    targets = [resolve() for resolve in chosen]

    for data_url, data_path in targets:
        # The same header constant is sent for every sheet type.
        yield Request(url=data_url,
                      meta={'path': data_path, 'item': item},
                      headers=DEFAULT_BALANCE_SHEET_HEADER,
                      callback=self.download_finance_sheet)
def yield_request(self, item=None, finance_type=None):
    """Yield one download request per finance sheet selected by ``finance_type``.

    ``finance_type`` may be ``'balance_sheet'``, ``'income_statement'`` or
    ``'cash_flow'`` to request that single sheet.  Any other value, including
    the default ``None``, requests all three sheets in that order.

    Every request carries the target file path and ``item`` in ``meta`` and is
    handled by ``self.download_finance_sheet``.
    """

    def balance_sheet():
        return self.get_balance_sheet_url(item['code']), get_balance_sheet_path(item)

    def income_statement():
        return self.get_income_statement_url(item['code']), get_income_statement_path(item)

    def cash_flow():
        return self.get_cash_flow_statement_url(item['code']), get_cash_flow_statement_path(item)

    # Ordered so that the "all sheets" fallback keeps the original sequence.
    resolvers = (
        ('balance_sheet', balance_sheet),
        ('income_statement', income_statement),
        ('cash_flow', cash_flow),
    )

    chosen = [resolve for kind, resolve in resolvers if finance_type == kind][:1]
    if not chosen:
        chosen = [resolve for _, resolve in resolvers]

    # All (url, path) pairs are resolved before the first request is yielded.
    targets = [resolve() for resolve in chosen]

    for data_url, data_path in targets:
        # The same header constant is sent for every sheet type.
        yield Request(url=data_url,
                      meta={'path': data_path, 'item': item},
                      headers=DEFAULT_BALANCE_SHEET_HEADER,
                      callback=self.download_finance_sheet)
def crawl_finance_data(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE):
    """Crawl report events and the three finance statements for each security.

    For every security returned by ``get_security_list(start=..., end=...)``:

    1. The finance-report events are crawled first, because the statement
       refresh decision below reads them.
    2. For each statement (balance sheet, income statement, cash flow):
       * if its file does not exist yet, it is crawled;
       * otherwise it is crawled again only when none of the stored items
         has ``reportDate`` equal to the current report date AND the report
         events contain the current report date (the report is published).

    The previous implementation compared the current report date against
    stored periods with ``!=`` while looping, so any older stored period
    triggered a re-download on every run once the report was published.  It
    also never refreshed a statement file that yielded no items.

    Any exception raised while processing one security is logged with its
    traceback and processing continues with the next security.

    :param start_code: passed to ``get_security_list`` as ``start``.
    :param end_code: passed to ``get_security_list`` as ``end``.
    """
    # (report_type sent to the spider, path getter, stored-items getter)
    statements = (
        ("balance_sheet", get_balance_sheet_path, get_balance_sheet_items),
        ("income_statement", get_income_statement_path, get_income_statement_items),
        ("cash_flow", get_cash_flow_statement_path, get_cash_flow_statement_items),
    )

    for _, security_item in get_security_list(start=start_code, end=end_code).iterrows():
        try:
            # Crawl the events first; the checks below depend on them.
            process_crawl(StockFinanceReportEventSpider, {"security_item": security_item})

            current_report_date = get_report_date()

            for report_type, path_of, items_of in statements:
                crawl_args = {"security_item": security_item, "report_type": report_type}

                if not os.path.exists(path_of(security_item)):
                    process_crawl(StockFinanceSpider, crawl_args)
                    continue

                # The current report period has already been downloaded.
                if any(stored['reportDate'] == current_report_date
                       for stored in items_of(security_item)):
                    continue

                # Only re-download once the report has been published.
                published = event.get_finance_report_event(security_item, index='reportDate')
                if current_report_date in published.index:
                    process_crawl(StockFinanceSpider, crawl_args)
        except Exception as e:
            # Best effort per security: keep the traceback and move on.
            logger.exception(e)
def crawl_finance_data(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE):
    """Crawl report events and the three finance statements for each security.

    For every security returned by
    ``get_security_list(start_code=..., end_code=...)``:

    1. The finance-report events are crawled first.
    2. For each statement (balance sheet, income statement, cash flow) the
       statement is crawled when its file does not exist, when the file
       yields no items, or when the last stored item's ``reportPeriod``
       differs from the current report period.

    Previously an existing file that yielded no items raised ``IndexError``
    on ``current_items[-1]``.  The broad handler swallowed it and the
    remaining statements of that security were skipped.  An empty item list
    is now treated as "not downloaded yet".

    Any exception raised while processing one security is logged with its
    traceback and processing continues with the next security.

    :param start_code: passed to ``get_security_list`` as ``start_code``.
    :param end_code: passed to ``get_security_list`` as ``end_code``.
    """
    # (report_type sent to the spider, path getter, stored-items getter)
    statements = (
        ("balance_sheet", get_balance_sheet_path, get_balance_sheet_items),
        ("income_statement", get_income_statement_path, get_income_statement_items),
        ("cash_flow", get_cash_flow_statement_path, get_cash_flow_statement_items),
    )

    for _, security_item in get_security_list(start_code=start_code, end_code=end_code).iterrows():
        try:
            # Crawl the events first; some later crawling depends on them.
            process_crawl(StockFinanceReportEventSpider, {"security_item": security_item})

            current_report_period = get_report_period()

            for report_type, path_of, items_of in statements:
                if os.path.exists(path_of(security_item)):
                    current_items = items_of(security_item)
                    # NOTE(review): the get_balance_sheet_items visible in this
                    # file builds dicts keyed 'reportDate', not 'reportPeriod'
                    # - confirm which key the item getters really emit.
                    if current_items and current_report_period == current_items[-1]['reportPeriod']:
                        # The current report period is already stored.
                        continue

                # NOTE: the "has the report been published?" check against the
                # finance-report events is intentionally not applied here.
                process_crawl(StockFinanceSpider,
                              {"security_item": security_item, "report_type": report_type})
        except Exception as e:
            # Best effort per security: keep the traceback and move on.
            logger.exception(e)
# Rows of the downloaded balance-sheet text file that are exported, as
# (result key, zero-based line index), in output order.  Line 0 holds the
# report dates.  Lines 1, 2, 20, 42, 60 and 72 are never read (presumably
# header / section-title rows - confirm against a downloaded file).
_BALANCE_SHEET_ROWS = (
    # --- current assets ---
    ("moneyFunds", 3),  # monetary funds
    ("heldForTradingFinancialAssets", 4),  # financial assets held for trading
    ("derivative", 5),  # derivative financial assets
    ("billsReceivable", 6),  # notes receivable
    ("accountsReceivable", 7),  # accounts receivable
    ("prepaidAccounts", 8),  # prepayments
    ("interestReceivable", 9),  # interest receivable
    ("dividendReceivable", 10),  # dividends receivable
    ("otherReceivables", 11),  # other receivables
    ("buyingBackTheSaleOfFinancialAssets", 12),  # financial assets purchased under resale agreements
    ("inventory", 13),  # inventories
    ("assetsForSale", 14),  # assets classified as held for sale
    ("nonCurrentAssetsDueWithinOneYear", 15),  # non-current assets due within one year
    ("unamortizedExpenditures", 16),  # prepaid (unamortized) expenses
    ("waitDealIntangibleAssetsLossOrIncome", 17),  # pending gains/losses on current assets
    ("otherCurrentAssets", 18),  # other current assets
    ("totalCurrentAssets", 19),  # total current assets
    # --- non-current assets ---
    ("loansAndPaymentsOnBehalf", 21),  # loans and advances granted
    ("availableForSaleFinancialAssets", 22),  # available-for-sale financial assets
    ("heldToMaturityInvestment", 23),  # held-to-maturity investments
    ("longTermReceivables", 24),  # long-term receivables
    ("longTermEquityInvestment", 25),  # long-term equity investments
    ("investmentRealEstate", 26),  # investment property
    ("NetfixedAssets", 27),  # net fixed assets
    ("constructionInProcess", 28),  # construction in progress
    ("engineerMaterial", 29),  # construction materials
    ("fixedAssetsInLiquidation", 30),  # fixed assets pending disposal
    ("productiveBiologicalAssets", 31),  # productive biological assets
    ("nonProfitLivingAssets", 32),  # public-welfare biological assets
    ("oilAndGasAssets", 33),  # oil and gas assets
    ("intangibleAssets", 34),  # intangible assets
    ("developmentExpenditure", 35),  # development expenditure
    ("goodwill", 36),  # goodwill
    ("longTermDeferredExpenses", 37),  # long-term deferred expenses
    ("deferredIncomeTaxAssets", 38),  # deferred income tax assets
    ("OtherNonCurrentAssets", 39),  # other non-current assets
    ("nonCurrentAssets", 40),  # total non-current assets
    ("totalAssets", 41),  # total assets
    # --- current liabilities ---
    ("shortTermBorrowing", 43),  # short-term borrowings
    ("transactionFinancialLiabilities", 44),  # financial liabilities held for trading
    ("billsPayable", 45),  # notes payable
    ("accountsPayable", 46),  # accounts payable
    ("accountsReceivedInAdvance", 47),  # advances from customers
    ("handlingChargesAndCommissionsPayable", 48),  # fees and commissions payable
    ("employeeBenefitsPayable", 49),  # employee benefits payable
    ("taxesAndSurchargesPayable", 50),  # taxes payable
    ("interestPayable", 51),  # interest payable
    ("dividendpayable", 52),  # dividends payable
    ("otherPayables", 53),  # other payables
    ("withholdingExpenses", 54),  # accrued expenses
    ("deferredIncomeWithinOneYear", 55),  # deferred income within one year
    ("shortTermDebenturesPayable", 56),  # short-term bonds payable
    ("nonCurrentLiabilitiesMaturingWithinOneYear", 57),  # non-current liabilities due within one year
    ("otherCurrentLiability", 58),  # other current liabilities
    ("totalCurrentLiabilities", 59),  # total current liabilities
    # --- non-current liabilities ---
    ("LongTermBorrowing", 61),  # long-term borrowings
    ("bondPayable", 62),  # bonds payable
    ("longTermPayables", 63),  # long-term payables
    ("longTermEmployeeBenefitsPayable", 64),  # long-term employee benefits payable
    ("specialPayable", 65),  # special payables
    ("expectedNonCurrentLiabilities", 66),  # estimated non-current liabilities
    ("deferredIncomeTaxLiabilities", 67),  # deferred income tax liabilities
    ("longTermDeferredRevenue", 68),  # long-term deferred income
    ("otherNonCurrentLiabilities", 69),  # other non-current liabilities
    ("totalNonCurrentLiabilities", 70),  # total non-current liabilities
    ("totalLiabilities", 71),  # total liabilities
    # --- owners' equity ---
    ("totalShareCapital", 73),  # paid-in capital (or share capital)
    ("capitalSurplus", 74),  # capital reserve
    ("treasuryStock", 75),  # less: treasury stock
    ("otherComprehensiveIncome", 76),  # other comprehensive income
    ("theSpecialReserve", 77),  # special reserve
    ("surplusReserves", 78),  # surplus reserve
    ("generalRiskPreparation", 79),  # general risk reserve
    ("undistributedProfits", 80),  # undistributed profits
    ("bookValue", 81),  # total equity attributable to parent-company shareholders (net assets)
    ("minorityBookValue", 82),  # minority interests
    ("totalBookValue", 83),  # total owners' (shareholders') equity
    ("totalLiabilitiesAndOwnersEquity", 84),  # total liabilities and owners' equity
)


def get_balance_sheet_items(security_item, start_date=None, report_period=None, report_event_date=None):
    """Parse the downloaded balance-sheet file of ``security_item``.

    The file is read as text: line 0 carries the report dates and each row
    listed in ``_BALANCE_SHEET_ROWS`` carries one value per report date.  On
    every parsed line the first and last whitespace-separated tokens are
    dropped.

    :param security_item: mapping with at least ``"id"`` and ``"code"``.
    :param start_date: when given, report dates earlier than it are skipped.
    :param report_period: when given, only the matching report date (per
        ``is_same_date``) is considered and that single item is returned as
        a ``dict`` instead of a list.
    :param report_event_date: when given, items whose report event date is
        later than it are skipped, so that no future data is returned.
    :return: ``[]`` when the file does not exist; the matching ``dict`` when
        ``report_period`` is given and found; otherwise a list of dicts
        sorted ascending by ``reportDate`` (empty when nothing matched).
    :raises IndexError: when the file has fewer lines than the row table
        expects, or a row has fewer values than there are report dates.
    """
    path = get_balance_sheet_path(security_item)
    if not os.path.exists(path):
        return []

    encoding = settings.DOWNLOAD_TXT_ENCODING
    if not encoding:
        encoding = detect_encoding(url='file://' + os.path.abspath(path)).get('encoding')

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    def cells(line_no):
        # Drop the first and last whitespace-separated tokens of the line.
        return lines[line_no].split()[1:-1]

    report_dates = cells(0)
    # Parse every row up front so a truncated file fails before any item is built.
    columns = [(key, cells(line_no)) for key, line_no in _BALANCE_SHEET_ROWS]

    result_json = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue

        # Filter on the period before resolving the event date, so a
        # single-period lookup does not resolve it for every column.
        if report_period and not is_same_date(report_period, report_date):
            continue

        report_event = get_report_event_date(security_item, report_date=report_date)

        # Skip reports published after report_event_date to avoid future data.
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(report_event):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": report_event,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        for key, values in columns:
            the_json[key] = to_float(values[idx])

        # A report_period that got this far has already matched this column.
        if report_period:
            return the_json

        result_json.append(the_json)

    result_json.sort(key=lambda x: pd.Timestamp(x['reportDate']))
    return result_json
# Rows of the downloaded balance-sheet text file that are exported, as
# (result key, zero-based line index), in output order.  Line 0 holds the
# report dates.  Lines 1, 2, 20, 42, 60 and 72 are never read (presumably
# header / section-title rows - confirm against a downloaded file).
_BALANCE_SHEET_ROWS = (
    # --- current assets ---
    ("moneyFunds", 3),  # monetary funds
    ("heldForTradingFinancialAssets", 4),  # financial assets held for trading
    ("derivative", 5),  # derivative financial assets
    ("billsReceivable", 6),  # notes receivable
    ("accountsReceivable", 7),  # accounts receivable
    ("prepaidAccounts", 8),  # prepayments
    ("interestReceivable", 9),  # interest receivable
    ("dividendReceivable", 10),  # dividends receivable
    ("otherReceivables", 11),  # other receivables
    ("buyingBackTheSaleOfFinancialAssets", 12),  # financial assets purchased under resale agreements
    ("inventory", 13),  # inventories
    ("assetsForSale", 14),  # assets classified as held for sale
    ("nonCurrentAssetsDueWithinOneYear", 15),  # non-current assets due within one year
    ("unamortizedExpenditures", 16),  # prepaid (unamortized) expenses
    ("waitDealIntangibleAssetsLossOrIncome", 17),  # pending gains/losses on current assets
    ("otherCurrentAssets", 18),  # other current assets
    ("totalCurrentAssets", 19),  # total current assets
    # --- non-current assets ---
    ("loansAndPaymentsOnBehalf", 21),  # loans and advances granted
    ("availableForSaleFinancialAssets", 22),  # available-for-sale financial assets
    ("heldToMaturityInvestment", 23),  # held-to-maturity investments
    ("longTermReceivables", 24),  # long-term receivables
    ("longTermEquityInvestment", 25),  # long-term equity investments
    ("investmentRealEstate", 26),  # investment property
    ("NetfixedAssets", 27),  # net fixed assets
    ("constructionInProcess", 28),  # construction in progress
    ("engineerMaterial", 29),  # construction materials
    ("fixedAssetsInLiquidation", 30),  # fixed assets pending disposal
    ("productiveBiologicalAssets", 31),  # productive biological assets
    ("nonProfitLivingAssets", 32),  # public-welfare biological assets
    ("oilAndGasAssets", 33),  # oil and gas assets
    ("intangibleAssets", 34),  # intangible assets
    ("developmentExpenditure", 35),  # development expenditure
    ("goodwill", 36),  # goodwill
    ("longTermDeferredExpenses", 37),  # long-term deferred expenses
    ("deferredIncomeTaxAssets", 38),  # deferred income tax assets
    ("OtherNonCurrentAssets", 39),  # other non-current assets
    ("nonCurrentAssets", 40),  # total non-current assets
    ("totalAssets", 41),  # total assets
    # --- current liabilities ---
    ("shortTermBorrowing", 43),  # short-term borrowings
    ("transactionFinancialLiabilities", 44),  # financial liabilities held for trading
    ("billsPayable", 45),  # notes payable
    ("accountsPayable", 46),  # accounts payable
    ("accountsReceivedInAdvance", 47),  # advances from customers
    ("handlingChargesAndCommissionsPayable", 48),  # fees and commissions payable
    ("employeeBenefitsPayable", 49),  # employee benefits payable
    ("taxesAndSurchargesPayable", 50),  # taxes payable
    ("interestPayable", 51),  # interest payable
    ("dividendpayable", 52),  # dividends payable
    ("otherPayables", 53),  # other payables
    ("withholdingExpenses", 54),  # accrued expenses
    ("deferredIncomeWithinOneYear", 55),  # deferred income within one year
    ("shortTermDebenturesPayable", 56),  # short-term bonds payable
    ("nonCurrentLiabilitiesMaturingWithinOneYear", 57),  # non-current liabilities due within one year
    ("otherCurrentLiability", 58),  # other current liabilities
    ("totalCurrentLiabilities", 59),  # total current liabilities
    # --- non-current liabilities ---
    ("LongTermBorrowing", 61),  # long-term borrowings
    ("bondPayable", 62),  # bonds payable
    ("longTermPayables", 63),  # long-term payables
    ("longTermEmployeeBenefitsPayable", 64),  # long-term employee benefits payable
    ("specialPayable", 65),  # special payables
    ("expectedNonCurrentLiabilities", 66),  # estimated non-current liabilities
    ("deferredIncomeTaxLiabilities", 67),  # deferred income tax liabilities
    ("longTermDeferredRevenue", 68),  # long-term deferred income
    ("otherNonCurrentLiabilities", 69),  # other non-current liabilities
    ("totalNonCurrentLiabilities", 70),  # total non-current liabilities
    ("totalLiabilities", 71),  # total liabilities
    # --- owners' equity ---
    ("totalShareCapital", 73),  # paid-in capital (or share capital)
    ("capitalSurplus", 74),  # capital reserve
    ("treasuryStock", 75),  # less: treasury stock
    ("otherComprehensiveIncome", 76),  # other comprehensive income
    ("theSpecialReserve", 77),  # special reserve
    ("surplusReserves", 78),  # surplus reserve
    ("generalRiskPreparation", 79),  # general risk reserve
    ("undistributedProfits", 80),  # undistributed profits
    ("bookValue", 81),  # total equity attributable to parent-company shareholders (net assets)
    ("minorityBookValue", 82),  # minority interests
    ("totalBookValue", 83),  # total owners' (shareholders') equity
    ("totalLiabilitiesAndOwnersEquity", 84),  # total liabilities and owners' equity
)


def get_balance_sheet_items(security_item, start_date=None, report_period=None, report_event_date=None):
    """Parse the downloaded balance-sheet file of ``security_item``.

    The file is read as text: line 0 carries the report dates and each row
    listed in ``_BALANCE_SHEET_ROWS`` carries one value per report date.  On
    every parsed line the first and last whitespace-separated tokens are
    dropped.

    :param security_item: mapping with at least ``"id"`` and ``"code"``.
    :param start_date: when given, report dates earlier than it are skipped.
    :param report_period: when given, only the matching report date (per
        ``is_same_date``) is considered and that single item is returned as
        a ``dict`` instead of a list.
    :param report_event_date: when given, items whose report event date is
        later than it are skipped, so that no future data is returned.
    :return: ``[]`` when the file does not exist; the matching ``dict`` when
        ``report_period`` is given and found; otherwise a list of dicts
        sorted ascending by ``reportDate`` (empty when nothing matched).
    :raises IndexError: when the file has fewer lines than the row table
        expects, or a row has fewer values than there are report dates.
    """
    path = get_balance_sheet_path(security_item)
    if not os.path.exists(path):
        return []

    encoding = settings.DOWNLOAD_TXT_ENCODING
    if not encoding:
        encoding = detect_encoding(url='file://' + os.path.abspath(path)).get('encoding')

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    def cells(line_no):
        # Drop the first and last whitespace-separated tokens of the line.
        return lines[line_no].split()[1:-1]

    report_dates = cells(0)
    # Parse every row up front so a truncated file fails before any item is built.
    columns = [(key, cells(line_no)) for key, line_no in _BALANCE_SHEET_ROWS]

    result_json = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue

        # Filter on the period before resolving the event date, so a
        # single-period lookup does not resolve it for every column.
        if report_period and not is_same_date(report_period, report_date):
            continue

        report_event = get_report_event_date(security_item, report_date=report_date)

        # Skip reports published after report_event_date to avoid future data.
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(report_event):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": report_event,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        for key, values in columns:
            the_json[key] = to_float(values[idx])

        # A report_period that got this far has already matched this column.
        if report_period:
            return the_json

        result_json.append(the_json)

    result_json.sort(key=lambda x: pd.Timestamp(x['reportDate']))
    return result_json