def init_new_computing_interval(self, event_timestamp):
    """Anchor a new computing interval at the given event time.

    Records the event time as ``last_timestamp``, the same time with its
    seconds and microseconds subtracted as ``kdata_timestamp``, and two
    string renderings of ``kdata_timestamp``: one with the default format
    and one with ``TIME_FORMAT_MICRO``.

    :param event_timestamp: value accepted by ``to_timestamp``.
    """
    event_time = to_timestamp(event_timestamp)
    self.last_timestamp = event_time

    # Remove the sub-minute part of the event time.
    sub_minute = timedelta(seconds=event_time.second,
                           microseconds=event_time.microsecond)
    interval_start = event_time - sub_minute
    self.kdata_timestamp = interval_start

    self.last_day_time_str = to_time_str(interval_start)
    self.last_mirco_time_str = to_time_str(interval_start,
                                           time_fmt=TIME_FORMAT_MICRO)
def on_event(self, event_item):
    """Handle one price event and evaluate the price subscriptions.

    When no reference date is cached, or the cached one is not on the same
    date as ``self.current_time``, the previous day's k-data is reloaded
    (fetching it from the exchange once if it is missing locally), the last
    close is refreshed and today's already-triggered subscriptions are
    reloaded. The percentage change of the event price against the last
    close is then passed to ``check_subscription``.

    :param event_item: mapping with at least ``'timestamp'`` and ``'price'``.
    """
    self.logger.debug(event_item)

    # NOTE(review): last_date is always set to the day *before* the event, so
    # if current_time is on the event's day this comparison never matches and
    # the refresh runs for every event - confirm whether that is intended.
    if not self.last_date or not is_same_date(self.last_date, self.current_time):
        self.last_date = to_timestamp(event_item['timestamp']) - timedelta(days=1)
        last_date_str = to_time_str(self.last_date)

        self.last_kdata = get_kdata(self.security_item, the_date=last_date_str)
        if self.last_kdata is None:
            # Not available locally yet: download once and retry.
            fetch_kdata(exchange_str=self.security_item['exchange'])
            self.last_kdata = get_kdata(self.security_item, the_date=last_date_str)

        if self.last_kdata is not None:
            self.last_close = self.last_kdata.loc[last_date_str, 'close']
        else:
            # No exception is being handled here, so logger.exception() would
            # only append a meaningless "NoneType: None" traceback.
            self.logger.error("could not get last close for:{}".format(
                self.last_date))

        self.update_today_triggered()

    price = event_item['price']
    change_pct = (price - self.last_close) / self.last_close

    self.logger.info(
        "{} last day close is:{},now price is:{},the change_pct is:{}".format(
            self.security_item['id'], self.last_close, price, change_pct))

    self.check_subscription(current_price=price, change_pct=change_pct)
def eos_account_to_es():
    """Copy every document of the ``accounts`` collection into Elasticsearch.

    The collection is read in pages of 1000 documents; each page is turned
    into ``EosAccount`` documents for the ``eos_account`` index and sent with
    one bulk request. Paging stops after the first page that holds fewer
    than a full page of documents.
    """
    account = db.accounts
    count = account.count()
    logger.info("current account size:{}".format(count))

    # Shape of a source document:
    # {
    #     "_id": ObjectId("5b6651aa30cafb28be710275"),
    #     "name": "eosio.ram",
    #     "create_time": ISODate("2018-06-09T11:57:39.000Z"),
    #     "liquid_eos": NumberLong(26757051448),
    #     "stacked_eos": NumberLong(0),
    #     "total_eos": NumberLong(26757051448),
    #     "unstacking_eos": NumberLong(0)
    # }
    start = 0
    size = 1000

    while True:
        actions = []
        for item in account.find().skip(start).limit(size):
            json_item = {
                "id": str(item["_id"]),
                "userId": item["name"],
                "liquidEos": item.get('liquid_eos', 0),
                "stackedEos": item.get('stacked_eos', 0),
                "totalEos": item.get('total_eos', 0),
                "unstackingEos": item.get('unstacking_eos', 0),
                "timestamp": to_time_str(item.get('create_time', datetime.now())),
                "updateTimestamp": to_time_str(datetime.now())
            }

            eos_account = EosAccount(meta={
                'id': json_item['id'],
                'index': "eos_account"
            })
            fill_doc_type(eos_account, json_item)
            actions.append(eos_account.to_dict(include_meta=True))

        if actions:
            resp = elasticsearch.helpers.bulk(es_client, actions)
            logger.info("index to {} success:{} failed:{}".format(
                "eos_account", resp[0], len(resp[1])))
            if resp[1]:
                logger.error("index to {} error:{}".format(
                    "eos_account", resp[1]))

        # A short page means the collection is exhausted.
        if len(actions) < size:
            break

        # Advance by a whole page. The previous `size - 1` step made every
        # page overlap the preceding one by one document.
        start += size
def es_get_statistic(security_item, the_date=None, start_date=None, end_date=None, level='day',
                     from_idx=0, size=500):
    """Read statistic documents of a security from Elasticsearch.

    With ``level == 'day'`` and ``the_date`` set, the single document whose
    id is ``<security id>_<date string>`` is returned via ``get_source``.
    Otherwise, when both ``start_date`` and ``end_date`` are set, the
    documents of the security's code within that timestamp range are
    searched in ascending timestamp order, sliced to
    ``[from_idx, from_idx + size)`` and returned through
    ``es_resp_to_payload``. In every other case ``None`` is returned.
    """
    item = to_security_item(security_item)
    index_name = get_es_statistic_index(security_type=item['type'],
                                        exchange=item['exchange'],
                                        level=level)

    # A single day's record is fetched directly by id.
    if level == 'day' and the_date:
        doc_id = '{}_{}'.format(item['id'], to_time_str(the_date))
        return es_client.get_source(index=index_name, doc_type='doc', id=doc_id)

    if not (start_date and end_date):
        return None

    search = Search(using=es_client, index=index_name, doc_type='doc')
    search = search.filter('term', code=item['code'])
    search = search.filter('range', timestamp={'gte': start_date, 'lte': end_date})
    search = search.sort({"timestamp": {"order": "asc"}})

    page = search[from_idx:from_idx + size]
    return es_resp_to_payload(page.execute())
def download_stock_list(self, response):
    """Merge a downloaded stock list into the stored security list CSV.

    The response body is parsed as CSV, reduced to the symbol, name, IPO
    year, sector and industry columns and renamed to the local meta schema.
    Codes that are not yet present in the stored list are appended and the
    list is written back, restricted to ``STOCK_META_COL``. Nothing is
    written when there are no new codes.

    :param response: response whose ``meta['exchange']`` names the exchange
        and whose ``body`` holds the CSV bytes.
    """
    exchange = response.meta['exchange']
    path = files_contract.get_security_list_path('stock', exchange)
    df = pd.read_csv(io.BytesIO(response.body), dtype=str)

    if os.path.exists(path):
        df_current = pd.read_csv(path, dtype=str)
        df_current = df_current.set_index('code', drop=False)
    else:
        df_current = pd.DataFrame()

    df = df.loc[:, ['Symbol', 'Name', 'IPOyear', 'Sector', 'industry']]
    df = df.dropna(subset=['Symbol', 'Name'])
    df.columns = ['code', 'name', 'listDate', 'sector', 'industry']
    df.listDate = df.listDate.apply(to_time_str)
    df['exchange'] = exchange
    df['type'] = 'stock'
    df['id'] = df[['type', 'exchange', 'code']].apply(lambda x: '_'.join(x.astype(str)), axis=1)
    df['sinaIndustry'] = ''
    df['sinaConcept'] = ''
    df['sinaArea'] = ''
    df = df.set_index('code', drop=False)

    # Sorted so the appended rows, and therefore the CSV, come out in a
    # deterministic order instead of arbitrary set order.
    new_codes = sorted(
        code for code in set(df.index.tolist()) - set(df_current.index.tolist())
        if code != 'nan')

    if new_codes:
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        df_current = pd.concat([df_current, df.loc[new_codes, :]], ignore_index=False)
        df_current = df_current.loc[:, STOCK_META_COL]
        df_current.columns = STOCK_META_COL
        df_current.to_csv(path, index=False)
def es_get_kdata(security_item, the_date=None, start_date=None, end_date=None, level='day', fields=None,
                 from_idx=0, size=10):
    """
    Read kdata of a security from Elasticsearch.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item, its id or its code
    the_date : TimeStamp str or TimeStamp
        fetch the kdata of exactly this date
    start_date : TimeStamp str or TimeStamp
        first date of the range
    end_date : TimeStamp str or TimeStamp
        last date of the range
    level : str or int
        the kdata level, {1,5,15,30,60,'day','week','month'}, default 'day'
    fields : list
        field list for the es _source; when not set, the default fields of
        the security type are used
    from_idx : int
        pagination start offset
    size : int
        pagination page size

    Returns
    -------
    JSON
        the document source for a single day, the ``hits`` section as a dict
        for a range query, or None when neither ``the_date`` (with level
        'day') nor both range bounds are given

    """
    item = to_security_item(security_item)
    index_name = get_es_kdata_index(security_type=item['type'],
                                    exchange=item['exchange'],
                                    level=level)

    if not fields:
        default_fields = {
            'stock': KDATA_STOCK_COL,
            'future': KDATA_FUTURE_COL,
            'index': KDATA_INDEX_COL,
        }
        fields = default_fields.get(item['type'], KDATA_COMMON_COL)

    # A single day's daily k-line is fetched directly by id.
    if level == 'day' and the_date:
        doc_id = '{}_{}'.format(item['id'], to_time_str(the_date))
        return es_client.get_source(index=index_name, doc_type='doc', id=doc_id,
                                    _source_include=fields)

    if not (start_date and end_date):
        return None

    search = Search(using=es_client, index=index_name, doc_type='doc')
    search = search.source(include=fields)
    search = search.filter('term', code=item['code'])
    search = search.filter('range', timestamp={'gte': start_date, 'lte': end_date})
    search = search.sort({"timestamp": {"order": "asc"}})

    resp = search[from_idx:from_idx + size].execute()
    return resp['hits'].to_dict()
def update_today_triggered(self):
    """Reload triggered price subscriptions into ``self.has_triggered``.

    Searches ``SubscriptionTriggered`` for documents with ``subType``
    ``'price'`` whose timestamp is at or after ``to_time_str(datetime.now())``
    and stores each hit under the key ``"<subId>_<conditionType>"``.
    """
    query = SubscriptionTriggered.search()
    query = query.filter('term', subType='price')
    query = query.filter('range', timestamp={'gte': to_time_str(datetime.now())})

    response = query.execute()

    for hit in response['hits']['hits']:
        triggered = hit['_source'].to_dict()
        key = "{}_{}".format(triggered['subId'], triggered['conditionType'])
        self.has_triggered[key] = triggered
def fetch_kdata(exchange_str='bitstamp'):
    """Download daily k-data for the cryptocurrency pairs of one exchange.

    For every security of the exchange whose name is in
    ``CRYPTOCURRENCY_PAIR`` the daily OHLCV candles are fetched through ccxt,
    today's candle is skipped, the remaining ones are appended to the frame
    returned by ``get_latest_download_trading_date`` and the result is saved
    with ``kdata_df_save``. A failure for one security is logged and does
    not stop the others.

    :param exchange_str: ccxt exchange id, e.g. ``'bitstamp'``.
    """
    # Look the exchange class up by attribute instead of eval(), so the id
    # can never be executed as arbitrary code.
    ccxt_exchange = getattr(ccxt, exchange_str)()

    if not ccxt_exchange.has['fetchOHLCV']:
        logger.warning("exchange:{} not support fetchOHLCV".format(exchange_str))
        return

    securities = get_security_list(security_type='cryptocurrency', exchanges=[exchange_str])
    for _, security_item in securities.iterrows():
        try:
            if security_item['name'] not in CRYPTOCURRENCY_PAIR:
                continue

            start_date, df = get_latest_download_trading_date(security_item)
            # Daily k-lines are only fetched up to yesterday.
            end_date = pd.Timestamp.today() - pd.DateOffset(1)

            if start_date and (start_date > end_date):
                logger.info("{} kdata is ok".format(security_item['code']))
                continue

            try:
                kdatas = ccxt_exchange.fetch_ohlcv(security_item['name'], timeframe='1d')
                # for rateLimit
                time.sleep(5)
            except Exception:
                # logger.exception already records the active exception; the
                # former extra positional argument broke message formatting.
                logger.exception(
                    "fetch_kdata for {} {} failed".format(exchange_str, security_item['name']))
                continue

            today = pd.Timestamp.today()
            rows = []
            for kdata in kdatas:
                timestamp = pd.Timestamp.fromtimestamp(int(kdata[0] / 1000))
                # Today's candle is still incomplete.
                if is_same_date(timestamp, today):
                    continue
                rows.append({
                    'timestamp': to_time_str(timestamp),
                    'code': security_item['code'],
                    'name': security_item['name'],
                    'open': kdata[1],
                    'high': kdata[2],
                    'low': kdata[3],
                    'close': kdata[4],
                    'volume': kdata[5],
                    'securityId': security_item['id'],
                    'preClose': None,
                    'change': None,
                    'changePct': None
                })

            if rows:
                # One concat instead of one DataFrame.append per row (append
                # was quadratic and is gone from pandas 2.0).
                df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

            if not df.empty:
                df = df.loc[:, KDATA_COMMON_COL]
                kdata_df_save(df, get_kdata_path(security_item), calculate_change=True)
                logger.info(
                    "fetch_kdata for exchange:{} security:{} success".format(
                        exchange_str, security_item['name']))
        except Exception:
            # Formerly logged at info level with the error silently dropped.
            logger.exception(
                "fetch_kdata for exchange:{} security:{} failed".format(
                    exchange_str, security_item['name']))
def restore_kdata():
    """Clean the stored k-data CSV files of codes 600000 to 600017.

    For every security in that code range the 163 ``bfq`` file and the sina
    ``hfq`` and ``bfq`` files are rewritten in place by
    ``_restore_kdata_file``.
    """
    securities = get_security_list(start_code='600000', end_code='600017')
    for _, security_item in securities.iterrows():
        _restore_kdata_file(get_kdata_path(security_item, source='163', fuquan='bfq'))

        for fuquan in ('hfq', 'bfq'):
            _restore_kdata_file(get_kdata_path(security_item, source='sina', fuquan=fuquan))


def _restore_kdata_file(path):
    """Rewrite one k-data CSV without ``id`` column and duplicate index rows.

    The timestamp column is normalised with ``to_time_str``. For the 163
    file the converted column used to be computed and then thrown away, so
    those timestamps were never rewritten.
    """
    df = pd.read_csv(path, dtype=str)
    df = time_index_df(df)

    if 'id' in df.columns:
        df = df.drop(['id'], axis=1)

    # Keep the first row of every duplicated index value.
    df = df[~df.index.duplicated(keep='first')]
    df.timestamp = df.timestamp.apply(to_time_str)
    df.to_csv(path, index=False)
def set_subscription(sub_type, id):
    """Create or update a price subscription from the request's JSON body.

    With ``id`` set, the existing subscription is loaded and an error
    response is returned when it does not exist. Without ``id``, the
    security id, the subscription id (``<userId>_<securityId>``) and a
    creation timestamp are generated and a new model is created. The model
    is filled from the validated input, saved, and the subscription is
    published to the ``subscription`` Kafka topic.

    Returns the ``error(...)`` response for missing/invalid input or an
    unknown id, otherwise ``success(...)`` carrying the saved document.
    """
    payload = request.get_json()
    if not payload:
        return error(ERROR_NO_INPUT_JSON_PROVIDED)

    # Validate and deserialize input
    try:
        sub_dict, _ = price_subscription_shema.load(payload)
    except ValidationError as err:
        return error(ERROR_INVALID_INPUT_JSON, err.messages)

    if id:
        # the update operation
        sub_model = PriceSubscription.get(id=id, ignore=404)
        sub_dict['id'] = id
        if not sub_model:
            logger.warning('could not find subscription:{}'.format(id))
            return error(ERROR_SUBSCRIPTION_NOT_FOUND, id)
    else:
        # generate securityId
        security_id = get_security_id(sub_dict['securityType'], sub_dict['exchange'],
                                      sub_dict['code'])
        sub_dict['securityId'] = security_id
        # generate subscription id
        sub_dict['id'] = "{}_{}".format(sub_dict['userId'], security_id)
        sub_dict['timestamp'] = to_time_str(datetime.now(), time_fmt=TIME_FORMAT_MICRO)

        sub_model = PriceSubscription(meta={'id': sub_dict['id']})

    fill_doc_type(sub_model, sub_dict)
    sub_model.save(force=True)

    result_json = sub_model.to_dict(include_meta=True)
    logger.info('subscription:{} saved'.format(result_json))

    message_value = json.dumps(sub_dict).encode('utf8')
    message_key = sub_dict['id'].encode('utf8')
    now_ms = int(pd.Timestamp.now().timestamp() * 1000)

    resp = kafka_producer.send('subscription', message_value, key=message_key,
                               timestamp_ms=now_ms)
    kafka_producer.flush()
    logger.info(resp)

    return success(payload=result_json)
def save(self, using=None, index=None, validate=True, force=True, **kwargs):
    """Save the document, optionally only when it does not exist yet.

    A missing ``timestamp`` is set to the current time first. With ``force``
    truthy the document is always saved; otherwise it is saved only when
    ``self.exist(index=index)`` is falsy.

    :return: the result of the parent ``save`` call, or ``None`` when the
        save was skipped.
    """
    # assign now if no timestamp given
    if not self.timestamp:
        self.timestamp = to_time_str(datetime.now(), time_fmt=TIME_FORMAT_MICRO)

    if not force and self.exist(index=index):
        logger.debug("doc{} exists".format(self['id']))
        return None

    return super().save(using, index, validate, **kwargs)
def get_ticks(security_item, the_date=None, start_date=None, end_date=None):
    """
    get the ticks.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item, its id or its code
    the_date : TimeStamp str or TimeStamp
        get the ticks of exactly this date
    start_date : TimeStamp str or TimeStamp
        first date of the range; defaults to the security's ``listDate``
        when only ``end_date`` is given
    end_date : TimeStamp str or TimeStamp
        last date of the range; defaults to today when only ``start_date``
        is given

    Yields
    -------
    DataFrame
        the result of ``_parse_tick`` for one tick file: the single file of
        ``the_date``, or every matching file of the tick directory in sorted
        path order

    """
    security_item = to_security_item(security_item)

    if the_date:
        tick_path = files_contract.get_tick_path(security_item, to_time_str(the_date))
        yield _parse_tick(tick_path, security_item)
        return

    tick_dir = files_contract.get_tick_dir(security_item)
    file_names = os.listdir(tick_dir)

    if file_names and (start_date or end_date):
        if not start_date:
            start_date = security_item['listDate']
        if not end_date:
            end_date = datetime.datetime.today()

        # Build the date index once; it used to be rebuilt for every file
        # name inside the comprehension.
        wanted_dates = pd.date_range(start=start_date, end=end_date)
        file_names = [f for f in file_names if get_file_name(f) in wanted_dates]

    for tick_path in sorted(os.path.join(tick_dir, f) for f in file_names):
        yield _parse_tick(tick_path, security_item)
def download_sp500_pe(self, response):
    """Parse the S&P 500 PE table of the response into ``self.df_pe``.

    Every row of the ``datatable`` element except the first is reduced to
    its non-blank cell texts; the first cell becomes the timestamp and the
    second the PE value. The records are appended to ``self.df_pe``, which
    is then re-indexed with ``index_df_with_time``. Any error while parsing
    or appending is logged and nothing is appended.
    """
    rows = response.xpath('//*[@id="datatable"]/tr').extract()

    try:
        records = []
        # The first row is skipped.
        for row in rows[1:]:
            raw_cells = Selector(text=row).xpath('//td//text()').extract()
            cells = [text for text in (cell.strip() for cell in raw_cells) if text]
            records.append({"timestamp": to_time_str(cells[0]),
                            "pe": to_float(cells[1])})

        if records:
            merged = self.df_pe.append(records, ignore_index=True)
            self.df_pe = merged
            self.df_pe = index_df_with_time(self.df_pe)
    except Exception as e:
        self.logger.exception('error when getting sp500 pe url={} error={}'.format(response.url, e))
def download_sp500_pe(self, response):
    """Parse the S&P 500 PE table of the response into ``self.df_pe``.

    Every row of the ``datatable`` element except the first is reduced to
    its non-blank cell texts; the first cell becomes the timestamp and the
    second the PE value. The records are appended to ``self.df_pe``, which
    is then re-indexed with ``index_df_with_time``. Any error while parsing
    or appending is logged together with its traceback and nothing is
    appended.
    """
    trs = response.xpath('//*[@id="datatable"]/tr').extract()

    price_jsons = []

    try:
        # The first row is skipped.
        for tr in trs[1:]:
            tds = Selector(text=tr).xpath('//td//text()').extract()
            tds = [x.strip() for x in tds if x.strip()]

            price_jsons.append({"timestamp": to_time_str(tds[0]),
                                "pe": to_float(tds[1])})

        if price_jsons:
            self.df_pe = self.df_pe.append(price_jsons, ignore_index=True)
            self.df_pe = index_df_with_time(self.df_pe)
    except Exception as e:
        # logger.exception keeps the traceback; logger.error dropped it.
        self.logger.exception('error when getting sp500 pe url={} error={}'.format(response.url, e))
def start_requests(self):
    """Yield one trading-calendar request per weekday up to today.

    The range starts on January 1st of the current year or, when trading
    dates have already been saved, on ``next_date`` of the last saved one.
    Saturdays and Sundays are skipped.
    """
    # Statistics of earlier years can be downloaded directly, so start from
    # this year only.
    first_date = "{}0101".format(datetime.today().year)

    # Resume after the dates that were already saved.
    if self.saved_trading_dates:
        first_date = next_date(self.saved_trading_dates[-1])

    for day in pd.date_range(start=first_date, end=datetime.today()):
        # weekend
        if day.weekday() in (5, 6):
            continue

        day_str = to_time_str(the_time=day, time_fmt='%Y%m%d')
        yield Request(url=self.get_trading_date_url(the_date=day_str),
                      meta={'the_date': day_str},
                      callback=self.download_trading_calendar)
def get_kdata(security_item, the_date=None, start_date=None, end_date=None, fuquan='bfq', dtype=None, source='163',
              level='day'):
    """Load stored kdata of a security from its CSV file.

    A string ``security_item`` is resolved with ``get_security_item``: as an
    id when it contains ``'stock'``, otherwise as a code. With ``the_date``
    set, the row(s) of that date are returned (an empty DataFrame when the
    date is not in the index). Otherwise the frame is sliced to
    ``[start_date, end_date]``, where a missing start defaults to the
    security's ``listDate`` (``'2002-01-01'`` when that is a non-string NaN)
    and a missing end to today. An empty DataFrame is returned when the file
    does not exist.
    """
    if type(security_item) is str:
        lookup = {'id': security_item} if 'stock' in security_item else {'code': security_item}
        security_item = get_security_item(**lookup)

    the_path = files_contract.get_kdata_path(security_item, source=source, fuquan=fuquan)

    if not os.path.isfile(the_path):
        return pd.DataFrame()

    if not dtype:
        dtype = {"code": str, 'timestamp': str}

    kdata = pd.read_csv(the_path, dtype=dtype)
    kdata.timestamp = kdata.timestamp.apply(lambda x: to_time_str(x))
    kdata = kdata.set_index(kdata['timestamp'], drop=False)
    kdata.index = pd.to_datetime(kdata.index)
    kdata = kdata.sort_index()

    if the_date:
        return kdata.loc[the_date] if the_date in kdata.index else pd.DataFrame()

    if not start_date:
        list_date = security_item['listDate']
        list_date_missing = type(list_date) is not str and np.isnan(list_date)
        start_date = '2002-01-01' if list_date_missing else security_item['listDate']

    if not end_date:
        end_date = datetime.datetime.today()

    if start_date and end_date:
        kdata = kdata.loc[start_date:end_date]

    return kdata
# (line index in the downloaded statement file, output field name).
# The line indices that do not appear below are not read.
_CASH_FLOW_STATEMENT_FIELDS = (
    # I. Cash flows from operating activities
    (3, "cashFromSellingCommoditiesOrOfferingLabor"),  # cash received from selling goods and rendering services
    (4, "refundOfTaxAndFeeReceived"),  # tax and fee refunds received
    (5, "cashReceivedRelatingToOtherOperatingActivities"),  # other cash received relating to operating activities
    (6, "subTotalOfCashInflowsFromOperatingActivities"),  # subtotal of operating cash inflows
    (7, "cashPaidForGoodsAndServices"),  # cash paid for goods and services
    (8, "cashPaidToAndOnBehalfOfemployees"),  # cash paid to and on behalf of employees
    (9, "paymentsOfTaxesAndSurcharges"),  # taxes and surcharges paid
    (10, "cashPaidRelatingToOtherOperatingActivities"),  # other cash paid relating to operating activities
    (11, "subTotalOfCashOutflowsFromOperatingActivities"),  # subtotal of operating cash outflows
    (12, "netCashFlowsFromOperatingActivities"),  # net cash flows from operating activities
    # II. Cash flows from investing activities
    (14, "cashReceivedFromDisposalOfInvestments"),  # cash received from disposal of investments
    (15, "cashReceivedFromReturnsOnIvestments"),  # cash received from returns on investments
    (16, "netCashReceivedFromDisposalAssets"),  # net cash from disposal of fixed, intangible and other long-term assets
    (17, "netCashReceivedFromDisposalSubsidiaries"),  # net cash from disposal of subsidiaries and other business units
    (18, "cashReceivedFromOtherInvesting"),  # other cash received relating to investing activities
    (19, "subTotalOfCashInflowsFromInvesting"),  # subtotal of investing cash inflows
    (20, "cashPaidToAcquireFixedAssets"),  # cash paid to acquire fixed, intangible and other long-term assets
    (21, "cashPaidToAcquireInvestments"),  # cash paid for investments
    (22, "netCashPaidToAcquireSubsidiaries"),  # net cash paid to acquire subsidiaries and other business units
    (23, "cashPaidRelatingToOtherInvesting"),  # other cash paid relating to investing activities
    (24, "subTotalOfCashOutflowsFromInvesting"),  # subtotal of investing cash outflows
    (25, "netCashFlowsFromInvesting"),  # net cash flows from investing activities
    # III. Cash flows from financing activities
    (27, "cashReceivedFromCapitalContributions"),  # cash received from capital contributions
    (28, "cashReceivedFromMinorityShareholdersOfSubsidiaries"),  # of which: received by subsidiaries from minority shareholders
    (29, "cashReceivedFromBorrowings"),  # cash received from borrowings
    (30, "cashReceivedFromIssuingBonds"),  # cash received from issuing bonds
    (31, "cashReceivedRelatingToOtherFinancingActivities"),  # other cash received relating to financing activities
    (32, "subTotalOfCashInflowsFromFinancingActivities"),  # subtotal of financing cash inflows
    (33, "cashRepaymentsOfBorrowings"),  # cash repayments of borrowings
    (34, "cashPaymentsForInterestExpensesAndDistributionOfDividendsOrProfits"),  # cash paid for dividends, profits or interest
    (35, "cashPaymentsForDividendsOrProfitToMinorityShareholders"),  # of which: dividends/profits paid by subsidiaries to minority shareholders
    (36, "cashPaymentsRelatingToOtherFinancingActivities"),  # other cash paid relating to financing activities
    (37, "subTotalOfCashOutflowsFromFinancingActivities"),  # subtotal of financing cash outflows
    (38, "netCashFlowsFromFinancingActivities"),  # net cash flows from financing activities
    # IV. Effect of exchange rate changes on cash and cash equivalents
    (39, "effectOfForeignExchangeRate"),
    # V. Net increase in cash and cash equivalents
    (40, "netIncreaseInCash"),
    (41, "cashAtBeginningOfyear"),  # plus: cash and cash equivalents at the beginning of the period
    # VI. Cash and cash equivalents at the end of the period
    (42, "cashAtEndOfyear"),
    # Notes
    (44, "netProfit"),  # net profit
    (45, "minorityBookValue"),  # minority interests
    (46, "unrealisedInvestmentLosses"),  # unrecognised investment losses
    (47, "allowanceForAssetDevaluation"),  # provision for asset impairment
    (48, "depreciationOfFixedAssets"),  # depreciation of fixed assets, oil-and-gas assets and productive biological assets
    (49, "amorizationOfIntangibleAssets"),  # amortisation of intangible assets
    (50, "longTermDeferredExpenses"),  # amortisation of long-term deferred expenses
    (51, "decreaseOfDeferredExpenses"),  # decrease in deferred expenses
    (52, "IncreaseOfwithholdingExpenses"),  # increase in accrued expenses
    (53, "lossOnDisposalOfFixedAssets"),  # loss on disposal of fixed, intangible and other long-term assets
    (54, "lossOnFixedAssetsDamaged"),  # loss on scrapping of fixed assets
    (55, "lossOnFairValueChange"),  # loss from changes in fair value
    (56, "changeOnDeferredRevenue"),  # increase in deferred revenue (negative: decrease)
    (57, "estimatedLiabilities"),  # estimated liabilities
    (58, "financingExpenses"),  # finance expenses
    (59, "investmentLoss"),  # investment loss
    (60, "decreaseOnDeferredIncomeTaxAssets"),  # decrease in deferred income tax assets
    (61, "increaseOnDeferredIncomeTaxLiabilities"),  # increase in deferred income tax liabilities
    (62, "decreaseInInventories"),  # decrease in inventories
    (63, "decreaseInReceivablesUnderOperatingActivities"),  # decrease in operating receivables
    (64, "increaseInReceivablesUnderOperatingActivities"),  # increase in operating payables
    (65, "decreaseOnAmountDue"),  # decrease in completed-but-unsettled amounts (negative: increase)
    (66, "increaseOnSettlementNotYetCompleted"),  # increase in settled-but-uncompleted amounts (negative: decrease)
    (67, "other"),  # other
    (68, "netCashFlowFromOperatingActivities"),  # net cash flow from operating activities
    (69, "debtsTransferToCapital"),  # debts converted to capital
    (70, "oneYearDueConvertibleBonds"),  # convertible bonds due within one year
    (71, "financingRentToFixedAsset"),  # fixed assets acquired under finance leases
    (72, "cashAtTheEndOfPeriod"),  # cash at the end of the period
    (73, "cashAtTheBeginningOfPeriod"),  # cash at the beginning of the period
    (74, "cashEquivalentsAtTheEndOfPeriod"),  # cash equivalents at the end of the period
    (75, "cashEquivalentsAtTheBeginningOfPeriod"),  # cash equivalents at the beginning of the period
    (76, "netIncreaseInCashAndCashEquivalents"),  # net increase in cash and cash equivalents
)


def get_cash_flow_statement_items(security_item, start_date=None, report_period=None, report_event_date=None):
    """Parse the stored cash flow statement of a security.

    The statement file holds one item per line and one report per column;
    for every line the first and the last whitespace-separated token are
    dropped. Each selected report becomes a dict with ``id``,
    ``reportDate``, ``reportEventDate``, ``securityId``, ``code`` and one
    ``to_float`` value for every entry of ``_CASH_FLOW_STATEMENT_FIELDS``.

    Three fields (``cashReceivedFromMinorityShareholdersOfSubsidiaries``,
    ``cashPaymentsForInterestExpensesAndDistributionOfDividendsOrProfits``
    and ``cashPaymentsForDividendsOrProfitToMinorityShareholders``) used to
    be emitted as raw strings; they now go through ``to_float`` like all
    the others.

    :param security_item: mapping with at least ``'id'`` and ``'code'``.
    :param start_date: skip reports dated before this date.
    :param report_period: only accept the report of exactly this date.
    :param report_event_date: skip reports whose event date is later than
        this date, so that no future data is returned.
    :return: ``[]`` when the file does not exist; the single report dict
        when ``report_period`` is given and matched; otherwise the list of
        report dicts sorted by report date.
    """
    path = get_cash_flow_statement_path(security_item)
    if not os.path.exists(path):
        return []

    encoding = settings.DOWNLOAD_TXT_ENCODING if settings.DOWNLOAD_TXT_ENCODING else detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    report_dates = lines[0].split()[1:-1]
    # Split every needed line up front, so that a truncated file fails
    # before any report is built.
    columns = [(name, lines[line_no].split()[1:-1])
               for line_no, name in _CASH_FLOW_STATEMENT_FIELDS]

    result_json = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue

        if report_period and not is_same_date(report_period, report_date):
            continue

        event_date = get_report_event_date(security_item, report_date=report_date)

        # use report_event_date to filter the reportEventDate before it for
        # not getting future data
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(event_date):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": event_date,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        for name, values in columns:
            the_json[name] = to_float(values[idx])

        # Reaching this point with report_period set means the date already
        # matched above, so this is the one requested report.
        if report_period:
            return the_json

        result_json.append(the_json)

    if result_json:
        result_json = sorted(result_json, key=lambda x: pd.Timestamp(x['reportDate']))
    return result_json
def parse_shfe_day_data(force_parse=False):
    """Convert the cached SHFE daily JSON files into per-contract k-data CSVs.

    Every file of this year's ``day_kdata`` cache directory (except the
    ``parsed`` bookkeeping file) is one trading date. For each instrument of
    a file whose delivery month starts with four digits, the contract is
    added to the SHFE future security list when it is new, and one k-data
    row is appended to the contract's CSV unless the date is already in
    that file's index. Handled dates are recorded in the ``parsed`` file.

    Fixes over the earlier version: the turnover estimate now really
    averages the four prices (only ``high`` was divided by 4 before), the
    regex is a raw string, a ``None`` security list no longer crashes, and
    the input file is closed before processing starts.

    :param force_parse: when true, files already listed in ``parsed`` are
        handled again.
    """
    # date, code, name, low, open, close, high, volume (lots), turnover,
    # unique id, previous close, change, change pct, open interest,
    # settlement, previous settlement, change (by settlement),
    # change pct (by settlement)
    kdata_columns = [
        'timestamp', 'code', 'name', 'low', 'open', 'close', 'high', 'volume',
        'turnover', 'securityId', 'preClose', 'change', 'changePct',
        'openInterest', 'settlement', 'preSettlement', 'change1', 'changePct1'
    ]

    cache_dir = get_exchange_cache_dir(security_type='future', exchange='shfe',
                                       the_year=datetime.datetime.today().year,
                                       data_type="day_kdata")
    the_parsed_path = os.path.join(cache_dir, 'parsed')
    the_parsed = []
    if os.path.exists(the_parsed_path):
        with open(the_parsed_path) as data_file:
            the_parsed = json.load(data_file)

    if force_parse:
        the_dates = [f for f in os.listdir(cache_dir) if f != 'parsed']
    else:
        the_dates = [
            f for f in os.listdir(cache_dir)
            if f != 'parsed' and f not in the_parsed
        ]

    for the_date in the_dates:
        the_path = os.path.join(cache_dir, the_date)
        logger.info("start handling {}".format(the_path))

        with open(the_path, 'r', encoding='UTF8') as f:
            the_datas = json.load(f)['o_curinstrument']

        for the_data in the_datas:
            # Sample instrument record:
            # {'CLOSEPRICE': 11480,
            #  'DELIVERYMONTH': '1809',
            #  'HIGHESTPRICE': 11555,
            #  'LOWESTPRICE': 11320,
            #  'OPENINTEREST': 425692,
            #  'OPENINTERESTCHG': 3918,
            #  'OPENPRICE': 11495,
            #  'ORDERNO': 0,
            #  'PRESETTLEMENTPRICE': 11545,
            #  'PRODUCTID': 'ru_f    ',
            #  'PRODUCTNAME': '天然橡胶            ',
            #  'PRODUCTSORTNO': 100,
            #  'SETTLEMENTPRICE': 11465,
            #  'VOLUME': 456574,
            #  'ZD1_CHG': -65,
            #  'ZD2_CHG': -80}
            if not re.match(r"\d{4}", the_data['DELIVERYMONTH']):
                continue

            product_id = the_data['PRODUCTID']
            code = "{}{}".format(product_id[:product_id.index('_')],
                                 the_data['DELIVERYMONTH'])
            logger.info("start handling {} for {}".format(code, the_date))

            name = get_future_name(code)
            security_id = "future_shfe_{}".format(code)
            security_item = {
                'code': code,
                'name': name,
                'id': security_id,
                'exchange': 'shfe',
                'type': 'future'
            }

            # Save the contract meta when the contract is not listed yet.
            security_list = get_security_list(security_type='future', exchanges=['shfe'])
            if security_list is None:
                security_list = pd.DataFrame()
            if 'code' in security_list.columns:
                security_list = security_list.set_index(
                    security_list['code'], drop=False)
            if code not in security_list.index:
                security_list = pd.concat(
                    [security_list, pd.DataFrame([security_item])],
                    ignore_index=True)
                security_list.to_csv(get_security_list_path('future', 'shfe'),
                                     index=False)

            kdata_path = get_kdata_path(item=security_item, source='exchange')
            # TODO: this directory handling should live in one shared place
            kdata_dir = get_kdata_dir(item=security_item)
            if not os.path.exists(kdata_dir):
                os.makedirs(kdata_dir)

            if os.path.exists(kdata_path):
                saved_df = pd.read_csv(kdata_path, dtype=str)
                saved_df = saved_df.set_index(saved_df['timestamp'], drop=False)
            else:
                saved_df = pd.DataFrame()

            if saved_df.empty or the_date not in saved_df.index:
                # Falsy prices/volume are stored as 0.
                low_price = the_data['LOWESTPRICE'] or 0
                open_price = the_data['OPENPRICE'] or 0
                close_price = the_data['CLOSEPRICE'] or 0
                high_price = the_data['HIGHESTPRICE'] or 0
                volume = the_data['VOLUME'] or 0

                # A string change value is treated as 0.
                change = 0 if isinstance(the_data['ZD1_CHG'], str) else the_data['ZD1_CHG']
                change1 = 0 if isinstance(the_data['ZD2_CHG'], str) else the_data['ZD2_CHG']

                pre_close = close_price - change
                pre_settlement = the_data['PRESETTLEMENTPRICE']

                # Zero denominators give a zero change pct (first trading day).
                change_pct = change / pre_close if pre_close != 0 else 0
                change_pct1 = change1 / pre_settlement if pre_settlement != 0 else 0

                kdata_row = {
                    "timestamp": to_time_str(the_date),
                    "code": code,
                    "name": name,
                    "low": low_price,
                    "open": open_price,
                    "close": close_price,
                    "high": high_price,
                    "volume": volume,
                    # Estimated turnover: average of the four prices times
                    # the volume.
                    "turnover": (low_price + open_price + close_price + high_price) / 4 * volume,
                    "securityId": security_id,
                    "preClose": pre_close,
                    "change": change,
                    "changePct": change_pct,
                    "openInterest": the_data['OPENINTEREST'],
                    "settlement": the_data['SETTLEMENTPRICE'],
                    "preSettlement": the_data['PRESETTLEMENTPRICE'],
                    "change1": change1,
                    "changePct1": change_pct1
                }

                saved_df = pd.concat([saved_df, pd.DataFrame([kdata_row])],
                                     ignore_index=True)
                saved_df = saved_df.loc[:, kdata_columns]
                saved_df = saved_df.drop_duplicates(subset='timestamp', keep='last')
                saved_df = saved_df.set_index(saved_df['timestamp'], drop=False)
                saved_df.index = pd.to_datetime(saved_df.index)
                saved_df = saved_df.sort_index()
                saved_df.to_csv(kdata_path, index=False)

                logger.info("end handling {} for {}".format(code, the_date))

        if the_date not in the_parsed:
            the_parsed.append(the_date)

        # Persist after every date so an interrupted run keeps its progress.
        if the_parsed:
            result_list = sorted(drop_duplicate(the_parsed))
            with open(the_parsed_path, 'w') as outfile:
                json.dump(result_list, outfile)

        logger.info("end handling {}".format(the_path))
def get_kdata(security_item, exchange=None, the_date=None, start_date=None, end_date=None, fuquan='bfq', dtype=None,
              source=None, level='day'):
    """
    Load the saved kdata csv of a security into a DataFrame.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code
    exchange : str
        the exchange,set this for cryptocurrency
    the_date : TimeStamp str or TimeStamp
        get the kdata for the exact date
    start_date : TimeStamp str or TimeStamp
        start date
    end_date : TimeStamp str or TimeStamp
        end date
    fuquan : str
        {"qfq","hfq","bfq"},default:"bfq"
    dtype : type
        the data type for the csv column,default: None
    source : str
        the data source,{'163','sina','exchange'},just used for internal merge
    level : str or int
        the kdata level,{1,5,15,30,60,'day','week','month'},default : 'day'
        (accepted for interface compatibility; this function body does not read it)

    Returns
    -------
    DataFrame or None
        An empty DataFrame when the kdata file does not exist, ``None`` when
        ``the_date`` is given but is not in the data, otherwise the selected
        rows indexed by timestamp.  For '163' stock data that has a ``factor``
        column, ``hfq*`` (and, when a latest factor is known, ``qfq*``) price
        columns are added.

    """
    # There are too many cryptocurrency exchanges, so the exchange must be resolved explicitly.
    security_item = to_security_item(security_item, exchange)
    source = adjust_source(security_item, source)

    # 163 data is already merged and carries the adjustment factor; it is always
    # stored under the 'bfq' directory, so read it from there and convert below.
    path_fuquan = 'bfq' if source == '163' else fuquan
    the_path = files_contract.get_kdata_path(security_item, source=source, fuquan=path_fuquan)

    if not os.path.isfile(the_path):
        return pd.DataFrame()

    if not dtype:
        dtype = {"code": str, 'timestamp': str}
    df = pd.read_csv(the_path, dtype=dtype)

    is_163_stock = source == '163' and security_item['type'] == 'stock'

    # Take the latest factor from the whole file, before any date filtering.
    latest_factor = None
    if is_163_stock and 'factor' in df.columns:
        rows_with_factor = df[df['factor'].notna()]
        if rows_with_factor.shape[0] > 0:
            latest_factor = rows_with_factor.tail(1).factor.iat[0]

    df.timestamp = df.timestamp.apply(to_time_str)
    df = df.set_index(df['timestamp'], drop=False)
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    if the_date:
        if the_date not in df.index:
            return None
        # The column was normalised with to_time_str above, so normalise the
        # requested date the same way; comparing the raw value would match
        # nothing when a Timestamp (or a differently formatted string) is passed.
        df = df.loc[df['timestamp'] == to_time_str(the_date)]
    else:
        if not start_date and not pd.isna(security_item['listDate']):
            start_date = security_item['listDate']
        if not end_date:
            end_date = datetime.datetime.today()

        if start_date and end_date:
            df = df.loc[start_date:end_date]

    # Price adjustment
    if is_163_stock and 'factor' in df.columns:
        # Backward-adjusted prices never change once computed.
        df['hfqClose'] = df.close * df.factor
        df['hfqOpen'] = df.open * df.factor
        df['hfqHigh'] = df.high * df.factor
        df['hfqLow'] = df.low * df.factor

        # Forward-adjusted prices are derived from the latest factor so that
        # the current price stays unchanged.
        if latest_factor:
            df['qfqClose'] = df.hfqClose / latest_factor
            df['qfqOpen'] = df.hfqOpen / latest_factor
            df['qfqHigh'] = df.hfqHigh / latest_factor
            df['qfqLow'] = df.hfqLow / latest_factor
        else:
            # Not inside an exception handler, so log a plain error instead of
            # logger.exception (which would append a bogus "NoneType: None" traceback).
            logger.error("missing latest factor for {}".format(security_item['id']))

    return df
def download_day_k_data(self, response):
    """
    Merge the day kdata carried by ``response`` into the csv at ``response.meta['path']``.

    Reads ``response.meta['path']`` (target csv) and ``response.meta['item']``
    (the security: 'code', 'id', 'name', 'type' are used), extracts the JSON
    object embedded in ``response.text`` and turns every row of its ``data``
    list into a kdata record.  New records are appended to the existing csv
    content, de-duplicated on ``timestamp`` (last one wins), sorted by time
    and written back.  Any exception is logged and swallowed.
    """
    path = response.meta['path']
    item = response.meta['item']

    try:
        # Previously saved csv data, if any.
        if os.path.exists(path):
            df_current = pd.read_csv(path, dtype=str)
            # Back-fill the name column for files that were saved without it.
            if 'name' not in df_current.columns:
                df_current['name'] = item['name']
        else:
            df_current = pd.DataFrame()

        tmp_str = response.text
        # Keep only the text from the first '{' through the first '}'.
        json_str = tmp_str[tmp_str.index('{'):tmp_str.index('}') + 1]
        the_datas = json.loads(json_str)['data']

        the_jsons = []
        pre_json = None
        for the_data in the_datas:
            the_json = {
                'code': item['code'],
                'securityId': item['id'],
                'name': item['name'],
                'timestamp': to_time_str(the_data[0]),
                'open': the_data[1],
                'high': the_data[2],
                'close': the_data[3],
                'low': the_data[4],
                'volume': the_data[5],
                'changePct': the_data[6]
            }

            # Some rows have the prices in the wrong positions: make sure
            # 'high' holds the maximum and 'low' the minimum by swapping with
            # the first field that actually holds that value.
            prices = the_data[1:5]
            fixes = (
                ('high', max(prices), ('close', 'open', 'low')),
                ('low', min(prices), ('close', 'open', 'high')),
            )
            for target, extreme, candidates in fixes:
                if the_json[target] == extreme:
                    continue
                for other in candidates:
                    if the_json[other] == extreme:
                        the_json[target], the_json[other] = the_json[other], the_json[target]
                        break

            # Turnover is an estimate: average price * volume.
            avg_price = (the_json['open'] + the_json['high'] + the_json['close'] + the_json['low']) / 4
            the_json['turnover'] = avg_price * the_json['volume']

            if pre_json:
                the_json['preClose'] = pre_json['close']
                the_json['change'] = the_json['close'] - pre_json['close']

            # TODO: this data is not available yet, fill it in later
            the_json['turnoverRate'] = 0
            the_json['tCap'] = 0
            the_json['mCap'] = 0
            the_json['factor'] = 0

            pre_json = the_json
            the_jsons.append(the_json)

        # Merge into the current csv content.  DataFrame.append was removed in
        # pandas 2.0; pd.concat is the equivalent that works on every version.
        df_current = pd.concat([df_current, pd.DataFrame(the_jsons)], ignore_index=True)

        if item['type'] == 'index':
            df_current = df_current.dropna(subset=KDATA_INDEX_COLUMN_163)
            # keep the column order stable
            df_current = df_current.loc[:, KDATA_COLUMN_INDEX]
        else:
            df_current = df_current.dropna(subset=KDATA_COLUMN_163)
            # keep the column order stable
            df_current = df_current.loc[:, KDATA_COLUMN_STOCK]

        df_current = df_current.drop_duplicates(subset='timestamp', keep='last')
        df_current = df_current.set_index(df_current['timestamp'], drop=False)
        df_current.index = pd.to_datetime(df_current.index)
        df_current = df_current.sort_index()
        df_current.to_csv(path, index=False)
    except Exception as e:
        # Best-effort callback: never propagate, but keep the traceback in the log.
        self.logger.exception(
            'error when getting k data url={} error={}'.format(
                response.url, e))
# (row index in the saved cash flow statement file, output key).
# Rows that are not listed (section headers etc.) are skipped.
_CASH_FLOW_STATEMENT_ROWS = (
    # I. Cash flows from operating activities
    # cash received from sales of goods and rendering of services
    (3, "cashFromSellingCommoditiesOrOfferingLabor"),
    # tax and fee refunds received
    (4, "refundOfTaxAndFeeReceived"),
    # other cash received relating to operating activities
    (5, "cashReceivedRelatingToOtherOperatingActivities"),
    # subtotal of cash inflows from operating activities
    (6, "subTotalOfCashInflowsFromOperatingActivities"),
    # cash paid for goods and services
    (7, "cashPaidForGoodsAndServices"),
    # cash paid to and on behalf of employees
    (8, "cashPaidToAndOnBehalfOfemployees"),
    # taxes and surcharges paid
    (9, "paymentsOfTaxesAndSurcharges"),
    # other cash paid relating to operating activities
    (10, "cashPaidRelatingToOtherOperatingActivities"),
    # subtotal of cash outflows from operating activities
    (11, "subTotalOfCashOutflowsFromOperatingActivities"),
    # net cash flows from operating activities
    (12, "netCashFlowsFromOperatingActivities"),

    # II. Cash flows from investing activities
    # cash received from disposal of investments
    (14, "cashReceivedFromDisposalOfInvestments"),
    # cash received from returns on investments
    (15, "cashReceivedFromReturnsOnIvestments"),
    # net cash received from disposal of fixed, intangible and other long-term assets
    (16, "netCashReceivedFromDisposalAssets"),
    # net cash received from disposal of subsidiaries and other business units
    (17, "netCashReceivedFromDisposalSubsidiaries"),
    # other cash received relating to investing activities
    (18, "cashReceivedFromOtherInvesting"),
    # subtotal of cash inflows from investing activities
    (19, "subTotalOfCashInflowsFromInvesting"),
    # cash paid to acquire fixed, intangible and other long-term assets
    (20, "cashPaidToAcquireFixedAssets"),
    # cash paid for investments
    (21, "cashPaidToAcquireInvestments"),
    # net cash paid to acquire subsidiaries and other business units
    (22, "netCashPaidToAcquireSubsidiaries"),
    # other cash paid relating to investing activities
    (23, "cashPaidRelatingToOtherInvesting"),
    # subtotal of cash outflows from investing activities
    (24, "subTotalOfCashOutflowsFromInvesting"),
    # net cash flows from investing activities
    (25, "netCashFlowsFromInvesting"),

    # III. Cash flows from financing activities
    # cash received from capital contributions
    (27, "cashReceivedFromCapitalContributions"),
    # of which: cash received by subsidiaries from minority shareholders
    (28, "cashReceivedFromMinorityShareholdersOfSubsidiaries"),
    # cash received from borrowings
    (29, "cashReceivedFromBorrowings"),
    # cash received from issuing bonds
    (30, "cashReceivedFromIssuingBonds"),
    # other cash received relating to financing activities
    (31, "cashReceivedRelatingToOtherFinancingActivities"),
    # subtotal of cash inflows from financing activities
    (32, "subTotalOfCashInflowsFromFinancingActivities"),
    # cash repayments of borrowings
    (33, "cashRepaymentsOfBorrowings"),
    # cash paid for dividends, profit distribution or interest
    (34, "cashPaymentsForInterestExpensesAndDistributionOfDividendsOrProfits"),
    # of which: dividends and profits paid by subsidiaries to minority shareholders
    (35, "cashPaymentsForDividendsOrProfitToMinorityShareholders"),
    # other cash paid relating to financing activities
    (36, "cashPaymentsRelatingToOtherFinancingActivities"),
    # subtotal of cash outflows from financing activities
    (37, "subTotalOfCashOutflowsFromFinancingActivities"),
    # net cash flows from financing activities
    (38, "netCashFlowsFromFinancingActivities"),

    # IV. Effect of exchange rate changes on cash and cash equivalents
    (39, "effectOfForeignExchangeRate"),
    # V. Net increase in cash and cash equivalents
    (40, "netIncreaseInCash"),
    # plus: opening balance of cash and cash equivalents
    (41, "cashAtBeginningOfyear"),
    # VI. Closing balance of cash and cash equivalents
    (42, "cashAtEndOfyear"),

    # Supplementary notes
    # net profit
    (44, "netProfit"),
    # minority interests
    (45, "minorityBookValue"),
    # unrecognised investment losses
    (46, "unrealisedInvestmentLosses"),
    # provision for asset impairment
    (47, "allowanceForAssetDevaluation"),
    # depreciation of fixed assets, depletion of oil and gas assets, depreciation of productive assets
    (48, "depreciationOfFixedAssets"),
    # amortisation of intangible assets
    (49, "amorizationOfIntangibleAssets"),
    # amortisation of long-term deferred expenses
    (50, "longTermDeferredExpenses"),
    # decrease in deferred expenses
    (51, "decreaseOfDeferredExpenses"),
    # increase in accrued expenses
    (52, "IncreaseOfwithholdingExpenses"),
    # loss on disposal of fixed, intangible and other long-term assets
    (53, "lossOnDisposalOfFixedAssets"),
    # loss on scrapping of fixed assets
    (54, "lossOnFixedAssetsDamaged"),
    # loss from changes in fair value
    (55, "lossOnFairValueChange"),
    # increase in deferred revenue (less: decrease)
    (56, "changeOnDeferredRevenue"),
    # estimated liabilities
    (57, "estimatedLiabilities"),
    # finance expenses
    (58, "financingExpenses"),
    # investment loss
    (59, "investmentLoss"),
    # decrease in deferred income tax assets
    (60, "decreaseOnDeferredIncomeTaxAssets"),
    # increase in deferred income tax liabilities
    (61, "increaseOnDeferredIncomeTaxLiabilities"),
    # decrease in inventories
    (62, "decreaseInInventories"),
    # decrease in operating receivables
    (63, "decreaseInReceivablesUnderOperatingActivities"),
    # increase in operating payables
    (64, "increaseInReceivablesUnderOperatingActivities"),
    # decrease in completed-but-unsettled amounts (less: increase)
    (65, "decreaseOnAmountDue"),
    # increase in settled-but-uncompleted amounts (less: decrease)
    (66, "increaseOnSettlementNotYetCompleted"),
    # other
    (67, "other"),
    # net cash flow from operating activities
    (68, "netCashFlowFromOperatingActivities"),
    # debts converted to capital
    (69, "debtsTransferToCapital"),
    # convertible corporate bonds due within one year
    (70, "oneYearDueConvertibleBonds"),
    # fixed assets acquired under finance lease
    (71, "financingRentToFixedAsset"),
    # closing balance of cash
    (72, "cashAtTheEndOfPeriod"),
    # opening balance of cash
    (73, "cashAtTheBeginningOfPeriod"),
    # closing balance of cash equivalents
    (74, "cashEquivalentsAtTheEndOfPeriod"),
    # opening balance of cash equivalents
    (75, "cashEquivalentsAtTheBeginningOfPeriod"),
    # net increase in cash and cash equivalents
    (76, "netIncreaseInCashAndCashEquivalents"),
)


def get_cash_flow_statement_items(security_item, start_date=None, report_period=None, report_event_date=None):
    """
    Parse the saved cash flow statement file of a security into per-report dicts.

    Every row of the file is split on whitespace and its first and last tokens
    are dropped (presumably the row label and a trailing marker - verify
    against a downloaded file); the remaining tokens are read as one value per
    report date, aligned with the dates on row 0.

    Parameters
    ----------
    security_item : mapping
        the security; its "id" and "code" entries are used
    start_date : TimeStamp str or TimeStamp
        skip reports whose report date is earlier than this
    report_period : TimeStamp str or TimeStamp
        when set, only the report for this date is considered
    report_event_date : TimeStamp str or TimeStamp
        skip reports whose event date is later than this, so no future data
        is returned

    Returns
    -------
    list of dict, or dict
        ``[]`` when the file does not exist or nothing matches.  When
        ``report_period`` is set and matches, the single matching dict is
        returned (not a list).  Otherwise a list sorted by ``reportDate``.
        All statement fields are converted with ``to_float``.

    """
    path = get_cash_flow_statement_path(security_item)
    if not os.path.exists(path):
        return []

    encoding = settings.DOWNLOAD_TXT_ENCODING or detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    def row_values(row):
        # One token per report date once the first and last tokens are dropped.
        return lines[row].split()[1:-1]

    report_dates = row_values(0)
    # Parse every configured row up front so a truncated file fails before any output is built.
    columns = [(key, row_values(row)) for row, key in _CASH_FLOW_STATEMENT_ROWS]

    result_json = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue
        if report_period and not is_same_date(report_period, report_date):
            continue

        event_date = get_report_event_date(security_item, report_date=report_date)
        # Drop reports whose event date is after report_event_date, to avoid returning future data.
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(event_date):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": event_date,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        # Every field goes through to_float; previously three financing fields
        # were emitted as raw strings, unlike all their siblings.
        for key, values in columns:
            the_json[key] = to_float(values[idx])

        if report_period:
            # The period filter above already matched this report.
            return the_json
        result_json.append(the_json)

    return sorted(result_json, key=lambda x: pd.Timestamp(x['reportDate']))
# (row index in the saved balance sheet file, output key).
# Rows that are not listed (section headers etc.) are skipped.
_BALANCE_SHEET_ROWS = (
    # Current assets
    # monetary funds
    (3, "moneyFunds"),
    # financial assets held for trading
    (4, "heldForTradingFinancialAssets"),
    # derivative financial assets
    (5, "derivative"),
    # bills receivable
    (6, "billsReceivable"),
    # accounts receivable
    (7, "accountsReceivable"),
    # prepayments
    (8, "prepaidAccounts"),
    # interest receivable
    (9, "interestReceivable"),
    # dividends receivable
    (10, "dividendReceivable"),
    # other receivables
    (11, "otherReceivables"),
    # financial assets purchased under resale agreements
    (12, "buyingBackTheSaleOfFinancialAssets"),
    # inventories
    (13, "inventory"),
    # assets classified as held for sale
    (14, "assetsForSale"),
    # non-current assets due within one year
    (15, "nonCurrentAssetsDueWithinOneYear"),
    # deferred (unamortised) expenses
    (16, "unamortizedExpenditures"),
    # pending gains/losses on current assets
    (17, "waitDealIntangibleAssetsLossOrIncome"),
    # other current assets
    (18, "otherCurrentAssets"),
    # total current assets
    (19, "totalCurrentAssets"),

    # Non-current assets
    # loans and advances granted
    (21, "loansAndPaymentsOnBehalf"),
    # available-for-sale financial assets
    (22, "availableForSaleFinancialAssets"),
    # held-to-maturity investments
    (23, "heldToMaturityInvestment"),
    # long-term receivables
    (24, "longTermReceivables"),
    # long-term equity investments
    (25, "longTermEquityInvestment"),
    # investment real estate
    (26, "investmentRealEstate"),
    # net fixed assets
    (27, "NetfixedAssets"),
    # construction in progress
    (28, "constructionInProcess"),
    # construction materials
    (29, "engineerMaterial"),
    # fixed assets pending disposal
    (30, "fixedAssetsInLiquidation"),
    # productive biological assets
    (31, "productiveBiologicalAssets"),
    # non-profit biological assets
    (32, "nonProfitLivingAssets"),
    # oil and gas assets
    (33, "oilAndGasAssets"),
    # intangible assets
    (34, "intangibleAssets"),
    # development expenditure
    (35, "developmentExpenditure"),
    # goodwill
    (36, "goodwill"),
    # long-term deferred expenses
    (37, "longTermDeferredExpenses"),
    # deferred income tax assets
    (38, "deferredIncomeTaxAssets"),
    # other non-current assets
    (39, "OtherNonCurrentAssets"),
    # total non-current assets
    (40, "nonCurrentAssets"),
    # total assets
    (41, "totalAssets"),

    # Current liabilities
    # short-term borrowings
    (43, "shortTermBorrowing"),
    # financial liabilities held for trading
    (44, "transactionFinancialLiabilities"),
    # bills payable
    (45, "billsPayable"),
    # accounts payable
    (46, "accountsPayable"),
    # advances received
    (47, "accountsReceivedInAdvance"),
    # handling charges and commissions payable
    (48, "handlingChargesAndCommissionsPayable"),
    # employee benefits payable
    (49, "employeeBenefitsPayable"),
    # taxes and surcharges payable
    (50, "taxesAndSurchargesPayable"),
    # interest payable
    (51, "interestPayable"),
    # dividends payable
    (52, "dividendpayable"),
    # other payables
    (53, "otherPayables"),
    # accrued expenses
    (54, "withholdingExpenses"),
    # deferred income within one year
    (55, "deferredIncomeWithinOneYear"),
    # short-term debentures payable
    (56, "shortTermDebenturesPayable"),
    # non-current liabilities due within one year
    (57, "nonCurrentLiabilitiesMaturingWithinOneYear"),
    # other current liabilities
    (58, "otherCurrentLiability"),
    # total current liabilities
    (59, "totalCurrentLiabilities"),

    # Non-current liabilities
    # long-term borrowings
    (61, "LongTermBorrowing"),
    # bonds payable
    (62, "bondPayable"),
    # long-term payables
    (63, "longTermPayables"),
    # long-term employee benefits payable
    (64, "longTermEmployeeBenefitsPayable"),
    # special payables
    (65, "specialPayable"),
    # estimated non-current liabilities
    (66, "expectedNonCurrentLiabilities"),
    # deferred income tax liabilities
    (67, "deferredIncomeTaxLiabilities"),
    # long-term deferred revenue
    (68, "longTermDeferredRevenue"),
    # other non-current liabilities
    (69, "otherNonCurrentLiabilities"),
    # total non-current liabilities
    (70, "totalNonCurrentLiabilities"),
    # total liabilities
    (71, "totalLiabilities"),

    # Owners' equity
    # paid-in capital (or share capital)
    (73, "totalShareCapital"),
    # capital surplus
    (74, "capitalSurplus"),
    # less: treasury stock
    (75, "treasuryStock"),
    # other comprehensive income
    (76, "otherComprehensiveIncome"),
    # special reserve
    (77, "theSpecialReserve"),
    # surplus reserves
    (78, "surplusReserves"),
    # general risk reserve
    (79, "generalRiskPreparation"),
    # undistributed profits
    (80, "undistributedProfits"),
    # total equity attributable to shareholders of the parent company (net assets)
    (81, "bookValue"),
    # minority interests
    (82, "minorityBookValue"),
    # total owners' (or shareholders') equity
    (83, "totalBookValue"),
    # total liabilities and owners' (or shareholders') equity
    (84, "totalLiabilitiesAndOwnersEquity"),
)


def get_balance_sheet_items(security_item, start_date=None, report_period=None, report_event_date=None):
    """
    Parse the saved balance sheet file of a security into per-report dicts.

    Every row of the file is split on whitespace and its first and last tokens
    are dropped (presumably the row label and a trailing marker - verify
    against a downloaded file); the remaining tokens are read as one value per
    report date, aligned with the dates on row 0.

    Parameters
    ----------
    security_item : mapping
        the security; its "id" and "code" entries are used
    start_date : TimeStamp str or TimeStamp
        skip reports whose report date is earlier than this
    report_period : TimeStamp str or TimeStamp
        when set, only the report for this date is considered
    report_event_date : TimeStamp str or TimeStamp
        skip reports whose event date is later than this, so no future data
        is returned

    Returns
    -------
    list of dict, or dict
        ``[]`` when the file does not exist or nothing matches.  When
        ``report_period`` is set and matches, the single matching dict is
        returned (not a list).  Otherwise a list sorted by ``reportDate``.

    """
    path = get_balance_sheet_path(security_item)
    if not os.path.exists(path):
        return []

    encoding = settings.DOWNLOAD_TXT_ENCODING or detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    def row_values(row):
        # One token per report date once the first and last tokens are dropped.
        return lines[row].split()[1:-1]

    report_dates = row_values(0)
    # Parse every configured row up front so a truncated file fails before any output is built.
    columns = [(key, row_values(row)) for row, key in _BALANCE_SHEET_ROWS]

    result_json = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue
        if report_period and not is_same_date(report_period, report_date):
            continue

        # Looked up only for reports that survived the filters above, so a
        # report_period query no longer resolves the event date of every report.
        event_date = get_report_event_date(security_item, report_date=report_date)
        # Drop reports whose event date is after report_event_date, to avoid returning future data.
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(event_date):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": event_date,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        for key, values in columns:
            the_json[key] = to_float(values[idx])

        if report_period:
            # The period filter above already matched this report.
            return the_json
        result_json.append(the_json)

    return sorted(result_json, key=lambda x: pd.Timestamp(x['reportDate']))
def get_income_statement_items(security_item, start_date=None, report_period=None, report_event_date=None):
    """
    Parse the downloaded income statement file of a security.

    Row 0 of the file holds the report dates and the rows listed in
    ``fields`` hold one statement item each. For every row the first and the
    last whitespace-separated tokens are discarded (presumably the row label
    and a trailing filler -- confirm against the downloaded file format).

    Parameters
    ----------
    security_item : mapping
        the security item; its "id" and "code" entries are read
    start_date : TimeStamp str or TimeStamp
        report dates earlier than this are skipped
    report_period : TimeStamp str or TimeStamp
        only the report whose date matches (per is_same_date) is considered
    report_event_date : TimeStamp str or TimeStamp
        reports whose event date is later than this are skipped, so that no
        "future" data is returned

    Returns
    -------
    list of dict or dict
        the reports sorted by "reportDate"; an empty list if the file does not
        exist or nothing passes the filters. When report_period is given and
        a report matches, that single dict is returned instead of a list.
    """
    path = get_income_statement_path(security_item)
    if not os.path.exists(path):
        return []

    encoding = settings.DOWNLOAD_TXT_ENCODING or detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    # (output key, row index in the file); row indexes not listed are not read
    fields = (
        # --- total operating revenue ---
        ("operatingRevenue", 2),  # operating revenue
        # --- total operating costs ---
        ("OperatingTotalCosts", 4),  # total operating costs
        ("OperatingCosts", 5),  # operating costs
        ("businessTaxesAndSurcharges", 6),  # business taxes and surcharges
        ("sellingExpenses", 7),  # selling expenses
        ("ManagingCosts", 8),  # administrative expenses
        ("financingExpenses", 9),  # financial expenses
        ("assetsDevaluation", 10),  # asset impairment loss
        ("incomeFromChangesInFairValue", 11),  # gains from changes in fair value
        ("investmentIncome", 12),  # investment income
        # of which: investment income from associates and joint ventures
        ("investmentIncomeFromRelatedEnterpriseAndJointlyOperating", 13),
        ("exchangeGains", 14),  # exchange gains
        # --- operating profit ---
        ("operatingProfit", 15),  # operating profit
        ("nonOperatingIncome", 16),  # add: non-operating income
        ("nonOperatingExpenditure", 17),  # less: non-operating expenditure
        ("disposalLossOnNonCurrentLiability", 18),  # of which: loss on disposal of non-current assets
        # --- total profit ---
        ("totalProfits", 19),  # total profit
        ("incomeTaxExpense", 20),  # less: income tax expense
        # --- net profit ---
        ("netProfit", 21),  # net profit
        ("netProfitAttributedToParentCompanyOwner", 22),  # net profit attributable to owners of the parent
        ("minorityInterestIncome", 23),  # minority interest income
        # --- earnings per share ---
        ("EPS", 25),  # basic EPS (yuan/share)
        ("dilutedEPS", 26),  # diluted EPS (yuan/share)
        # --- other comprehensive income ---
        ("otherComprehensiveIncome", 27),  # other comprehensive income
        # --- total comprehensive income ---
        ("accumulatedOtherComprehensiveIncome", 28),  # total comprehensive income
        ("attributableToOwnersOfParentCompany", 29),  # ... attributable to owners of the parent
        ("attributableToMinorityShareholders", 30),  # ... attributable to minority shareholders
    )

    report_dates = lines[0].split()[1:-1]
    # every row is split up front, so a truncated file fails before any filtering
    columns = [(key, lines[row].split()[1:-1]) for key, row in fields]

    result_json = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue
        if report_period and not is_same_date(report_period, report_date):
            continue

        event_date = get_report_event_date(security_item, report_date=report_date)
        # drop reports published after report_event_date so no future data leaks out
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(event_date):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": event_date,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        # every statement field, including the joint-venture investment income,
        # is converted with to_float
        for key, values in columns:
            the_json[key] = to_float(values[idx])

        # reaching this point with report_period set means the period matched
        if report_period:
            return the_json
        result_json.append(the_json)

    result_json.sort(key=lambda x: pd.Timestamp(x['reportDate']))
    return result_json
def download_day_k_data(self, response):
    """
    Merge the daily k-line rows carried by ``response`` into the csv at
    ``response.meta['path']``.

    The payload is the text between the first '{' and the first '}' of the
    response body, parsed as JSON; its 'data' entry is a list of rows laid out
    as (date, open, high, close, low, volume, change percent).

    Any exception is logged (with traceback) and swallowed, so a bad response
    does not stop the caller.

    :param response: response whose meta holds 'path' (csv file) and 'item'
                     (security item with 'code', 'id', 'name' and 'type')
    """
    path = response.meta['path']
    item = response.meta['item']

    try:
        # rows already saved in the csv
        if os.path.exists(path):
            df_current = pd.read_csv(path, dtype=str)
            # back-fill the name column for files written without it
            if 'name' not in df_current.columns:
                df_current['name'] = item['name']
        else:
            df_current = pd.DataFrame()

        tmp_str = response.text
        json_str = tmp_str[tmp_str.index('{'):tmp_str.index('}') + 1]
        tmp_json = json.loads(json_str)

        # each row: date, open, high, close, low, volume, change percent
        the_datas = tmp_json['data']

        the_jsons = []
        pre_json = None
        for the_data in the_datas:
            the_json = {'code': item['code'],
                        'securityId': item['id'],
                        'name': item['name'],
                        'timestamp': to_time_str(the_data[0]),
                        'open': the_data[1],
                        'high': the_data[2],
                        'close': the_data[3],
                        'low': the_data[4],
                        'volume': the_data[5],
                        'changePct': the_data[6]}

            # some rows have the prices in the wrong positions: swap the true
            # maximum into 'high' ...
            real_high = max(the_data[1], the_data[2], the_data[3], the_data[4])
            if the_json['high'] != real_high:
                for field in ('close', 'open', 'low'):
                    if the_json[field] == real_high:
                        the_json[field], the_json['high'] = the_json['high'], the_json[field]
                        break

            # ... and the true minimum into 'low'
            real_low = min(the_data[1], the_data[2], the_data[3], the_data[4])
            if the_json['low'] != real_low:
                for field in ('close', 'open', 'high'):
                    if the_json[field] == real_low:
                        the_json[field], the_json['low'] = the_json['low'], the_json[field]
                        break

            # turnover is an estimate: mean of the four prices times the volume
            avg_price = (the_json['open'] + the_json['high'] + the_json['close'] + the_json['low']) / 4
            the_json['turnover'] = avg_price * the_json['volume']

            # the first row of the response has no predecessor, so it gets
            # neither 'preClose' nor 'change'
            if pre_json is not None:
                the_json['preClose'] = pre_json['close']
                the_json['change'] = the_json['close'] - pre_json['close']

            # TODO: not available from this source yet, to be filled in later
            the_json['turnoverRate'] = 0
            the_json['tCap'] = 0
            the_json['mCap'] = 0
            the_json['factor'] = 0

            pre_json = the_json
            the_jsons.append(the_json)

        # merge into the current csv content; pd.concat replaces
        # DataFrame.append, which no longer exists in pandas >= 2.0
        df_current = pd.concat([df_current, pd.DataFrame(the_jsons)], ignore_index=True)

        if item['type'] == 'index':
            df_current = df_current.dropna(subset=KDATA_INDEX_COLUMN_163)
            # enforce the column order
            df_current = df_current.loc[:, KDATA_COLUMN_INDEX]
        else:
            df_current = df_current.dropna(subset=KDATA_COLUMN_163)
            # enforce the column order
            df_current = df_current.loc[:, KDATA_COLUMN_STOCK]

        # keep the newest row per timestamp and write the file sorted by date
        df_current = df_current.drop_duplicates(subset='timestamp', keep='last')
        df_current = df_current.set_index(df_current['timestamp'], drop=False)
        df_current.index = pd.to_datetime(df_current.index)
        df_current = df_current.sort_index()
        df_current.to_csv(path, index=False)
    except Exception as e:
        # best effort: report the failure with its traceback and carry on
        self.logger.exception('error when getting k data url={} error={}'.format(response.url, e))
def get_income_statement_items(security_item, start_date=None, report_period=None, report_event_date=None,
                               return_type='json'):
    """
    get income statement items.

    Row 0 of the downloaded file holds the report dates and the rows listed in
    ``fields`` hold one statement item each; the file is read as GB2312. For
    every row the first and the last whitespace-separated tokens are discarded
    (presumably the row label and a trailing filler -- confirm against the
    downloaded file format).

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code

    start_date : TimeStamp str or TimeStamp
        start date; earlier report dates are skipped

    report_period : TimeStamp str or TimeStamp
        the finance report period,eg.'20170331'

    report_event_date : TimeStamp str or TimeStamp
        the finance report published date; reports published later than this
        are skipped so that no future data is returned

    return_type : str {'json','doc'},default: 'json'
        'doc' wraps every item in an IncomeStatement filled via fill_doc_type

    Returns
    -------
    list of IncomeStatement
    list of json
        sorted by 'reportDate'; an empty list if the file does not exist or
        nothing passes the filters. When report_period is given and a report
        matches, that single item is returned instead of a list.

    """
    security_item = to_security_item(security_item)

    path = get_income_statement_path(security_item)
    if not os.path.exists(path):
        return []

    encoding = 'GB2312'

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    # (output key, row index in the file); row indexes not listed are not read
    fields = (
        # --- total operating revenue ---
        ("operatingRevenue", 2),  # operating revenue
        # --- total operating costs ---
        ("operatingTotalCosts", 4),  # total operating costs
        ("operatingCosts", 5),  # operating costs
        ("businessTaxesAndSurcharges", 6),  # business taxes and surcharges
        ("sellingExpenses", 7),  # selling expenses
        ("ManagingCosts", 8),  # administrative expenses
        ("financingExpenses", 9),  # financial expenses
        ("assetsDevaluation", 10),  # asset impairment loss
        ("incomeFromChangesInFairValue", 11),  # gains from changes in fair value
        ("investmentIncome", 12),  # investment income
        # of which: investment income from associates and joint ventures
        ("investmentIncomeFromRelatedEnterpriseAndJointlyOperating", 13),
        ("exchangeGains", 14),  # exchange gains
        # --- operating profit ---
        ("operatingProfit", 15),  # operating profit
        ("nonOperatingIncome", 16),  # add: non-operating income
        ("nonOperatingExpenditure", 17),  # less: non-operating expenditure
        ("disposalLossOnNonCurrentLiability", 18),  # of which: loss on disposal of non-current assets
        # --- total profit ---
        ("totalProfits", 19),  # total profit
        ("incomeTaxExpense", 20),  # less: income tax expense
        # --- net profit ---
        ("netProfit", 21),  # net profit
        ("netProfitAttributedToParentCompanyOwner", 22),  # net profit attributable to owners of the parent
        ("minorityInterestIncome", 23),  # minority interest income
        # --- earnings per share ---
        ("EPS", 25),  # basic EPS (yuan/share)
        ("dilutedEPS", 26),  # diluted EPS (yuan/share)
        # --- other comprehensive income ---
        ("otherComprehensiveIncome", 27),  # other comprehensive income
        # --- total comprehensive income ---
        ("accumulatedOtherComprehensiveIncome", 28),  # total comprehensive income
        ("attributableToOwnersOfParentCompany", 29),  # ... attributable to owners of the parent
        ("attributableToMinorityShareholders", 30),  # ... attributable to minority shareholders
    )

    report_dates = lines[0].split()[1:-1]
    # every row is split up front, so a truncated file fails before any filtering
    columns = [(key, lines[row].split()[1:-1]) for key, row in fields]

    result_list = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue
        if report_period and not is_same_date(report_period, report_date):
            continue

        event_date = get_report_event_date(security_item, report_date=report_date)
        # drop reports published after report_event_date so no future data leaks out
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(event_date):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": event_date,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        # every statement field, including the joint-venture investment income,
        # is converted with to_float
        for key, values in columns:
            the_json[key] = to_float(values[idx])

        the_data = the_json
        if return_type == 'doc':
            the_data = IncomeStatement(meta={'id': the_json['id']})
            fill_doc_type(the_data, the_json)

        # reaching this point with report_period set means the period matched
        if report_period:
            return the_data
        result_list.append(the_data)

    # NOTE(review): the sort key indexes items with ['reportDate']; confirm that
    # IncomeStatement documents support item access when return_type == 'doc'
    result_list.sort(key=lambda x: pd.Timestamp(x['reportDate']))
    return result_list
def get_income_statement_items(security_item, start_date=None, report_period=None, report_event_date=None):
    """
    Parse the downloaded income statement file of a security.

    Row 0 of the file holds the report dates and the rows listed in
    ``fields`` hold one statement item each. For every row the first and the
    last whitespace-separated tokens are discarded (presumably the row label
    and a trailing filler -- confirm against the downloaded file format).

    Parameters
    ----------
    security_item : mapping
        the security item; its "id" and "code" entries are read
    start_date : TimeStamp str or TimeStamp
        report dates earlier than this are skipped
    report_period : TimeStamp str or TimeStamp
        only the report whose date matches (per is_same_date) is considered
    report_event_date : TimeStamp str or TimeStamp
        reports whose event date is later than this are skipped, so that no
        "future" data is returned

    Returns
    -------
    list of dict or dict
        the reports sorted by "reportDate"; an empty list if the file does not
        exist or nothing passes the filters. When report_period is given and
        a report matches, that single dict is returned instead of a list.
    """
    path = get_income_statement_path(security_item)
    if not os.path.exists(path):
        return []

    encoding = settings.DOWNLOAD_TXT_ENCODING or detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    # (output key, row index in the file); row indexes not listed are not read
    fields = (
        # --- total operating revenue ---
        ("operatingRevenue", 2),  # operating revenue
        # --- total operating costs ---
        ("OperatingTotalCosts", 4),  # total operating costs
        ("OperatingCosts", 5),  # operating costs
        ("businessTaxesAndSurcharges", 6),  # business taxes and surcharges
        ("sellingExpenses", 7),  # selling expenses
        ("ManagingCosts", 8),  # administrative expenses
        ("financingExpenses", 9),  # financial expenses
        ("assetsDevaluation", 10),  # asset impairment loss
        ("incomeFromChangesInFairValue", 11),  # gains from changes in fair value
        ("investmentIncome", 12),  # investment income
        # of which: investment income from associates and joint ventures
        ("investmentIncomeFromRelatedEnterpriseAndJointlyOperating", 13),
        ("exchangeGains", 14),  # exchange gains
        # --- operating profit ---
        ("operatingProfit", 15),  # operating profit
        ("nonOperatingIncome", 16),  # add: non-operating income
        ("nonOperatingExpenditure", 17),  # less: non-operating expenditure
        ("disposalLossOnNonCurrentLiability", 18),  # of which: loss on disposal of non-current assets
        # --- total profit ---
        ("totalProfits", 19),  # total profit
        ("incomeTaxExpense", 20),  # less: income tax expense
        # --- net profit ---
        ("netProfit", 21),  # net profit
        ("netProfitAttributedToParentCompanyOwner", 22),  # net profit attributable to owners of the parent
        ("minorityInterestIncome", 23),  # minority interest income
        # --- earnings per share ---
        ("EPS", 25),  # basic EPS (yuan/share)
        ("dilutedEPS", 26),  # diluted EPS (yuan/share)
        # --- other comprehensive income ---
        ("otherComprehensiveIncome", 27),  # other comprehensive income
        # --- total comprehensive income ---
        ("accumulatedOtherComprehensiveIncome", 28),  # total comprehensive income
        ("attributableToOwnersOfParentCompany", 29),  # ... attributable to owners of the parent
        ("attributableToMinorityShareholders", 30),  # ... attributable to minority shareholders
    )

    report_dates = lines[0].split()[1:-1]
    # every row is split up front, so a truncated file fails before any filtering
    columns = [(key, lines[row].split()[1:-1]) for key, row in fields]

    result_json = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue
        if report_period and not is_same_date(report_period, report_date):
            continue

        event_date = get_report_event_date(security_item, report_date=report_date)
        # drop reports published after report_event_date so no future data leaks out
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(event_date):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": event_date,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        # every statement field, including the joint-venture investment income,
        # is converted with to_float
        for key, values in columns:
            the_json[key] = to_float(values[idx])

        # reaching this point with report_period set means the period matched
        if report_period:
            return the_json
        result_json.append(the_json)

    result_json.sort(key=lambda x: pd.Timestamp(x['reportDate']))
    return result_json
def get_kdata(security_item, the_date=None, start_date=None, end_date=None, fuquan='bfq', dtype=None, source='163',
              level='day'):
    """
    get kdata.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code
    the_date : TimeStamp str or TimeStamp
        get the kdata for the exact date
    start_date : TimeStamp str or TimeStamp
        start date; defaults to the stock's listDate ('2002-01-01' when the
        listDate is NaN), or to 30 days ago for non-stock items
    end_date : TimeStamp str or TimeStamp
        end date; defaults to today
    fuquan : str
        {"qfq","hfq","bfq"},default:"bfq"
    dtype : type
        the data type for the csv column,default: None
    source : str
        the data source,{'163','sina'},default: '163'
    level : str or int
        the kdata level,{1,5,15,30,60,'day','week','month'},default : 'day'
        (accepted but not read by this implementation)

    Returns
    -------
    DataFrame
        indexed by timestamp and sorted ascending; an empty DataFrame when the
        csv file does not exist or ``the_date`` is not in the data. When
        ``the_date`` is found, the result of ``df.loc[the_date]`` is returned.

    """
    security_item = to_security_item(security_item)

    # 163 data is merged and carries the adjustment factor; everything is stored
    # under the 'bfq' directory, so it is read from that single place and
    # converted below when another fuquan is requested
    path_fuquan = 'bfq' if source == '163' else fuquan
    the_path = files_contract.get_kdata_path(security_item, source=source, fuquan=path_fuquan)

    if not os.path.isfile(the_path):
        return pd.DataFrame()

    if not dtype:
        dtype = {"code": str, 'timestamp': str}
    df = pd.read_csv(the_path, dtype=dtype)

    df.timestamp = df.timestamp.apply(to_time_str)
    df = df.set_index(df['timestamp'], drop=False)
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    if the_date:
        if the_date in df.index:
            return df.loc[the_date]
        return pd.DataFrame()

    if not start_date:
        if security_item['type'] == 'stock':
            list_date = security_item['listDate']
            # a missing listDate shows up as a float NaN rather than a string
            if not isinstance(list_date, str) and np.isnan(list_date):
                start_date = '2002-01-01'
            else:
                start_date = list_date
        else:
            start_date = datetime.datetime.today() - datetime.timedelta(days=30)
    if not end_date:
        end_date = datetime.datetime.today()

    if start_date and end_date:
        df = df.loc[start_date:end_date]

    if source == '163' and security_item['type'] == 'stock':
        if fuquan == 'bfq':
            return df
        # an empty slice has no "latest" factor to read, and nothing to adjust
        if 'factor' in df.columns and not df.empty:
            current_factor = df.tail(1).factor.iat[0]
            price_columns = ('close', 'open', 'high', 'low')
            # backward-adjusted (hfq) prices: price * factor, never changes afterwards
            for column in price_columns:
                df[column] *= df['factor']
            if fuquan == 'qfq':
                # forward-adjusted (qfq) prices are rescaled by the latest factor
                for column in price_columns:
                    df[column] /= current_factor
    return df
def get_balance_sheet_items(security_item, start_date=None, report_period=None, report_event_date=None):
    """
    Parse the downloaded balance sheet file of a security.

    Row 0 of the file holds the report dates and the rows listed in
    ``fields`` hold one balance sheet item each. For every row the first and
    the last whitespace-separated tokens are discarded (presumably the row
    label and a trailing filler -- confirm against the downloaded file format).

    Parameters
    ----------
    security_item : mapping
        the security item; its "id" and "code" entries are read
    start_date : TimeStamp str or TimeStamp
        report dates earlier than this are skipped
    report_period : TimeStamp str or TimeStamp
        only the report whose date matches (per is_same_date) is considered
    report_event_date : TimeStamp str or TimeStamp
        reports whose event date is later than this are skipped, so that no
        "future" data is returned

    Returns
    -------
    list of dict or dict
        the reports sorted by "reportDate"; an empty list if the file does not
        exist or nothing passes the filters. When report_period is given and
        a report matches, that single dict is returned instead of a list.
    """
    path = get_balance_sheet_path(security_item)
    if not os.path.exists(path):
        return []

    encoding = settings.DOWNLOAD_TXT_ENCODING or detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    # (output key, row index in the file); row indexes not listed are not read
    fields = (
        # --- current assets ---
        ("moneyFunds", 3),  # cash and cash equivalents
        ("heldForTradingFinancialAssets", 4),  # financial assets held for trading
        ("derivative", 5),  # derivative financial assets
        ("billsReceivable", 6),  # notes receivable
        ("accountsReceivable", 7),  # accounts receivable
        ("prepaidAccounts", 8),  # prepayments
        ("interestReceivable", 9),  # interest receivable
        ("dividendReceivable", 10),  # dividends receivable
        ("otherReceivables", 11),  # other receivables
        ("buyingBackTheSaleOfFinancialAssets", 12),  # financial assets purchased under resale agreements
        ("inventory", 13),  # inventories
        ("assetsForSale", 14),  # assets classified as held for sale
        ("nonCurrentAssetsDueWithinOneYear", 15),  # non-current assets due within one year
        ("unamortizedExpenditures", 16),  # deferred (unamortized) expenses
        ("waitDealIntangibleAssetsLossOrIncome", 17),  # gains/losses on current assets pending disposal
        ("otherCurrentAssets", 18),  # other current assets
        ("totalCurrentAssets", 19),  # total current assets
        # --- non-current assets ---
        ("loansAndPaymentsOnBehalf", 21),  # loans and advances granted
        ("availableForSaleFinancialAssets", 22),  # available-for-sale financial assets
        ("heldToMaturityInvestment", 23),  # held-to-maturity investments
        ("longTermReceivables", 24),  # long-term receivables
        ("longTermEquityInvestment", 25),  # long-term equity investments
        ("investmentRealEstate", 26),  # investment property
        ("NetfixedAssets", 27),  # net fixed assets
        ("constructionInProcess", 28),  # construction in progress
        ("engineerMaterial", 29),  # construction materials
        ("fixedAssetsInLiquidation", 30),  # fixed assets pending disposal
        ("productiveBiologicalAssets", 31),  # productive biological assets
        ("nonProfitLivingAssets", 32),  # public-welfare biological assets
        ("oilAndGasAssets", 33),  # oil and gas assets
        ("intangibleAssets", 34),  # intangible assets
        ("developmentExpenditure", 35),  # development expenditure
        ("goodwill", 36),  # goodwill
        ("longTermDeferredExpenses", 37),  # long-term deferred expenses
        ("deferredIncomeTaxAssets", 38),  # deferred income tax assets
        ("OtherNonCurrentAssets", 39),  # other non-current assets
        ("nonCurrentAssets", 40),  # total non-current assets
        ("totalAssets", 41),  # total assets
        # --- current liabilities ---
        ("shortTermBorrowing", 43),  # short-term borrowings
        ("transactionFinancialLiabilities", 44),  # financial liabilities held for trading
        ("billsPayable", 45),  # notes payable
        ("accountsPayable", 46),  # accounts payable
        ("accountsReceivedInAdvance", 47),  # advances from customers
        ("handlingChargesAndCommissionsPayable", 48),  # fees and commissions payable
        ("employeeBenefitsPayable", 49),  # employee benefits payable
        ("taxesAndSurchargesPayable", 50),  # taxes payable
        ("interestPayable", 51),  # interest payable
        ("dividendpayable", 52),  # dividends payable
        ("otherPayables", 53),  # other payables
        ("withholdingExpenses", 54),  # accrued expenses
        ("deferredIncomeWithinOneYear", 55),  # deferred income within one year
        ("shortTermDebenturesPayable", 56),  # short-term bonds payable
        ("nonCurrentLiabilitiesMaturingWithinOneYear", 57),  # non-current liabilities due within one year
        ("otherCurrentLiability", 58),  # other current liabilities
        ("totalCurrentLiabilities", 59),  # total current liabilities
        # --- non-current liabilities ---
        ("LongTermBorrowing", 61),  # long-term borrowings
        ("bondPayable", 62),  # bonds payable
        ("longTermPayables", 63),  # long-term payables
        ("longTermEmployeeBenefitsPayable", 64),  # long-term employee benefits payable
        ("specialPayable", 65),  # special payables
        ("expectedNonCurrentLiabilities", 66),  # estimated non-current liabilities
        ("deferredIncomeTaxLiabilities", 67),  # deferred income tax liabilities
        ("longTermDeferredRevenue", 68),  # long-term deferred income
        ("otherNonCurrentLiabilities", 69),  # other non-current liabilities
        ("totalNonCurrentLiabilities", 70),  # total non-current liabilities
        ("totalLiabilities", 71),  # total liabilities
        # --- owners' equity ---
        ("totalShareCapital", 73),  # paid-in capital (or share capital)
        ("capitalSurplus", 74),  # capital reserve
        ("treasuryStock", 75),  # less: treasury stock
        ("otherComprehensiveIncome", 76),  # other comprehensive income
        ("theSpecialReserve", 77),  # special reserve
        ("surplusReserves", 78),  # surplus reserve
        ("generalRiskPreparation", 79),  # general risk reserve
        ("undistributedProfits", 80),  # undistributed profits
        ("bookValue", 81),  # total equity attributable to shareholders of the parent (net assets)
        ("minorityBookValue", 82),  # minority interests
        ("totalBookValue", 83),  # total owners' (shareholders') equity
        ("totalLiabilitiesAndOwnersEquity", 84),  # total liabilities and owners' equity
    )

    report_dates = lines[0].split()[1:-1]
    # every row is split up front, so a truncated file fails before any filtering
    columns = [(key, lines[row].split()[1:-1]) for key, row in fields]

    result_json = []
    for idx, report_date in enumerate(report_dates):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue
        # filter on the period before looking up the event date, so the lookup
        # only runs for reports that can still be returned
        if report_period and not is_same_date(report_period, report_date):
            continue

        event_date = get_report_event_date(security_item, report_date=report_date)
        # drop reports published after report_event_date so no future data leaks out
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(event_date):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": event_date,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        for key, values in columns:
            the_json[key] = to_float(values[idx])

        # reaching this point with report_period set means the period matched
        if report_period:
            return the_json
        result_json.append(the_json)

    result_json.sort(key=lambda x: pd.Timestamp(x['reportDate']))
    return result_json
def get_subscription_triggered_topic(the_date):
    """Return 'subscription_triggered_' followed by ``to_time_str(the_date)``."""
    date_str = to_time_str(the_date)
    return 'subscription_triggered_{}'.format(date_str)