Exemplo n.º 1
0
    def init_new_computing_interval(self, event_timestamp):
        """Open a new computing interval anchored on the event's minute.

        Stores the converted event time in ``last_timestamp``, that same time
        with its seconds and microseconds removed in ``kdata_timestamp``, and
        two string renderings of ``kdata_timestamp`` (the default format and
        ``TIME_FORMAT_MICRO``).
        """
        self.last_timestamp = to_timestamp(event_timestamp)
        current = self.last_timestamp

        # Strip the sub-minute part so the kdata timestamp sits on the minute.
        sub_minute = timedelta(seconds=current.second,
                               microseconds=current.microsecond)
        self.kdata_timestamp = current - sub_minute

        minute_start = self.kdata_timestamp
        self.last_day_time_str = to_time_str(minute_start)
        self.last_mirco_time_str = to_time_str(minute_start,
                                               time_fmt=TIME_FORMAT_MICRO)
Exemplo n.º 2
0
    def on_event(self, event_item):
        """Handle one price event and check subscriptions against its change.

        When no reference date is cached, or the cached one is not the same
        date as ``self.current_time``, the reference date is reset to the day
        before the event and that day's close is reloaded (fetching kdata from
        the exchange once if the local lookup returned ``None``). The relative
        change of the event price against that close is then logged and
        passed to ``check_subscription``.

        Parameters
        ----------
        event_item : mapping
            must provide ``'timestamp'`` and ``'price'``

        Returns
        -------
        None
            the event is skipped (with a warning) when no last close is known
        """
        self.logger.debug(event_item)

        if not self.last_date or not is_same_date(self.last_date,
                                                  self.current_time):
            self.last_date = to_timestamp(
                event_item['timestamp']) - timedelta(days=1)
            last_date_str = to_time_str(self.last_date)

            self.last_kdata = get_kdata(self.security_item,
                                        the_date=last_date_str)

            # Nothing stored locally yet: download once and look again.
            if self.last_kdata is None:
                fetch_kdata(exchange_str=self.security_item['exchange'])
                self.last_kdata = get_kdata(self.security_item,
                                            the_date=last_date_str)

            if self.last_kdata is not None:
                self.last_close = self.last_kdata.loc[last_date_str, 'close']
            else:
                # Not inside an exception handler, so log a plain error;
                # logger.exception would append a meaningless empty traceback.
                self.logger.error("could not get last close for:{}".format(
                    self.last_date))

            self.update_today_triggered()

        # A previously cached close is still used if the reload failed; only
        # bail out when there is no close at all, instead of crashing below.
        last_close = getattr(self, 'last_close', None)
        if last_close is None:
            self.logger.warning(
                "{} has no last close, skip event".format(
                    self.security_item['id']))
            return

        price = event_item['price']
        change_pct = (price - last_close) / last_close

        self.logger.info(
            "{} last day close is:{},now price is:{},the change_pct is:{}".
            format(self.security_item['id'], last_close, price, change_pct))
        self.check_subscription(current_price=price, change_pct=change_pct)
Exemplo n.º 3
0
def eos_account_to_es():
    """Bulk-index every document of the Mongo ``accounts`` collection into ES.

    The collection is read page by page (``skip``/``limit``); each document is
    converted to an ``EosAccount`` whose id is the Mongo ``_id`` and sent to
    the ``eos_account`` index with ``elasticsearch.helpers.bulk``. Missing
    balance fields default to 0 and a missing ``create_time`` defaults to the
    current time.
    """
    account = db.accounts
    count = account.count()

    logger.info("current account size:{}".format(count))

    # Shape of a source document:
    # {
    #     "_id": ObjectId("5b6651aa30cafb28be710275"),
    #     "name": "eosio.ram",
    #     "create_time": ISODate("2018-06-09T11:57:39.000Z"),
    #     "liquid_eos": NumberLong(26757051448),
    #     "stacked_eos": NumberLong(0),
    #     "total_eos": NumberLong(26757051448),
    #     "unstacking_eos": NumberLong(0)
    # }
    start = 0
    size = 1000
    while True:
        actions = []

        # NOTE(review): no explicit sort is applied, so paging relies on the
        # collection returning documents in a stable order - confirm.
        for item in account.find().skip(start).limit(size):
            create_time = item.get('create_time', datetime.now())
            json_item = {
                "id": str(item["_id"]),
                "userId": item["name"],
                "liquidEos": item.get('liquid_eos', 0),
                "stackedEos": item.get('stacked_eos', 0),
                "totalEos": item.get('total_eos', 0),
                "unstackingEos": item.get('unstacking_eos', 0),
                "timestamp": to_time_str(create_time),
                "updateTimestamp": to_time_str(datetime.now())
            }
            eos_account = EosAccount(meta={
                'id': json_item['id'],
                'index': "eos_account"
            })
            fill_doc_type(eos_account, json_item)

            actions.append(eos_account.to_dict(include_meta=True))

        if actions:
            resp = elasticsearch.helpers.bulk(es_client, actions)
            logger.info("index to {} success:{} failed:{}".format(
                "eos_account", resp[0], len(resp[1])))
            if resp[1]:
                logger.error("index to {} error:{}".format(
                    "eos_account", resp[1]))

        # A short page means the collection is exhausted.
        if len(actions) < size:
            break

        # Advance by a full page; advancing by size - 1 re-read the last
        # document of every page.
        start += size
Exemplo n.º 4
0
def es_get_statistic(security_item,
                     the_date=None,
                     start_date=None,
                     end_date=None,
                     level='day',
                     from_idx=0,
                     size=500):
    """Fetch statistic documents for a security from Elasticsearch.

    With ``level == 'day'`` and a ``the_date``, the single document whose id
    is ``<security id>_<date string>`` is returned through ``get_source``.
    Otherwise, when both ``start_date`` and ``end_date`` are given, documents
    with the security's code and a timestamp inside that inclusive range are
    searched in ascending timestamp order, sliced to
    ``[from_idx, from_idx + size)`` and converted with
    ``es_resp_to_payload``. In every other case ``None`` is returned.
    """
    security_item = to_security_item(security_item)
    index = get_es_statistic_index(security_type=security_item['type'],
                                   exchange=security_item['exchange'],
                                   level=level)

    # A single day's daily record is fetched directly by its document id.
    if level == 'day' and the_date:
        doc_id = '{}_{}'.format(security_item['id'], to_time_str(the_date))
        return es_client.get_source(index=index, doc_type='doc', id=doc_id)

    if not (start_date and end_date):
        return None

    search = Search(using=es_client, index=index, doc_type='doc')
    search = search.filter('term', code=security_item['code'])
    search = search.filter('range',
                           timestamp={'gte': start_date, 'lte': end_date})
    search = search.sort({"timestamp": {"order": "asc"}})

    page = search[from_idx:from_idx + size]
    return es_resp_to_payload(page.execute())
Exemplo n.º 5
0
    def download_stock_list(self, response):
        """Merge stocks from a downloaded CSV into the saved security list.

        The response body is parsed as CSV, reduced to the Symbol / Name /
        IPOyear / Sector / industry columns and renamed to the local schema.
        Rows whose code is not yet present in the file at the security list
        path are appended to it; the file is rewritten only when there is at
        least one such row.

        Parameters
        ----------
        response : scrapy response
            ``response.meta['exchange']`` names the exchange and
            ``response.body`` holds the CSV bytes
        """
        exchange = response.meta['exchange']
        path = files_contract.get_security_list_path('stock', exchange)
        df = pd.read_csv(io.BytesIO(response.body), dtype=str)

        if os.path.exists(path):
            df_current = pd.read_csv(path, dtype=str)
            df_current = df_current.set_index('code', drop=False)
        else:
            df_current = pd.DataFrame()

        df = df.loc[:, ['Symbol', 'Name', 'IPOyear', 'Sector', 'industry']]
        df = df.dropna(subset=['Symbol', 'Name'])
        df.columns = ['code', 'name', 'listDate', 'sector', 'industry']
        df.listDate = df.listDate.apply(lambda x: to_time_str(x))
        df['exchange'] = exchange
        df['type'] = 'stock'
        # Column-wise join gives the same "<type>_<exchange>_<code>" ids as a
        # row-wise apply and also works on an empty frame.
        df['id'] = (df['type'].astype(str) + '_' +
                    df['exchange'].astype(str) + '_' +
                    df['code'].astype(str))
        df['sinaIndustry'] = ''
        df['sinaConcept'] = ''
        df['sinaArea'] = ''
        df = df.set_index('code', drop=False)

        # Keep rows whose code is not saved yet, in download order, and drop
        # the literal 'nan' code.
        is_new = ~df.index.isin(df_current.index) & (df.index != 'nan')
        new_rows = df.loc[is_new]

        if not new_rows.empty:
            # DataFrame.append no longer exists in pandas 2.x; concat is the
            # equivalent that works on every version.
            df_current = pd.concat([df_current, new_rows])
            df_current = df_current.loc[:, STOCK_META_COL]
            df_current.to_csv(path, index=False)
Exemplo n.º 6
0
    def download_stock_list(self, response):
        """Merge stocks from a downloaded CSV into the saved security list.

        The response body is parsed as CSV, reduced to the Symbol / Name /
        IPOyear / Sector / industry columns and renamed to the local schema.
        Rows whose code is not yet present in the file at the security list
        path are appended to it; the file is rewritten only when there is at
        least one such row.

        Parameters
        ----------
        response : scrapy response
            ``response.meta['exchange']`` names the exchange and
            ``response.body`` holds the CSV bytes
        """
        exchange = response.meta['exchange']
        path = files_contract.get_security_list_path('stock', exchange)
        df = pd.read_csv(io.BytesIO(response.body), dtype=str)

        if os.path.exists(path):
            df_current = pd.read_csv(path, dtype=str)
            df_current = df_current.set_index('code', drop=False)
        else:
            df_current = pd.DataFrame()

        df = df.loc[:, ['Symbol', 'Name', 'IPOyear', 'Sector', 'industry']]
        df = df.dropna(subset=['Symbol', 'Name'])
        df.columns = ['code', 'name', 'listDate', 'sector', 'industry']
        df.listDate = df.listDate.apply(lambda x: to_time_str(x))
        df['exchange'] = exchange
        df['type'] = 'stock'
        # Column-wise join gives the same "<type>_<exchange>_<code>" ids as a
        # row-wise apply and also works on an empty frame.
        df['id'] = (df['type'].astype(str) + '_' +
                    df['exchange'].astype(str) + '_' +
                    df['code'].astype(str))
        df['sinaIndustry'] = ''
        df['sinaConcept'] = ''
        df['sinaArea'] = ''
        df = df.set_index('code', drop=False)

        # Keep rows whose code is not saved yet, in download order, and drop
        # the literal 'nan' code.
        is_new = ~df.index.isin(df_current.index) & (df.index != 'nan')
        new_rows = df.loc[is_new]

        if not new_rows.empty:
            # DataFrame.append no longer exists in pandas 2.x; concat is the
            # equivalent that works on every version.
            df_current = pd.concat([df_current, new_rows])
            df_current = df_current.loc[:, STOCK_META_COL]
            df_current.to_csv(path, index=False)
Exemplo n.º 7
0
def es_get_kdata(security_item, the_date=None, start_date=None, end_date=None, level='day', fields=None,
                 from_idx=0, size=10):
    """
    Query kdata documents from Elasticsearch.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item, its id or its code
    the_date : TimeStamp str or TimeStamp
        fetch the kdata of exactly this date (only used when level is 'day')
    start_date : TimeStamp str or TimeStamp
        inclusive lower bound of the timestamp range
    end_date : TimeStamp str or TimeStamp
        inclusive upper bound of the timestamp range
    level : str or int
        the kdata level,{1,5,15,30,60,'day','week','month'},default : 'day'
    fields : field list for the es _source
        when empty, the default field list of the security type is used
    from_idx : int
        pagination start offset
    size : int
        pagination page size

    Returns
    -------
    JSON
        the single document source, or the ``hits`` section of the range
        search as a dict; ``None`` when neither lookup applies

    """
    security_item = to_security_item(security_item)
    security_type = security_item['type']

    index = get_es_kdata_index(security_type=security_type, exchange=security_item['exchange'],
                               level=level)

    if not fields:
        # Per-type default columns; anything else gets the common ones.
        default_fields = {
            'stock': KDATA_STOCK_COL,
            'future': KDATA_FUTURE_COL,
            'index': KDATA_INDEX_COL,
        }
        fields = default_fields.get(security_type, KDATA_COMMON_COL)

    # A single day's daily kdata is fetched directly by its document id.
    if level == 'day' and the_date:
        doc_id = '{}_{}'.format(security_item['id'], to_time_str(the_date))
        return es_client.get_source(index=index, doc_type='doc', id=doc_id, _source_include=fields)

    if not (start_date and end_date):
        return None

    search = Search(using=es_client, index=index, doc_type='doc')
    search = search.source(include=fields)
    search = search.filter('term', code=security_item['code'])
    search = search.filter('range', timestamp={'gte': start_date, 'lte': end_date})
    search = search.sort({"timestamp": {"order": "asc"}})

    resp = search[from_idx:from_idx + size].execute()
    return resp['hits'].to_dict()
Exemplo n.º 8
0
    def update_today_triggered(self):
        """Cache already-triggered price subscriptions in ``has_triggered``.

        Searches ``SubscriptionTriggered`` for documents of sub type 'price'
        whose timestamp is at or after ``to_time_str(datetime.now())`` and
        stores each one under the key ``<subId>_<conditionType>``.
        """
        query = SubscriptionTriggered.search().filter('term', subType='price')
        query = query.filter('range', timestamp={'gte': to_time_str(datetime.now())})
        response = query.execute()

        for hit in response['hits']['hits']:
            record = hit['_source'].to_dict()
            key = "{}_{}".format(record['subId'], record['conditionType'])
            self.has_triggered[key] = record
Exemplo n.º 9
0
def fetch_kdata(exchange_str='bitstamp'):
    """Download daily kdata for the tracked cryptocurrency pairs of an exchange.

    For every cryptocurrency security of ``exchange_str`` whose name is in
    ``CRYPTOCURRENCY_PAIR``, daily OHLCV candles are fetched through ccxt,
    candles dated today are skipped, the rest is added to the frame returned
    by ``get_latest_download_trading_date`` and saved with ``kdata_df_save``.
    A failure for one security is logged and does not stop the others.

    Parameters
    ----------
    exchange_str : str
        name of the ccxt exchange class to instantiate
    """
    # Attribute lookup instead of eval: same result for a valid exchange
    # name, without executing an arbitrary string.
    ccxt_exchange = getattr(ccxt, exchange_str)()

    if not ccxt_exchange.has['fetchOHLCV']:
        logger.warning("exchange:{} not support fetchOHLCV".format(exchange_str))
        return

    securities = get_security_list(security_type='cryptocurrency', exchanges=[exchange_str])
    for _, security_item in securities.iterrows():
        try:
            if security_item['name'] not in CRYPTOCURRENCY_PAIR:
                continue

            start_date, df = get_latest_download_trading_date(security_item)
            today = pd.Timestamp.today()
            # Daily kdata is only fetched up to yesterday.
            end_date = today - pd.DateOffset(1)

            if start_date and (start_date > end_date):
                logger.info("{} kdata is ok".format(security_item['code']))
                continue

            try:
                kdatas = ccxt_exchange.fetch_ohlcv(security_item['name'], timeframe='1d')
                # for rateLimit
                time.sleep(5)
            except Exception:
                # logger.exception already records the active exception;
                # passing it as an extra argument broke the log formatting.
                logger.exception("fetch_kdata for {} {} failed".format(exchange_str, security_item['name']))
                continue

            # NOTE(review): every fetched candle is added regardless of
            # start_date; presumably kdata_df_save de-duplicates - confirm.
            rows = []
            for kdata in kdatas:
                # kdata[0] is divided by 1000 before conversion (ms -> s).
                timestamp = pd.Timestamp.fromtimestamp(int(kdata[0] / 1000))
                if is_same_date(timestamp, today):
                    continue
                rows.append({
                    'timestamp': to_time_str(timestamp),
                    'code': security_item['code'],
                    'name': security_item['name'],
                    'open': kdata[1],
                    'high': kdata[2],
                    'low': kdata[3],
                    'close': kdata[4],
                    'volume': kdata[5],
                    'securityId': security_item['id'],
                    'preClose': None,
                    'change': None,
                    'changePct': None
                })

            if rows:
                # One concat instead of a DataFrame.append per row (append
                # no longer exists in pandas 2.x and was quadratic).
                df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

            if not df.empty:
                df = df.loc[:, KDATA_COMMON_COL]
                kdata_df_save(df, get_kdata_path(security_item), calculate_change=True)
                logger.info(
                    "fetch_kdata for exchange:{} security:{} success".format(exchange_str, security_item['name']))
        except Exception:
            # The exception used to be dropped by the format string and the
            # failure logged at info level; keep it, with its traceback.
            logger.exception(
                "fetch_kdata for exchange:{} security:{} failed".format(exchange_str, security_item['name']))
Exemplo n.º 10
0
def restore_kdata():
    """Rewrite the kdata CSV files of codes 600000-600017 in cleaned form.

    For each security the '163' (bfq) file and the 'sina' (hfq and bfq) files
    are reloaded, time-indexed, stripped of an ``id`` column if present,
    de-duplicated on the index (first occurrence kept), given normalized
    timestamp strings and written back in place.
    """
    for _, security_item in get_security_list(start_code='600000', end_code='600017').iterrows():
        paths = [get_kdata_path(security_item, source='163', fuquan='bfq')]
        paths.extend(get_kdata_path(security_item, source='sina', fuquan=fuquan)
                     for fuquan in ('hfq', 'bfq'))

        for path in paths:
            df = pd.read_csv(path, dtype=str)
            df = time_index_df(df)

            if 'id' in df.columns:
                df = df.drop(['id'], axis=1)
            df = df[~df.index.duplicated(keep='first')]
            # The result must be assigned back; for the '163' file it used to
            # be computed and thrown away.
            df.timestamp = df.timestamp.apply(lambda x: to_time_str(x))
            df.to_csv(path, index=False)
Exemplo n.º 11
0
def set_subscription(sub_type, id):
    """Create or update a price subscription from the request's JSON body.

    With a truthy ``id`` the existing subscription is loaded and updated (an
    error response is returned when it does not exist). Without one, the
    security id, the subscription id ``<userId>_<securityId>`` and a creation
    timestamp are generated for a new subscription. The saved document is
    then published to the 'subscription' Kafka topic and returned as the
    success payload.
    """
    payload = request.get_json()
    if not payload:
        return error(ERROR_NO_INPUT_JSON_PROVIDED)

    # Validate and deserialize the input.
    try:
        sub_dict, _ = price_subscription_shema.load(payload)
    except ValidationError as err:
        return error(ERROR_INVALID_INPUT_JSON, err.messages)

    if id:
        # Update of an existing subscription.
        sub_model = PriceSubscription.get(id=id, ignore=404)
        sub_dict['id'] = id
        if not sub_model:
            logger.warning('could not find subscription:{}'.format(id))
            return error(ERROR_SUBSCRIPTION_NOT_FOUND, id)
    else:
        # New subscription: derive the security id, then the subscription id.
        security_id = get_security_id(sub_dict['securityType'],
                                      sub_dict['exchange'],
                                      sub_dict['code'])
        sub_dict['securityId'] = security_id
        sub_dict['id'] = "{}_{}".format(sub_dict['userId'], security_id)
        sub_dict['timestamp'] = to_time_str(datetime.now(),
                                            time_fmt=TIME_FORMAT_MICRO)

        sub_model = PriceSubscription(meta={'id': sub_dict['id']})

    fill_doc_type(sub_model, sub_dict)
    sub_model.save(force=True)

    result_json = sub_model.to_dict(include_meta=True)
    logger.info('subscription:{} saved'.format(result_json))

    message = json.dumps(sub_dict).encode('utf8')
    message_key = bytes(sub_dict['id'], encoding='utf8')
    now_ms = int(pd.Timestamp.now().timestamp() * 1000)
    resp = kafka_producer.send('subscription',
                               message,
                               key=message_key,
                               timestamp_ms=now_ms)
    kafka_producer.flush()
    logger.info(resp)

    return success(payload=result_json)
Exemplo n.º 12
0
    def save(self,
             using=None,
             index=None,
             validate=True,
             force=True,
             **kwargs):
        """Save the document, stamping it with the current time if needed.

        A falsy ``timestamp`` is replaced by the current time formatted with
        ``TIME_FORMAT_MICRO``. With ``force`` false, a document that already
        exists is left untouched: this is logged at debug level and ``None``
        is returned. Otherwise the parent class's ``save`` result is returned.
        """
        # Assign "now" when no timestamp was given.
        if not self.timestamp:
            now_str = to_time_str(datetime.now(), time_fmt=TIME_FORMAT_MICRO)
            self.timestamp = now_str

        if not force and self.exist(index=index):
            logger.debug("doc{} exists".format(self['id']))
            return None

        return super().save(using, index, validate, **kwargs)
Exemplo n.º 13
0
def get_ticks(security_item, the_date=None, start_date=None, end_date=None):
    """
    get the ticks.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code
    the_date : TimeStamp str or TimeStamp
        get the tick for the exact date
    start_date : TimeStamp str or TimeStamp
        start date; defaults to the security's listDate when only end_date
        is given
    end_date: TimeStamp str or TimeStamp
        end date; defaults to today when only start_date is given

    Yields
    -------
    DataFrame
        one parsed tick file per date: only ``the_date`` when it is given,
        otherwise every file of the tick directory (restricted to the date
        range when one is given) in sorted path order

    """

    security_item = to_security_item(security_item)

    if the_date:
        the_date = to_time_str(the_date)
        tick_path = files_contract.get_tick_path(security_item, the_date)
        yield _parse_tick(tick_path, security_item)
        return

    tick_dir = files_contract.get_tick_dir(security_item)

    filter_by_range = bool(start_date or end_date)
    if filter_by_range:
        if not start_date:
            start_date = security_item['listDate']
        if not end_date:
            end_date = datetime.datetime.today()

    file_names = os.listdir(tick_dir)
    if filter_by_range and file_names:
        # Build the date range once instead of once per directory entry.
        wanted_dates = pd.date_range(start=start_date, end=end_date)
        file_names = [f for f in file_names if get_file_name(f) in wanted_dates]

    tick_paths = [os.path.join(tick_dir, f) for f in file_names]
    for tick_path in sorted(tick_paths):
        yield _parse_tick(tick_path, security_item)
Exemplo n.º 14
0
    def download_sp500_pe(self, response):
        """Parse the PE table of ``response`` and add its rows to ``df_pe``.

        Every row of the ``datatable`` element after the first is read as
        (date, pe): the first non-blank cell text becomes ``timestamp`` and
        the second ``pe``. The parsed rows are added to ``self.df_pe``, which
        is then re-indexed with ``index_df_with_time``. Any error is logged
        with its traceback and leaves ``self.df_pe`` unchanged.
        """
        # Imported locally because this method newly relies on pandas.concat.
        import pandas as pd

        trs = response.xpath('//*[@id="datatable"]/tr').extract()

        price_jsons = []

        try:
            # trs[0] is skipped (presumably the header row - confirm).
            for tr in trs[1:]:
                tds = Selector(text=tr).xpath('//td//text()').extract()
                tds = [x.strip() for x in tds if x.strip()]

                price_jsons.append({"timestamp": to_time_str(tds[0]),
                                    "pe": to_float(tds[1])})

            if price_jsons:
                # DataFrame.append no longer exists in pandas 2.x; concat of
                # a frame built from the dicts is the equivalent.
                self.df_pe = pd.concat([self.df_pe, pd.DataFrame(price_jsons)], ignore_index=True)
                self.df_pe = index_df_with_time(self.df_pe)
        except Exception as e:
            self.logger.exception('error when getting sp500 pe url={} error={}'.format(response.url, e))
Exemplo n.º 15
0
    def download_sp500_pe(self, response):
        """Parse the PE table of ``response`` and add its rows to ``df_pe``.

        Every row of the ``datatable`` element after the first is read as
        (date, pe): the first non-blank cell text becomes ``timestamp`` and
        the second ``pe``. The parsed rows are added to ``self.df_pe``, which
        is then re-indexed with ``index_df_with_time``. Any error is logged
        and leaves ``self.df_pe`` unchanged.
        """
        # Imported locally because this method newly relies on pandas.concat.
        import pandas as pd

        trs = response.xpath('//*[@id="datatable"]/tr').extract()

        price_jsons = []

        try:
            # trs[0] is skipped (presumably the header row - confirm).
            for tr in trs[1:]:
                tds = Selector(text=tr).xpath('//td//text()').extract()
                tds = [x.strip() for x in tds if x.strip()]

                price_jsons.append({"timestamp": to_time_str(tds[0]),
                                    "pe": to_float(tds[1])})

            if price_jsons:
                # DataFrame.append no longer exists in pandas 2.x; concat of
                # a frame built from the dicts is the equivalent.
                self.df_pe = pd.concat([self.df_pe, pd.DataFrame(price_jsons)], ignore_index=True)
                self.df_pe = index_df_with_time(self.df_pe)
        except Exception as e:
            self.logger.error('error when getting sp500 pe url={} error={}'.format(response.url, e))
    def start_requests(self):
        """Yield one trading-calendar request per weekday up to today.

        The range starts on January 1st of the current year, or on the date
        following the last entry of ``saved_trading_dates`` when that list is
        not empty. Saturdays and Sundays are skipped. Each request carries
        its ``%Y%m%d`` date string in ``meta['the_date']`` and is handled by
        ``download_trading_calendar``.
        """
        # Statistics of earlier years can be downloaded directly, so only
        # start from the current year.
        first_date = "{}0101".format(datetime.today().year)

        # Resume after the dates that are already saved.
        if self.saved_trading_dates:
            first_date = next_date(self.saved_trading_dates[-1])

        for day in pd.date_range(start=first_date, end=datetime.today()):
            # Weekend: Saturday (5) or Sunday (6).
            if day.weekday() in (5, 6):
                continue

            day_str = to_time_str(the_time=day, time_fmt='%Y%m%d')
            yield Request(url=self.get_trading_date_url(the_date=day_str),
                          meta={'the_date': day_str},
                          callback=self.download_trading_calendar)
Exemplo n.º 17
0
def get_kdata(security_item, the_date=None, start_date=None, end_date=None, fuquan='bfq', dtype=None, source='163',
              level='day'):
    """
    Load kdata of a security from its local CSV file.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item; a string containing 'stock' is resolved as an id,
        any other string as a code
    the_date : TimeStamp str or TimeStamp
        return only the kdata of this date
    start_date : TimeStamp str or TimeStamp
        start of the range; defaults to the security's listDate, or
        '2002-01-01' when listDate is missing
    end_date : TimeStamp str or TimeStamp
        end of the range; defaults to today
    fuquan : str
        passed to the kdata path lookup, default 'bfq'
    dtype : dict
        dtype mapping for read_csv; by default code and timestamp are str
    source : str
        passed to the kdata path lookup, default '163'
    level : str
        not used by this function

    Returns
    -------
    DataFrame or Series
        the rows indexed and sorted by timestamp; ``df.loc[the_date]`` when
        ``the_date`` is given and present; an empty DataFrame when the file
        or the date does not exist

    """
    # isinstance also accepts str subclasses, unlike an exact type comparison.
    if isinstance(security_item, str):
        if 'stock' in security_item:
            security_item = get_security_item(id=security_item)
        else:
            security_item = get_security_item(code=security_item)

    the_path = files_contract.get_kdata_path(security_item, source=source, fuquan=fuquan)

    if not os.path.isfile(the_path):
        return pd.DataFrame()

    if not dtype:
        dtype = {"code": str, 'timestamp': str}
    df = pd.read_csv(the_path, dtype=dtype)

    df.timestamp = df.timestamp.apply(lambda x: to_time_str(x))
    df = df.set_index(df['timestamp'], drop=False)
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    if the_date:
        if the_date in df.index:
            return df.loc[the_date]
        return pd.DataFrame()

    if not start_date:
        list_date = security_item['listDate']
        # pd.isnull copes with None / NaT / NaN and with Timestamp values,
        # where np.isnan raised TypeError.
        if not isinstance(list_date, str) and pd.isnull(list_date):
            start_date = '2002-01-01'
        else:
            start_date = list_date
    if not end_date:
        end_date = datetime.datetime.today()

    if start_date and end_date:
        df = df.loc[start_date:end_date]

    return df
Exemplo n.º 18
0
def get_cash_flow_statement_items(security_item, start_date=None, report_period=None, report_event_date=None):
    path = get_cash_flow_statement_path(security_item)
    if not os.path.exists(path):
        return []
    encoding = settings.DOWNLOAD_TXT_ENCODING if settings.DOWNLOAD_TXT_ENCODING else detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()
        # for idx, line in enumerate(lines):
        #     yield idx, line.split()
        reportDate = lines[0].split()[1:-1]
        # /*一、经营活动产生的现金流量*/
        # 销售商品、提供劳务收到的现金
        cashFromSellingCommoditiesOrOfferingLabor = lines[3].split()[1:-1]
        # 收到的税费返还
        refundOfTaxAndFeeReceived = lines[4].split()[1:-1]
        # 收到的其他与经营活动有关的现金
        cashReceivedRelatingToOtherOperatingActivities = lines[5].split()[1:-1]
        # 经营活动现金流入小计
        subTotalOfCashInflowsFromOperatingActivities = lines[6].split()[1:-1]
        # 购买商品、接受劳务支付的现金
        cashPaidForGoodsAndServices = lines[7].split()[1:-1]
        # 支付给职工以及为职工支付的现金
        cashPaidToAndOnBehalfOfemployees = lines[8].split()[1:-1]
        # 支付的各项税费
        paymentsOfTaxesAndSurcharges = lines[9].split()[1:-1]
        # 支付的其他与经营活动有关的现金
        cashPaidRelatingToOtherOperatingActivities = lines[10].split()[1:-1]
        # 经营活动现金流出小计
        subTotalOfCashOutflowsFromOperatingActivities = lines[11].split()[1:-1]
        # 经营活动产生的现金流量净额
        netCashFlowsFromOperatingActivities = lines[12].split()[1:-1]
        # /*二、投资活动产生的现金流量*/
        # 收回投资所收到的现金
        cashReceivedFromDisposalOfInvestments = lines[14].split()[1:-1]
        # 取得投资收益所收到的现金
        cashReceivedFromReturnsOnIvestments = lines[15].split()[1:-1]
        # 处置固定资产、无形资产和其他长期资产所收回的现金净额
        netCashReceivedFromDisposalAssets = lines[16].split()[1:-1]
        # 处置子公司及其他营业单位收到的现金净额
        netCashReceivedFromDisposalSubsidiaries = lines[17].split()[1:-1]
        # 收到的其他与投资活动有关的现金
        cashReceivedFromOtherInvesting = lines[18].split()[1:-1]
        # 投资活动现金流入小计
        subTotalOfCashInflowsFromInvesting = lines[19].split()[1:-1]
        # 购建固定资产、无形资产和其他长期资产所支付的现金
        cashPaidToAcquireFixedAssets = lines[20].split()[1:-1]
        # 投资所支付的现金
        cashPaidToAcquireInvestments = lines[21].split()[1:-1]
        # 取得子公司及其他营业单位支付的现金净额
        netCashPaidToAcquireSubsidiaries = lines[22].split()[1:-1]
        # 支付的其他与投资活动有关的现金
        cashPaidRelatingToOtherInvesting = lines[23].split()[1:-1]
        # 投资活动现金流出小计
        subTotalOfCashOutflowsFromInvesting = lines[24].split()[1:-1]
        # 投资活动产生的现金流量净额
        netCashFlowsFromInvesting = lines[25].split()[1:-1]
        # /*三、筹资活动产生的现金流量*/
        # 吸收投资收到的现金
        cashReceivedFromCapitalContributions = lines[27].split()[1:-1]
        # 其中:子公司吸收少数股东投资收到的现金
        cashReceivedFromMinorityShareholdersOfSubsidiaries = lines[28].split()[1:-1]
        # 取得借款收到的现金
        cashReceivedFromBorrowings = lines[29].split()[1:-1]
        # 发行债券收到的现金
        cashReceivedFromIssuingBonds = lines[30].split()[1:-1]
        # 收到其他与筹资活动有关的现金
        cashReceivedRelatingToOtherFinancingActivities = lines[31].split()[1:-1]
        # 筹资活动现金流入小计
        subTotalOfCashInflowsFromFinancingActivities = lines[32].split()[1:-1]
        # 偿还债务支付的现金
        cashRepaymentsOfBorrowings = lines[33].split()[1:-1]
        # 分配股利、利润或偿付利息所支付的现金
        cashPaymentsForInterestExpensesAndDistributionOfDividendsOrProfits = lines[34].split()[1:-1]
        # 其中:子公司支付给少数股东的股利、利润
        cashPaymentsForDividendsOrProfitToMinorityShareholders = lines[35].split()[1:-1]
        # 支付其他与筹资活动有关的现金
        cashPaymentsRelatingToOtherFinancingActivities = lines[36].split()[1:-1]
        # 筹资活动现金流出小计
        subTotalOfCashOutflowsFromFinancingActivities = lines[37].split()[1:-1]
        # 筹资活动产生的现金流量净额
        netCashFlowsFromFinancingActivities = lines[38].split()[1:-1]
        # /*四、汇率变动对现金及现金等价物的影响*/
        effectOfForeignExchangeRate = lines[39].split()[1:-1]
        # /*五、现金及现金等价物净增加额*/
        netIncreaseInCash = lines[40].split()[1:-1]
        # 加:期初现金及现金等价物余额
        cashAtBeginningOfyear = lines[41].split()[1:-1]
        # /*六、期末现金及现金等价物余额*/
        cashAtEndOfyear = lines[42].split()[1:-1]
        # /*附注*/
        # 净利润
        netProfit = lines[44].split()[1:-1]
        # 少数股东权益
        minorityBookValue = lines[45].split()[1:-1]
        # 未确认的投资损失
        unrealisedInvestmentLosses = lines[46].split()[1:-1]
        # 资产减值准备
        allowanceForAssetDevaluation = lines[47].split()[1:-1]
        # 固定资产折旧、油气资产折耗、生产性物资折旧
        depreciationOfFixedAssets = lines[48].split()[1:-1]
        # 无形资产摊销
        amorizationOfIntangibleAssets = lines[49].split()[1:-1]
        # 长期待摊费用摊销
        longTermDeferredExpenses = lines[50].split()[1:-1]
        # 待摊费用的减少
        decreaseOfDeferredExpenses = lines[51].split()[1:-1]
        # 预提费用的增加
        IncreaseOfwithholdingExpenses = lines[52].split()[1:-1]
        # 处置固定资产、无形资产和其他长期资产的损失
        lossOnDisposalOfFixedAssets = lines[53].split()[1:-1]
        # 固定资产报废损失
        lossOnFixedAssetsDamaged = lines[54].split()[1:-1]
        # 公允价值变动损失
        lossOnFairValueChange = lines[55].split()[1:-1]
        # 递延收益增加(减:减少)
        changeOnDeferredRevenue = lines[56].split()[1:-1]
        # 预计负债
        estimatedLiabilities = lines[57].split()[1:-1]
        # 财务费用
        financingExpenses = lines[58].split()[1:-1]
        # 投资损失
        investmentLoss = lines[59].split()[1:-1]
        # 递延所得税资产减少
        decreaseOnDeferredIncomeTaxAssets = lines[60].split()[1:-1]
        # 递延所得税负债增加
        increaseOnDeferredIncomeTaxLiabilities = lines[61].split()[1:-1]
        # 存货的减少
        decreaseInInventories = lines[62].split()[1:-1]
        # 经营性应收项目的减少
        decreaseInReceivablesUnderOperatingActivities = lines[63].split()[1:-1]
        # 经营性应付项目的增加
        increaseInReceivablesUnderOperatingActivities = lines[64].split()[1:-1]
        # 已完工尚未结算款的减少(减:增加)
        decreaseOnAmountDue = lines[65].split()[1:-1]
        # 已结算尚未完工款的增加(减:减少)
        increaseOnSettlementNotYetCompleted = lines[66].split()[1:-1]
        # 其他
        other = lines[67].split()[1:-1]
        # 经营活动产生现金流量净额
        netCashFlowFromOperatingActivities = lines[68].split()[1:-1]
        # 债务转为资本
        debtsTransferToCapital = lines[69].split()[1:-1]
        # 一年内到期的可转换公司债券
        oneYearDueConvertibleBonds = lines[70].split()[1:-1]
        # 融资租入固定资产
        financingRentToFixedAsset = lines[71].split()[1:-1]
        # 现金的期末余额
        cashAtTheEndOfPeriod = lines[72].split()[1:-1]
        # 现金的期初余额
        cashAtTheBeginningOfPeriod = lines[73].split()[1:-1]
        # 现金等价物的期末余额
        cashEquivalentsAtTheEndOfPeriod = lines[74].split()[1:-1]
        # 现金等价物的期初余额
        cashEquivalentsAtTheBeginningOfPeriod = lines[75].split()[1:-1]
        # 现金及现金等价物的净增加额
        netIncreaseInCashAndCashEquivalents = lines[76].split()[1:-1]
        result_json = []
        for idx, _ in enumerate(reportDate):
            if start_date:
                if pd.Timestamp(reportDate[idx]) < pd.Timestamp(start_date):
                    continue

            if report_period and not is_same_date(report_period, reportDate[idx]):
                continue

            reportEventDate = get_report_event_date(security_item, report_date=reportDate[idx])

            # use report_event_date to filter the reportEventDate before it for not getting future data
            if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(reportEventDate):
                continue

            the_json = {
                "id": '{}_{}'.format(security_item["id"], reportDate[idx]),
                "reportDate": to_time_str(reportDate[idx]),
                "reportEventDate": reportEventDate,
                "securityId": security_item["id"],
                "code": security_item["code"],
                # /*一、经营活动产生的现金流量*/
                # 销售商品、提供劳务收到的现金
                "cashFromSellingCommoditiesOrOfferingLabor": to_float(cashFromSellingCommoditiesOrOfferingLabor[idx]),
                # 收到的税费返还
                "refundOfTaxAndFeeReceived": to_float(refundOfTaxAndFeeReceived[idx]),
                # 收到的其他与经营活动有关的现金
                "cashReceivedRelatingToOtherOperatingActivities": to_float(
                    cashReceivedRelatingToOtherOperatingActivities[idx]),
                # 经营活动现金流入小计
                "subTotalOfCashInflowsFromOperatingActivities": to_float(
                    subTotalOfCashInflowsFromOperatingActivities[idx]),
                # 购买商品、接受劳务支付的现金
                "cashPaidForGoodsAndServices": to_float(cashPaidForGoodsAndServices[idx]),
                # 支付给职工以及为职工支付的现金
                "cashPaidToAndOnBehalfOfemployees": to_float(cashPaidToAndOnBehalfOfemployees[idx]),
                # 支付的各项税费
                "paymentsOfTaxesAndSurcharges": to_float(paymentsOfTaxesAndSurcharges[idx]),
                # 支付的其他与经营活动有关的现金
                "cashPaidRelatingToOtherOperatingActivities": to_float(cashPaidRelatingToOtherOperatingActivities[idx]),
                # 经营活动现金流出小计
                "subTotalOfCashOutflowsFromOperatingActivities": to_float(
                    subTotalOfCashOutflowsFromOperatingActivities[idx]),
                # 经营活动产生的现金流量净额
                "netCashFlowsFromOperatingActivities": to_float(netCashFlowsFromOperatingActivities[idx]),
                # /*二、投资活动产生的现金流量*/
                # 收回投资所收到的现金
                "cashReceivedFromDisposalOfInvestments": to_float(cashReceivedFromDisposalOfInvestments[idx]),
                # 取得投资收益所收到的现金
                "cashReceivedFromReturnsOnIvestments": to_float(cashReceivedFromReturnsOnIvestments[idx]),
                # 处置固定资产、无形资产和其他长期资产所收回的现金净额
                "netCashReceivedFromDisposalAssets": to_float(netCashReceivedFromDisposalAssets[idx]),
                # 处置子公司及其他营业单位收到的现金净额
                "netCashReceivedFromDisposalSubsidiaries": to_float(netCashReceivedFromDisposalSubsidiaries[idx]),
                # 收到的其他与投资活动有关的现金
                "cashReceivedFromOtherInvesting": to_float(cashReceivedFromOtherInvesting[idx]),
                # 投资活动现金流入小计
                "subTotalOfCashInflowsFromInvesting": to_float(subTotalOfCashInflowsFromInvesting[idx]),
                # 购建固定资产、无形资产和其他长期资产所支付的现金
                "cashPaidToAcquireFixedAssets": to_float(cashPaidToAcquireFixedAssets[idx]),
                # 投资所支付的现金
                "cashPaidToAcquireInvestments": to_float(cashPaidToAcquireInvestments[idx]),
                # 取得子公司及其他营业单位支付的现金净额
                "netCashPaidToAcquireSubsidiaries": to_float(netCashPaidToAcquireSubsidiaries[idx]),
                # 支付的其他与投资活动有关的现金
                "cashPaidRelatingToOtherInvesting": to_float(cashPaidRelatingToOtherInvesting[idx]),
                # 投资活动现金流出小计
                "subTotalOfCashOutflowsFromInvesting": to_float(subTotalOfCashOutflowsFromInvesting[idx]),
                # 投资活动产生的现金流量净额
                "netCashFlowsFromInvesting": to_float(netCashFlowsFromInvesting[idx]),
                # /*三、筹资活动产生的现金流量*/
                # 吸收投资收到的现金
                "cashReceivedFromCapitalContributions": to_float(cashReceivedFromCapitalContributions[idx]),
                # 其中:子公司吸收少数股东投资收到的现金
                "cashReceivedFromMinorityShareholdersOfSubsidiaries":
                    cashReceivedFromMinorityShareholdersOfSubsidiaries[
                        idx],
                # 取得借款收到的现金
                "cashReceivedFromBorrowings": to_float(cashReceivedFromBorrowings[idx]),
                # 发行债券收到的现金
                "cashReceivedFromIssuingBonds": to_float(cashReceivedFromIssuingBonds[idx]),
                # 收到其他与筹资活动有关的现金
                "cashReceivedRelatingToOtherFinancingActivities": to_float(
                    cashReceivedRelatingToOtherFinancingActivities[idx]),
                # 筹资活动现金流入小计
                "subTotalOfCashInflowsFromFinancingActivities": to_float(
                    subTotalOfCashInflowsFromFinancingActivities[idx]),
                # 偿还债务支付的现金
                "cashRepaymentsOfBorrowings": to_float(cashRepaymentsOfBorrowings[idx]),
                # 分配股利、利润或偿付利息所支付的现金
                "cashPaymentsForInterestExpensesAndDistributionOfDividendsOrProfits":
                    cashPaymentsForInterestExpensesAndDistributionOfDividendsOrProfits[idx],
                # 其中:子公司支付给少数股东的股利、利润
                "cashPaymentsForDividendsOrProfitToMinorityShareholders":
                    cashPaymentsForDividendsOrProfitToMinorityShareholders[idx],
                # 支付其他与筹资活动有关的现金
                "cashPaymentsRelatingToOtherFinancingActivities": to_float(
                    cashPaymentsRelatingToOtherFinancingActivities[idx]),
                # 筹资活动现金流出小计
                "subTotalOfCashOutflowsFromFinancingActivities": to_float(
                    subTotalOfCashOutflowsFromFinancingActivities[idx]),
                # 筹资活动产生的现金流量净额
                "netCashFlowsFromFinancingActivities": to_float(netCashFlowsFromFinancingActivities[idx]),
                # /*四、汇率变动对现金及现金等价物的影响*/
                "effectOfForeignExchangeRate": to_float(effectOfForeignExchangeRate[idx]),
                # /*五、现金及现金等价物净增加额*/
                "netIncreaseInCash": to_float(netIncreaseInCash[idx]),
                # 加:期初现金及现金等价物余额
                "cashAtBeginningOfyear": to_float(cashAtBeginningOfyear[idx]),
                # /*六、期末现金及现金等价物余额*/
                "cashAtEndOfyear": to_float(cashAtEndOfyear[idx]),
                # /*附注*/
                # 净利润
                "netProfit": to_float(netProfit[idx]),
                # 少数股东权益
                "minorityBookValue": to_float(minorityBookValue[idx]),
                # 未确认的投资损失
                "unrealisedInvestmentLosses": to_float(unrealisedInvestmentLosses[idx]),
                # 资产减值准备
                "allowanceForAssetDevaluation": to_float(allowanceForAssetDevaluation[idx]),
                # 固定资产折旧、油气资产折耗、生产性物资折旧
                "depreciationOfFixedAssets": to_float(depreciationOfFixedAssets[idx]),
                # 无形资产摊销
                "amorizationOfIntangibleAssets": to_float(amorizationOfIntangibleAssets[idx]),
                # 长期待摊费用摊销
                "longTermDeferredExpenses": to_float(longTermDeferredExpenses[idx]),
                # 待摊费用的减少
                "decreaseOfDeferredExpenses": to_float(decreaseOfDeferredExpenses[idx]),
                # 预提费用的增加
                "IncreaseOfwithholdingExpenses": to_float(IncreaseOfwithholdingExpenses[idx]),
                # 处置固定资产、无形资产和其他长期资产的损失
                "lossOnDisposalOfFixedAssets": to_float(lossOnDisposalOfFixedAssets[idx]),
                # 固定资产报废损失
                "lossOnFixedAssetsDamaged": to_float(lossOnFixedAssetsDamaged[idx]),
                # 公允价值变动损失
                "lossOnFairValueChange": to_float(lossOnFairValueChange[idx]),
                # 递延收益增加(减:减少)
                "changeOnDeferredRevenue": to_float(changeOnDeferredRevenue[idx]),
                # 预计负债
                "estimatedLiabilities": to_float(estimatedLiabilities[idx]),
                # 财务费用
                "financingExpenses": to_float(financingExpenses[idx]),
                # 投资损失
                "investmentLoss": to_float(investmentLoss[idx]),
                # 递延所得税资产减少
                "decreaseOnDeferredIncomeTaxAssets": to_float(decreaseOnDeferredIncomeTaxAssets[idx]),
                # 递延所得税负债增加
                "increaseOnDeferredIncomeTaxLiabilities": to_float(increaseOnDeferredIncomeTaxLiabilities[idx]),
                # 存货的减少
                "decreaseInInventories": to_float(decreaseInInventories[idx]),
                # 经营性应收项目的减少
                "decreaseInReceivablesUnderOperatingActivities": to_float(
                    decreaseInReceivablesUnderOperatingActivities[idx]),
                # 经营性应付项目的增加
                "increaseInReceivablesUnderOperatingActivities": to_float(
                    increaseInReceivablesUnderOperatingActivities[idx]),
                # 已完工尚未结算款的减少(减:增加)
                "decreaseOnAmountDue": to_float(decreaseOnAmountDue[idx]),
                # 已结算尚未完工款的增加(减:减少)
                "increaseOnSettlementNotYetCompleted": to_float(increaseOnSettlementNotYetCompleted[idx]),
                # 其他
                "other": to_float(other[idx]),
                # 经营活动产生现金流量净额
                "netCashFlowFromOperatingActivities": to_float(netCashFlowFromOperatingActivities[idx]),
                # 债务转为资本
                "debtsTransferToCapital": to_float(debtsTransferToCapital[idx]),
                # 一年内到期的可转换公司债券
                "oneYearDueConvertibleBonds": to_float(oneYearDueConvertibleBonds[idx]),
                # 融资租入固定资产
                "financingRentToFixedAsset": to_float(financingRentToFixedAsset[idx]),
                # 现金的期末余额
                "cashAtTheEndOfPeriod": to_float(cashAtTheEndOfPeriod[idx]),
                # 现金的期初余额
                "cashAtTheBeginningOfPeriod": to_float(cashAtTheBeginningOfPeriod[idx]),
                # 现金等价物的期末余额
                "cashEquivalentsAtTheEndOfPeriod": to_float(cashEquivalentsAtTheEndOfPeriod[idx]),
                # 现金等价物的期初余额
                "cashEquivalentsAtTheBeginningOfPeriod": to_float(cashEquivalentsAtTheBeginningOfPeriod[idx]),
                # 现金及现金等价物的净增加额
                "netIncreaseInCashAndCashEquivalents": to_float(netIncreaseInCashAndCashEquivalents[idx])
            }

            if report_period and is_same_date(report_period, reportDate[idx]):
                return the_json

            result_json.append(the_json)

        if result_json:
            result_json = sorted(result_json, key=lambda x: pd.Timestamp(x['reportDate']))

        return result_json
Exemplo n.º 19
0
def _build_shfe_kdata_row(the_data, the_date, code, name, security_id):
    """Build one daily kdata row (a dict) from a raw SHFE instrument record."""
    # Falsy quote fields (None, '', 0) are stored as 0.
    low_price = the_data['LOWESTPRICE'] or 0
    open_price = the_data['OPENPRICE'] or 0
    close_price = the_data['CLOSEPRICE'] or 0
    high_price = the_data['HIGHESTPRICE'] or 0
    volume = the_data['VOLUME'] or 0

    # A string in a change field is treated as "no value" and replaced by 0.
    change = 0 if isinstance(the_data['ZD1_CHG'], str) else the_data['ZD1_CHG']
    change1 = 0 if isinstance(the_data['ZD2_CHG'], str) else the_data['ZD2_CHG']

    pre_close = close_price - change
    pre_settlement = the_data['PRESETTLEMENTPRICE']

    # A zero previous price (first trading day) or a missing one gives 0
    # instead of dividing by it.
    change_pct = change / pre_close if pre_close else 0
    change_pct1 = change1 / pre_settlement if pre_settlement else 0

    return {
        "timestamp": to_time_str(the_date),
        "code": code,
        "name": name,
        "low": low_price,
        "open": open_price,
        "close": close_price,
        "high": high_price,
        "volume": volume,
        # Turnover is an estimate: average of the four prices times volume.
        "turnover":
        (low_price + open_price + close_price + high_price) / 4 * volume,
        "securityId": security_id,
        "preClose": pre_close,
        "change": change,
        "changePct": change_pct,
        "openInterest": the_data['OPENINTEREST'],
        "settlement": the_data['SETTLEMENTPRICE'],
        "preSettlement": the_data['PRESETTLEMENTPRICE'],
        "change1": change1,
        "changePct1": change_pct1
    }


def parse_shfe_day_data(force_parse=False):
    """Parse cached SHFE daily quote files into per-contract kdata CSV files.

    Every file in the current year's ``day_kdata`` cache directory (except the
    ``parsed`` bookkeeping file) is read as JSON, and each record under its
    ``o_curinstrument`` key whose ``DELIVERYMONTH`` starts with four digits is
    turned into one kdata row for the contract ``<product><delivery month>``.
    Contracts missing from the SHFE future security list are added to it.

    Parameters
    ----------
    force_parse : bool
        When False (default), files already listed in the ``parsed`` file are
        skipped. When True, every cached file is handled again.

    Returns
    -------
    None
        The results are written to the kdata CSV files, the security list CSV
        and the ``parsed`` file.
    """
    cache_dir = get_exchange_cache_dir(security_type='future',
                                       exchange='shfe',
                                       the_year=datetime.datetime.today().year,
                                       data_type="day_kdata")
    the_parsed_path = os.path.join(cache_dir, 'parsed')
    the_parsed = []
    if os.path.exists(the_parsed_path):
        with open(the_parsed_path) as data_file:
            the_parsed = json.load(data_file)

    the_dates = [f for f in os.listdir(cache_dir) if f != 'parsed']
    if not force_parse:
        the_dates = [f for f in the_dates if f not in the_parsed]

    # Column order of the saved CSV:
    # date, code, name, low, open, close, high, volume (lots), turnover,
    # unique id, previous close, change, change pct, open interest,
    # settlement, previous settlement, change (by settlement price),
    # change pct (by settlement price)
    kdata_columns = [
        'timestamp', 'code', 'name', 'low', 'open', 'close', 'high',
        'volume', 'turnover', 'securityId', 'preClose', 'change',
        'changePct', 'openInterest', 'settlement', 'preSettlement',
        'change1', 'changePct1'
    ]

    for the_date in the_dates:
        the_path = os.path.join(cache_dir, the_date)
        logger.info("start handling {}".format(the_path))

        with open(the_path, 'r', encoding='UTF8') as f:
            the_datas = json.load(f)['o_curinstrument']

        for the_data in the_datas:
            # Example record:
            # {'CLOSEPRICE': 11480, 'DELIVERYMONTH': '1809',
            #  'HIGHESTPRICE': 11555, 'LOWESTPRICE': 11320,
            #  'OPENINTEREST': 425692, 'OPENINTERESTCHG': 3918,
            #  'OPENPRICE': 11495, 'ORDERNO': 0,
            #  'PRESETTLEMENTPRICE': 11545, 'PRODUCTID': 'ru_f    ',
            #  'PRODUCTNAME': '天然橡胶            ', 'PRODUCTSORTNO': 100,
            #  'SETTLEMENTPRICE': 11465, 'VOLUME': 456574,
            #  'ZD1_CHG': -65, 'ZD2_CHG': -80}

            # Only records whose delivery month starts with four digits are
            # contract quotes; everything else is skipped.
            if not re.match(r"\d{4}", the_data['DELIVERYMONTH']):
                continue

            product_id = the_data['PRODUCTID']
            code = "{}{}".format(product_id[:product_id.index('_')],
                                 the_data['DELIVERYMONTH'])
            logger.info("start handling {} for {}".format(code, the_date))

            name = get_future_name(code)
            security_id = "future_shfe_{}".format(code)
            security_item = {
                'code': code,
                'name': name,
                'id': security_id,
                'exchange': 'shfe',
                'type': 'future'
            }

            # Save the contract meta if it is not in the security list yet.
            security_list = get_security_list(security_type='future',
                                              exchanges=['shfe'])
            if security_list is None:
                # No list available: start from an empty one instead of
                # failing on the membership test below.
                security_list = pd.DataFrame()
            elif 'code' in security_list.columns:
                security_list = security_list.set_index(
                    security_list['code'], drop=False)
            if code not in security_list.index:
                security_list = pd.concat(
                    [security_list, pd.DataFrame([security_item])],
                    ignore_index=True)
                security_list.to_csv(get_security_list_path('future', 'shfe'),
                                     index=False)

            kdata_path = get_kdata_path(item=security_item, source='exchange')
            # TODO: this directory handling should be centralized
            kdata_dir = get_kdata_dir(item=security_item)
            os.makedirs(kdata_dir, exist_ok=True)

            if os.path.exists(kdata_path):
                saved_df = pd.read_csv(kdata_path, dtype=str)
                saved_df = saved_df.set_index(saved_df['timestamp'],
                                              drop=False)
            else:
                saved_df = pd.DataFrame()

            # The row for this date is already stored: nothing to do.
            if not saved_df.empty and the_date in saved_df.index:
                continue

            the_json = _build_shfe_kdata_row(the_data, the_date, code, name,
                                             security_id)
            saved_df = pd.concat([saved_df, pd.DataFrame([the_json])],
                                 ignore_index=True)
            saved_df = saved_df.loc[:, kdata_columns]
            saved_df = saved_df.drop_duplicates(subset='timestamp',
                                                keep='last')
            saved_df = saved_df.set_index(saved_df['timestamp'], drop=False)
            saved_df.index = pd.to_datetime(saved_df.index)
            saved_df = saved_df.sort_index()
            saved_df.to_csv(kdata_path, index=False)

            logger.info("end handling {} for {}".format(code, the_date))

            if the_date not in the_parsed:
                the_parsed.append(the_date)

        # Persist the bookkeeping after every file so progress is kept.
        if the_parsed:
            result_list = sorted(drop_duplicate(the_parsed))

            with open(the_parsed_path, 'w') as outfile:
                json.dump(result_list, outfile)
        logger.info("end handling {}".format(the_path))
Exemplo n.º 20
0
def get_kdata(security_item,
              exchange=None,
              the_date=None,
              start_date=None,
              end_date=None,
              fuquan='bfq',
              dtype=None,
              source=None,
              level='day'):
    """
    Load the saved kdata CSV of a security as a DataFrame.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code

    exchange : str
        the exchange,set this for cryptocurrency

    the_date : TimeStamp str or TimeStamp
        get the kdata for the exact date
    start_date : TimeStamp str or TimeStamp
        start date,defaults to the security's listDate when that is set
    end_date : TimeStamp str or TimeStamp
        end date,defaults to today
    fuquan : str
        {"qfq","hfq","bfq"},default:"bfq"
    dtype : type
        the data type for the csv column,default: None
        (then "code" and "timestamp" are read as str)
    source : str
        the data source,{'163','sina','exchange'},just used for internal merge
    level : str or int
        the kdata level,{1,5,15,30,60,'day','week','month'},default : 'day'
        (currently not used by this function)

    Returns
    -------
    DataFrame or None
        The kdata indexed and sorted by timestamp. An empty DataFrame when
        no kdata file exists; None when ``the_date`` is given but the file
        has no row for it.

    """

    # There are too many cryptocurrency exchanges, so the exchange must be
    # given explicitly for them.
    security_item = to_security_item(security_item, exchange)

    source = adjust_source(security_item, source)

    # 163 data is merged and carries the adjust factor; it is always stored
    # under 'bfq', so read from there and derive the adjusted prices below.
    path_fuquan = 'bfq' if source == '163' else fuquan
    the_path = files_contract.get_kdata_path(security_item,
                                             source=source,
                                             fuquan=path_fuquan)

    if not os.path.isfile(the_path):
        return pd.DataFrame()

    if not dtype:
        dtype = {"code": str, 'timestamp': str}
    df = pd.read_csv(the_path, dtype=dtype)

    needs_adjust = ('factor' in df.columns and source == '163'
                    and security_item['type'] == 'stock')

    df['timestamp'] = df['timestamp'].apply(to_time_str)
    df = df.set_index(df['timestamp'], drop=False)
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    # Take the latest factor from the full, chronologically sorted data,
    # before any date filtering removes rows.
    latest_factor = None
    if needs_adjust:
        known_factors = df.loc[df['factor'].notna(), 'factor']
        if not known_factors.empty:
            latest_factor = known_factors.iat[-1]

    if the_date:
        if the_date not in df.index:
            return None
        # The timestamp column was normalized with to_time_str above, so
        # normalize the_date the same way; otherwise a Timestamp or a
        # differently formatted string would never match.
        df = df.loc[df['timestamp'] == to_time_str(the_date)]
    else:
        if not start_date and not pd.isna(security_item['listDate']):
            start_date = security_item['listDate']
        if not end_date:
            end_date = datetime.datetime.today()

        if start_date and end_date:
            df = df.loc[start_date:end_date]

    # Price adjustment
    if needs_adjust:
        # Work on a copy so the new columns are not assigned onto a slice.
        df = df.copy()

        # Backward-adjusted prices never change once computed.
        df['hfqClose'] = df.close * df.factor
        df['hfqOpen'] = df.open * df.factor
        df['hfqHigh'] = df.high * df.factor
        df['hfqLow'] = df.low * df.factor

        # Forward-adjusted prices are scaled back from the latest factor,
        # so the current price stays unchanged.
        if latest_factor:
            df['qfqClose'] = df.hfqClose / latest_factor
            df['qfqOpen'] = df.hfqOpen / latest_factor
            df['qfqHigh'] = df.hfqHigh / latest_factor
            df['qfqLow'] = df.hfqLow / latest_factor
        else:
            # Not inside an exception handler, so log a plain error.
            logger.error("missing latest factor for {}".format(
                security_item['id']))
    return df
    def download_day_k_data(self, response):
        """Merge the day kdata carried by ``response`` into the saved CSV.

        ``response.meta`` must provide ``path`` (the target CSV file) and
        ``item`` (the security; its ``code``, ``id``, ``name`` and ``type``
        are read). The first ``{...}`` span of ``response.text`` is parsed as
        JSON and the rows under its ``data`` key are appended to the rows
        already stored, de-duplicated by timestamp (last one wins), sorted by
        date and written back to ``path``.

        Any exception is caught and logged; nothing is raised or returned.
        """
        path = response.meta['path']
        item = response.meta['item']

        try:
            # Rows already saved in the csv
            if os.path.exists(path):
                df_current = pd.read_csv(path, dtype=str)
                # Back-fill the name column for older files
                if 'name' not in df_current.columns:
                    df_current['name'] = item['name']
            else:
                df_current = pd.DataFrame()

            tmp_str = response.text

            json_str = tmp_str[tmp_str.index('{'):tmp_str.index('}') + 1]
            tmp_json = json.loads(json_str)

            the_datas = tmp_json['data']

            # Each row: date, open, high, close, low, volume, change pct
            the_jsons = []
            pre_json = None

            for the_data in the_datas:
                the_json = {
                    'code': item['code'],
                    'securityId': item['id'],
                    'name': item['name'],
                    'timestamp': to_time_str(the_data[0]),
                    'open': the_data[1],
                    'high': the_data[2],
                    'close': the_data[3],
                    'low': the_data[4],
                    'volume': the_data[5],
                    'changePct': the_data[6]
                }

                # Some rows have prices in the wrong slot: make 'high' hold
                # the largest and 'low' the smallest of the four raw prices by
                # swapping with the first field that holds that value.
                raw_prices = the_data[1:5]
                for field, extreme, candidates in (
                        ('high', max(raw_prices), ('close', 'open', 'low')),
                        ('low', min(raw_prices), ('close', 'open', 'high'))):
                    if the_json[field] == extreme:
                        continue
                    for other in candidates:
                        if the_json[other] == extreme:
                            the_json[field], the_json[other] = (
                                the_json[other], the_json[field])
                            break

                # Turnover is an estimate: average price times volume
                avgPrice = (the_json['open'] + the_json['high'] +
                            the_json['close'] + the_json['low']) / 4
                the_json['turnover'] = avgPrice * the_json['volume']
                if pre_json:
                    the_json['preClose'] = pre_json['close']
                    the_json['change'] = the_json['close'] - pre_json['close']

                # TODO: these values are not available yet; fill them later
                the_json['turnoverRate'] = 0
                the_json['tCap'] = 0
                the_json['mCap'] = 0
                the_json['factor'] = 0

                pre_json = the_json
                the_jsons.append(the_json)

            # Merge into the current csv. pd.concat is used because
            # DataFrame.append no longer exists in pandas 2.x.
            df_current = pd.concat([df_current, pd.DataFrame(the_jsons)],
                                   ignore_index=True)

            if item['type'] == 'index':
                df_current = df_current.dropna(subset=KDATA_INDEX_COLUMN_163)
                # Enforce the column order
                df_current = df_current.loc[:, KDATA_COLUMN_INDEX]
            else:
                df_current = df_current.dropna(subset=KDATA_COLUMN_163)
                # Enforce the column order
                df_current = df_current.loc[:, KDATA_COLUMN_STOCK]

            df_current = df_current.drop_duplicates(subset='timestamp',
                                                    keep='last')
            df_current = df_current.set_index(df_current['timestamp'],
                                              drop=False)
            df_current.index = pd.to_datetime(df_current.index)
            df_current = df_current.sort_index()
            df_current.to_csv(path, index=False)
        except Exception as e:
            self.logger.error(
                'error when getting k data url={} error={}'.format(
                    response.url, e))
Exemplo n.º 22
0
def get_cash_flow_statement_items(security_item,
                                  start_date=None,
                                  report_period=None,
                                  report_event_date=None):
    path = get_cash_flow_statement_path(security_item)
    if not os.path.exists(path):
        return []
    encoding = settings.DOWNLOAD_TXT_ENCODING if settings.DOWNLOAD_TXT_ENCODING else detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()
        # for idx, line in enumerate(lines):
        #     yield idx, line.split()
        reportDate = lines[0].split()[1:-1]
        # /*一、经营活动产生的现金流量*/
        # 销售商品、提供劳务收到的现金
        cashFromSellingCommoditiesOrOfferingLabor = lines[3].split()[1:-1]
        # 收到的税费返还
        refundOfTaxAndFeeReceived = lines[4].split()[1:-1]
        # 收到的其他与经营活动有关的现金
        cashReceivedRelatingToOtherOperatingActivities = lines[5].split()[1:-1]
        # 经营活动现金流入小计
        subTotalOfCashInflowsFromOperatingActivities = lines[6].split()[1:-1]
        # 购买商品、接受劳务支付的现金
        cashPaidForGoodsAndServices = lines[7].split()[1:-1]
        # 支付给职工以及为职工支付的现金
        cashPaidToAndOnBehalfOfemployees = lines[8].split()[1:-1]
        # 支付的各项税费
        paymentsOfTaxesAndSurcharges = lines[9].split()[1:-1]
        # 支付的其他与经营活动有关的现金
        cashPaidRelatingToOtherOperatingActivities = lines[10].split()[1:-1]
        # 经营活动现金流出小计
        subTotalOfCashOutflowsFromOperatingActivities = lines[11].split()[1:-1]
        # 经营活动产生的现金流量净额
        netCashFlowsFromOperatingActivities = lines[12].split()[1:-1]
        # /*二、投资活动产生的现金流量*/
        # 收回投资所收到的现金
        cashReceivedFromDisposalOfInvestments = lines[14].split()[1:-1]
        # 取得投资收益所收到的现金
        cashReceivedFromReturnsOnIvestments = lines[15].split()[1:-1]
        # 处置固定资产、无形资产和其他长期资产所收回的现金净额
        netCashReceivedFromDisposalAssets = lines[16].split()[1:-1]
        # 处置子公司及其他营业单位收到的现金净额
        netCashReceivedFromDisposalSubsidiaries = lines[17].split()[1:-1]
        # 收到的其他与投资活动有关的现金
        cashReceivedFromOtherInvesting = lines[18].split()[1:-1]
        # 投资活动现金流入小计
        subTotalOfCashInflowsFromInvesting = lines[19].split()[1:-1]
        # 购建固定资产、无形资产和其他长期资产所支付的现金
        cashPaidToAcquireFixedAssets = lines[20].split()[1:-1]
        # 投资所支付的现金
        cashPaidToAcquireInvestments = lines[21].split()[1:-1]
        # 取得子公司及其他营业单位支付的现金净额
        netCashPaidToAcquireSubsidiaries = lines[22].split()[1:-1]
        # 支付的其他与投资活动有关的现金
        cashPaidRelatingToOtherInvesting = lines[23].split()[1:-1]
        # 投资活动现金流出小计
        subTotalOfCashOutflowsFromInvesting = lines[24].split()[1:-1]
        # 投资活动产生的现金流量净额
        netCashFlowsFromInvesting = lines[25].split()[1:-1]
        # /*三、筹资活动产生的现金流量*/
        # 吸收投资收到的现金
        cashReceivedFromCapitalContributions = lines[27].split()[1:-1]
        # 其中:子公司吸收少数股东投资收到的现金
        cashReceivedFromMinorityShareholdersOfSubsidiaries = lines[28].split(
        )[1:-1]
        # 取得借款收到的现金
        cashReceivedFromBorrowings = lines[29].split()[1:-1]
        # 发行债券收到的现金
        cashReceivedFromIssuingBonds = lines[30].split()[1:-1]
        # 收到其他与筹资活动有关的现金
        cashReceivedRelatingToOtherFinancingActivities = lines[31].split(
        )[1:-1]
        # 筹资活动现金流入小计
        subTotalOfCashInflowsFromFinancingActivities = lines[32].split()[1:-1]
        # 偿还债务支付的现金
        cashRepaymentsOfBorrowings = lines[33].split()[1:-1]
        # 分配股利、利润或偿付利息所支付的现金
        cashPaymentsForInterestExpensesAndDistributionOfDividendsOrProfits = lines[
            34].split()[1:-1]
        # 其中:子公司支付给少数股东的股利、利润
        cashPaymentsForDividendsOrProfitToMinorityShareholders = lines[
            35].split()[1:-1]
        # 支付其他与筹资活动有关的现金
        cashPaymentsRelatingToOtherFinancingActivities = lines[36].split(
        )[1:-1]
        # 筹资活动现金流出小计
        subTotalOfCashOutflowsFromFinancingActivities = lines[37].split()[1:-1]
        # 筹资活动产生的现金流量净额
        netCashFlowsFromFinancingActivities = lines[38].split()[1:-1]
        # /*四、汇率变动对现金及现金等价物的影响*/
        effectOfForeignExchangeRate = lines[39].split()[1:-1]
        # /*五、现金及现金等价物净增加额*/
        netIncreaseInCash = lines[40].split()[1:-1]
        # 加:期初现金及现金等价物余额
        cashAtBeginningOfyear = lines[41].split()[1:-1]
        # /*六、期末现金及现金等价物余额*/
        cashAtEndOfyear = lines[42].split()[1:-1]
        # /*附注*/
        # 净利润
        netProfit = lines[44].split()[1:-1]
        # 少数股东权益
        minorityBookValue = lines[45].split()[1:-1]
        # 未确认的投资损失
        unrealisedInvestmentLosses = lines[46].split()[1:-1]
        # 资产减值准备
        allowanceForAssetDevaluation = lines[47].split()[1:-1]
        # 固定资产折旧、油气资产折耗、生产性物资折旧
        depreciationOfFixedAssets = lines[48].split()[1:-1]
        # 无形资产摊销
        amorizationOfIntangibleAssets = lines[49].split()[1:-1]
        # 长期待摊费用摊销
        longTermDeferredExpenses = lines[50].split()[1:-1]
        # 待摊费用的减少
        decreaseOfDeferredExpenses = lines[51].split()[1:-1]
        # 预提费用的增加
        IncreaseOfwithholdingExpenses = lines[52].split()[1:-1]
        # 处置固定资产、无形资产和其他长期资产的损失
        lossOnDisposalOfFixedAssets = lines[53].split()[1:-1]
        # 固定资产报废损失
        lossOnFixedAssetsDamaged = lines[54].split()[1:-1]
        # 公允价值变动损失
        lossOnFairValueChange = lines[55].split()[1:-1]
        # 递延收益增加(减:减少)
        changeOnDeferredRevenue = lines[56].split()[1:-1]
        # 预计负债
        estimatedLiabilities = lines[57].split()[1:-1]
        # 财务费用
        financingExpenses = lines[58].split()[1:-1]
        # 投资损失
        investmentLoss = lines[59].split()[1:-1]
        # 递延所得税资产减少
        decreaseOnDeferredIncomeTaxAssets = lines[60].split()[1:-1]
        # 递延所得税负债增加
        increaseOnDeferredIncomeTaxLiabilities = lines[61].split()[1:-1]
        # 存货的减少
        decreaseInInventories = lines[62].split()[1:-1]
        # 经营性应收项目的减少
        decreaseInReceivablesUnderOperatingActivities = lines[63].split()[1:-1]
        # 经营性应付项目的增加
        increaseInReceivablesUnderOperatingActivities = lines[64].split()[1:-1]
        # 已完工尚未结算款的减少(减:增加)
        decreaseOnAmountDue = lines[65].split()[1:-1]
        # 已结算尚未完工款的增加(减:减少)
        increaseOnSettlementNotYetCompleted = lines[66].split()[1:-1]
        # 其他
        other = lines[67].split()[1:-1]
        # 经营活动产生现金流量净额
        netCashFlowFromOperatingActivities = lines[68].split()[1:-1]
        # 债务转为资本
        debtsTransferToCapital = lines[69].split()[1:-1]
        # 一年内到期的可转换公司债券
        oneYearDueConvertibleBonds = lines[70].split()[1:-1]
        # 融资租入固定资产
        financingRentToFixedAsset = lines[71].split()[1:-1]
        # 现金的期末余额
        cashAtTheEndOfPeriod = lines[72].split()[1:-1]
        # 现金的期初余额
        cashAtTheBeginningOfPeriod = lines[73].split()[1:-1]
        # 现金等价物的期末余额
        cashEquivalentsAtTheEndOfPeriod = lines[74].split()[1:-1]
        # 现金等价物的期初余额
        cashEquivalentsAtTheBeginningOfPeriod = lines[75].split()[1:-1]
        # 现金及现金等价物的净增加额
        netIncreaseInCashAndCashEquivalents = lines[76].split()[1:-1]
        result_json = []
        for idx, _ in enumerate(reportDate):
            if start_date:
                if pd.Timestamp(reportDate[idx]) < pd.Timestamp(start_date):
                    continue

            if report_period and not is_same_date(report_period,
                                                  reportDate[idx]):
                continue

            reportEventDate = get_report_event_date(
                security_item, report_date=reportDate[idx])

            # use report_event_date to filter the reportEventDate before it for not getting future data
            if report_event_date and pd.Timestamp(
                    report_event_date) < pd.Timestamp(reportEventDate):
                continue

            the_json = {
                "id":
                '{}_{}'.format(security_item["id"], reportDate[idx]),
                "reportDate":
                to_time_str(reportDate[idx]),
                "reportEventDate":
                reportEventDate,
                "securityId":
                security_item["id"],
                "code":
                security_item["code"],
                # /*一、经营活动产生的现金流量*/
                # 销售商品、提供劳务收到的现金
                "cashFromSellingCommoditiesOrOfferingLabor":
                to_float(cashFromSellingCommoditiesOrOfferingLabor[idx]),
                # 收到的税费返还
                "refundOfTaxAndFeeReceived":
                to_float(refundOfTaxAndFeeReceived[idx]),
                # 收到的其他与经营活动有关的现金
                "cashReceivedRelatingToOtherOperatingActivities":
                to_float(cashReceivedRelatingToOtherOperatingActivities[idx]),
                # 经营活动现金流入小计
                "subTotalOfCashInflowsFromOperatingActivities":
                to_float(subTotalOfCashInflowsFromOperatingActivities[idx]),
                # 购买商品、接受劳务支付的现金
                "cashPaidForGoodsAndServices":
                to_float(cashPaidForGoodsAndServices[idx]),
                # 支付给职工以及为职工支付的现金
                "cashPaidToAndOnBehalfOfemployees":
                to_float(cashPaidToAndOnBehalfOfemployees[idx]),
                # 支付的各项税费
                "paymentsOfTaxesAndSurcharges":
                to_float(paymentsOfTaxesAndSurcharges[idx]),
                # 支付的其他与经营活动有关的现金
                "cashPaidRelatingToOtherOperatingActivities":
                to_float(cashPaidRelatingToOtherOperatingActivities[idx]),
                # 经营活动现金流出小计
                "subTotalOfCashOutflowsFromOperatingActivities":
                to_float(subTotalOfCashOutflowsFromOperatingActivities[idx]),
                # 经营活动产生的现金流量净额
                "netCashFlowsFromOperatingActivities":
                to_float(netCashFlowsFromOperatingActivities[idx]),
                # /*二、投资活动产生的现金流量*/
                # 收回投资所收到的现金
                "cashReceivedFromDisposalOfInvestments":
                to_float(cashReceivedFromDisposalOfInvestments[idx]),
                # 取得投资收益所收到的现金
                "cashReceivedFromReturnsOnIvestments":
                to_float(cashReceivedFromReturnsOnIvestments[idx]),
                # 处置固定资产、无形资产和其他长期资产所收回的现金净额
                "netCashReceivedFromDisposalAssets":
                to_float(netCashReceivedFromDisposalAssets[idx]),
                # 处置子公司及其他营业单位收到的现金净额
                "netCashReceivedFromDisposalSubsidiaries":
                to_float(netCashReceivedFromDisposalSubsidiaries[idx]),
                # 收到的其他与投资活动有关的现金
                "cashReceivedFromOtherInvesting":
                to_float(cashReceivedFromOtherInvesting[idx]),
                # 投资活动现金流入小计
                "subTotalOfCashInflowsFromInvesting":
                to_float(subTotalOfCashInflowsFromInvesting[idx]),
                # 购建固定资产、无形资产和其他长期资产所支付的现金
                "cashPaidToAcquireFixedAssets":
                to_float(cashPaidToAcquireFixedAssets[idx]),
                # 投资所支付的现金
                "cashPaidToAcquireInvestments":
                to_float(cashPaidToAcquireInvestments[idx]),
                # 取得子公司及其他营业单位支付的现金净额
                "netCashPaidToAcquireSubsidiaries":
                to_float(netCashPaidToAcquireSubsidiaries[idx]),
                # 支付的其他与投资活动有关的现金
                "cashPaidRelatingToOtherInvesting":
                to_float(cashPaidRelatingToOtherInvesting[idx]),
                # 投资活动现金流出小计
                "subTotalOfCashOutflowsFromInvesting":
                to_float(subTotalOfCashOutflowsFromInvesting[idx]),
                # 投资活动产生的现金流量净额
                "netCashFlowsFromInvesting":
                to_float(netCashFlowsFromInvesting[idx]),
                # /*三、筹资活动产生的现金流量*/
                # 吸收投资收到的现金
                "cashReceivedFromCapitalContributions":
                to_float(cashReceivedFromCapitalContributions[idx]),
                # 其中:子公司吸收少数股东投资收到的现金
                "cashReceivedFromMinorityShareholdersOfSubsidiaries":
                cashReceivedFromMinorityShareholdersOfSubsidiaries[idx],
                # 取得借款收到的现金
                "cashReceivedFromBorrowings":
                to_float(cashReceivedFromBorrowings[idx]),
                # 发行债券收到的现金
                "cashReceivedFromIssuingBonds":
                to_float(cashReceivedFromIssuingBonds[idx]),
                # 收到其他与筹资活动有关的现金
                "cashReceivedRelatingToOtherFinancingActivities":
                to_float(cashReceivedRelatingToOtherFinancingActivities[idx]),
                # 筹资活动现金流入小计
                "subTotalOfCashInflowsFromFinancingActivities":
                to_float(subTotalOfCashInflowsFromFinancingActivities[idx]),
                # 偿还债务支付的现金
                "cashRepaymentsOfBorrowings":
                to_float(cashRepaymentsOfBorrowings[idx]),
                # 分配股利、利润或偿付利息所支付的现金
                "cashPaymentsForInterestExpensesAndDistributionOfDividendsOrProfits":
                cashPaymentsForInterestExpensesAndDistributionOfDividendsOrProfits[
                    idx],
                # 其中:子公司支付给少数股东的股利、利润
                "cashPaymentsForDividendsOrProfitToMinorityShareholders":
                cashPaymentsForDividendsOrProfitToMinorityShareholders[idx],
                # 支付其他与筹资活动有关的现金
                "cashPaymentsRelatingToOtherFinancingActivities":
                to_float(cashPaymentsRelatingToOtherFinancingActivities[idx]),
                # 筹资活动现金流出小计
                "subTotalOfCashOutflowsFromFinancingActivities":
                to_float(subTotalOfCashOutflowsFromFinancingActivities[idx]),
                # 筹资活动产生的现金流量净额
                "netCashFlowsFromFinancingActivities":
                to_float(netCashFlowsFromFinancingActivities[idx]),
                # /*四、汇率变动对现金及现金等价物的影响*/
                "effectOfForeignExchangeRate":
                to_float(effectOfForeignExchangeRate[idx]),
                # /*五、现金及现金等价物净增加额*/
                "netIncreaseInCash":
                to_float(netIncreaseInCash[idx]),
                # 加:期初现金及现金等价物余额
                "cashAtBeginningOfyear":
                to_float(cashAtBeginningOfyear[idx]),
                # /*六、期末现金及现金等价物余额*/
                "cashAtEndOfyear":
                to_float(cashAtEndOfyear[idx]),
                # /*附注*/
                # 净利润
                "netProfit":
                to_float(netProfit[idx]),
                # 少数股东权益
                "minorityBookValue":
                to_float(minorityBookValue[idx]),
                # 未确认的投资损失
                "unrealisedInvestmentLosses":
                to_float(unrealisedInvestmentLosses[idx]),
                # 资产减值准备
                "allowanceForAssetDevaluation":
                to_float(allowanceForAssetDevaluation[idx]),
                # 固定资产折旧、油气资产折耗、生产性物资折旧
                "depreciationOfFixedAssets":
                to_float(depreciationOfFixedAssets[idx]),
                # 无形资产摊销
                "amorizationOfIntangibleAssets":
                to_float(amorizationOfIntangibleAssets[idx]),
                # 长期待摊费用摊销
                "longTermDeferredExpenses":
                to_float(longTermDeferredExpenses[idx]),
                # 待摊费用的减少
                "decreaseOfDeferredExpenses":
                to_float(decreaseOfDeferredExpenses[idx]),
                # 预提费用的增加
                "IncreaseOfwithholdingExpenses":
                to_float(IncreaseOfwithholdingExpenses[idx]),
                # 处置固定资产、无形资产和其他长期资产的损失
                "lossOnDisposalOfFixedAssets":
                to_float(lossOnDisposalOfFixedAssets[idx]),
                # 固定资产报废损失
                "lossOnFixedAssetsDamaged":
                to_float(lossOnFixedAssetsDamaged[idx]),
                # 公允价值变动损失
                "lossOnFairValueChange":
                to_float(lossOnFairValueChange[idx]),
                # 递延收益增加(减:减少)
                "changeOnDeferredRevenue":
                to_float(changeOnDeferredRevenue[idx]),
                # 预计负债
                "estimatedLiabilities":
                to_float(estimatedLiabilities[idx]),
                # 财务费用
                "financingExpenses":
                to_float(financingExpenses[idx]),
                # 投资损失
                "investmentLoss":
                to_float(investmentLoss[idx]),
                # 递延所得税资产减少
                "decreaseOnDeferredIncomeTaxAssets":
                to_float(decreaseOnDeferredIncomeTaxAssets[idx]),
                # 递延所得税负债增加
                "increaseOnDeferredIncomeTaxLiabilities":
                to_float(increaseOnDeferredIncomeTaxLiabilities[idx]),
                # 存货的减少
                "decreaseInInventories":
                to_float(decreaseInInventories[idx]),
                # 经营性应收项目的减少
                "decreaseInReceivablesUnderOperatingActivities":
                to_float(decreaseInReceivablesUnderOperatingActivities[idx]),
                # 经营性应付项目的增加
                "increaseInReceivablesUnderOperatingActivities":
                to_float(increaseInReceivablesUnderOperatingActivities[idx]),
                # 已完工尚未结算款的减少(减:增加)
                "decreaseOnAmountDue":
                to_float(decreaseOnAmountDue[idx]),
                # 已结算尚未完工款的增加(减:减少)
                "increaseOnSettlementNotYetCompleted":
                to_float(increaseOnSettlementNotYetCompleted[idx]),
                # 其他
                "other":
                to_float(other[idx]),
                # 经营活动产生现金流量净额
                "netCashFlowFromOperatingActivities":
                to_float(netCashFlowFromOperatingActivities[idx]),
                # 债务转为资本
                "debtsTransferToCapital":
                to_float(debtsTransferToCapital[idx]),
                # 一年内到期的可转换公司债券
                "oneYearDueConvertibleBonds":
                to_float(oneYearDueConvertibleBonds[idx]),
                # 融资租入固定资产
                "financingRentToFixedAsset":
                to_float(financingRentToFixedAsset[idx]),
                # 现金的期末余额
                "cashAtTheEndOfPeriod":
                to_float(cashAtTheEndOfPeriod[idx]),
                # 现金的期初余额
                "cashAtTheBeginningOfPeriod":
                to_float(cashAtTheBeginningOfPeriod[idx]),
                # 现金等价物的期末余额
                "cashEquivalentsAtTheEndOfPeriod":
                to_float(cashEquivalentsAtTheEndOfPeriod[idx]),
                # 现金等价物的期初余额
                "cashEquivalentsAtTheBeginningOfPeriod":
                to_float(cashEquivalentsAtTheBeginningOfPeriod[idx]),
                # 现金及现金等价物的净增加额
                "netIncreaseInCashAndCashEquivalents":
                to_float(netIncreaseInCashAndCashEquivalents[idx])
            }

            if report_period and is_same_date(report_period, reportDate[idx]):
                return the_json

            result_json.append(the_json)

        if result_json:
            result_json = sorted(result_json,
                                 key=lambda x: pd.Timestamp(x['reportDate']))

        return result_json
Exemplo n.º 23
0
# Ordered mapping of output key -> 0-based line index in the balance sheet
# text file.  The order here is the key order of every emitted record.
# Indices 1, 2, 20, 42, 60 and 72 are never read (presumably section-header
# rows in the file -- TODO confirm against a sample download).
_BALANCE_SHEET_FIELDS = (
    # --- current assets ---
    ("moneyFunds", 3),  # monetary funds (cash)
    ("heldForTradingFinancialAssets", 4),  # held-for-trading financial assets
    ("derivative", 5),  # derivative financial assets
    ("billsReceivable", 6),  # notes receivable
    ("accountsReceivable", 7),  # accounts receivable
    ("prepaidAccounts", 8),  # prepayments
    ("interestReceivable", 9),  # interest receivable
    ("dividendReceivable", 10),  # dividends receivable
    ("otherReceivables", 11),  # other receivables
    ("buyingBackTheSaleOfFinancialAssets", 12),  # financial assets purchased under resale agreements
    ("inventory", 13),  # inventories
    ("assetsForSale", 14),  # assets classified as held for sale
    ("nonCurrentAssetsDueWithinOneYear", 15),  # non-current assets due within one year
    ("unamortizedExpenditures", 16),  # deferred (unamortized) expenses
    ("waitDealIntangibleAssetsLossOrIncome", 17),  # pending gains/losses on current assets
    ("otherCurrentAssets", 18),  # other current assets
    ("totalCurrentAssets", 19),  # total current assets
    # --- non-current assets ---
    ("loansAndPaymentsOnBehalf", 21),  # loans and advances granted
    ("availableForSaleFinancialAssets", 22),  # available-for-sale financial assets
    ("heldToMaturityInvestment", 23),  # held-to-maturity investments
    ("longTermReceivables", 24),  # long-term receivables
    ("longTermEquityInvestment", 25),  # long-term equity investments
    ("investmentRealEstate", 26),  # investment property
    ("NetfixedAssets", 27),  # net fixed assets
    ("constructionInProcess", 28),  # construction in progress
    ("engineerMaterial", 29),  # construction materials
    ("fixedAssetsInLiquidation", 30),  # fixed assets pending disposal
    ("productiveBiologicalAssets", 31),  # productive biological assets
    ("nonProfitLivingAssets", 32),  # public-welfare biological assets
    ("oilAndGasAssets", 33),  # oil and gas assets
    ("intangibleAssets", 34),  # intangible assets
    ("developmentExpenditure", 35),  # development expenditure
    ("goodwill", 36),  # goodwill
    ("longTermDeferredExpenses", 37),  # long-term deferred expenses
    ("deferredIncomeTaxAssets", 38),  # deferred income tax assets
    ("OtherNonCurrentAssets", 39),  # other non-current assets
    ("nonCurrentAssets", 40),  # total non-current assets
    ("totalAssets", 41),  # total assets
    # --- current liabilities ---
    ("shortTermBorrowing", 43),  # short-term borrowings
    ("transactionFinancialLiabilities", 44),  # held-for-trading financial liabilities
    ("billsPayable", 45),  # notes payable
    ("accountsPayable", 46),  # accounts payable
    ("accountsReceivedInAdvance", 47),  # advances received
    ("handlingChargesAndCommissionsPayable", 48),  # fees and commissions payable
    ("employeeBenefitsPayable", 49),  # employee benefits payable
    ("taxesAndSurchargesPayable", 50),  # taxes payable
    ("interestPayable", 51),  # interest payable
    ("dividendpayable", 52),  # dividends payable
    ("otherPayables", 53),  # other payables
    ("withholdingExpenses", 54),  # accrued expenses
    ("deferredIncomeWithinOneYear", 55),  # deferred income within one year
    ("shortTermDebenturesPayable", 56),  # short-term bonds payable
    ("nonCurrentLiabilitiesMaturingWithinOneYear", 57),  # non-current liabilities due within one year
    ("otherCurrentLiability", 58),  # other current liabilities
    ("totalCurrentLiabilities", 59),  # total current liabilities
    # --- non-current liabilities ---
    ("LongTermBorrowing", 61),  # long-term borrowings
    ("bondPayable", 62),  # bonds payable
    ("longTermPayables", 63),  # long-term payables
    ("longTermEmployeeBenefitsPayable", 64),  # long-term employee benefits payable
    ("specialPayable", 65),  # special payables
    ("expectedNonCurrentLiabilities", 66),  # estimated non-current liabilities
    ("deferredIncomeTaxLiabilities", 67),  # deferred income tax liabilities
    ("longTermDeferredRevenue", 68),  # long-term deferred revenue
    ("otherNonCurrentLiabilities", 69),  # other non-current liabilities
    ("totalNonCurrentLiabilities", 70),  # total non-current liabilities
    ("totalLiabilities", 71),  # total liabilities
    # --- owners' equity ---
    ("totalShareCapital", 73),  # paid-in capital (or share capital)
    ("capitalSurplus", 74),  # capital surplus
    ("treasuryStock", 75),  # less: treasury stock
    ("otherComprehensiveIncome", 76),  # other comprehensive income
    ("theSpecialReserve", 77),  # special reserve
    ("surplusReserves", 78),  # surplus reserves
    ("generalRiskPreparation", 79),  # general risk reserve
    ("undistributedProfits", 80),  # undistributed profits
    ("bookValue", 81),  # total equity attributable to parent-company shareholders (net assets)
    ("minorityBookValue", 82),  # minority interests
    ("totalBookValue", 83),  # total owners' (shareholders') equity
    ("totalLiabilitiesAndOwnersEquity", 84),  # total liabilities and owners' equity
)


def get_balance_sheet_items(security_item,
                            start_date=None,
                            report_period=None,
                            report_event_date=None):
    """Parse the downloaded balance sheet text file of a security.

    The file is read as whitespace-separated rows.  For each row the first
    and last tokens are dropped (presumably the row label and a trailing
    filler column -- TODO confirm); the remaining tokens are one value per
    report date, aligned with the dates on line 0.

    Args:
        security_item: mapping providing at least ``"id"`` and ``"code"``;
            also passed to ``get_balance_sheet_path`` and
            ``get_report_event_date``.
        start_date: if truthy, report dates earlier than this are skipped.
        report_period: if truthy, only the report whose date matches it
            (per ``is_same_date``) is considered, and that single record is
            returned as a dict instead of a list.
        report_event_date: if truthy, reports whose event date is later
            than this are skipped, so that no "future" data is returned.

    Returns:
        ``[]`` when the file does not exist or nothing matches; a single
        dict when ``report_period`` is given and matched; otherwise a list
        of dicts sorted by ascending ``reportDate``.

    Raises:
        IndexError: if the file has fewer rows or columns than expected.
    """
    path = get_balance_sheet_path(security_item)
    if not os.path.exists(path):
        return []
    encoding = settings.DOWNLOAD_TXT_ENCODING or detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    # Only the read needs the handle; release it before parsing.
    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    reportDate = lines[0].split()[1:-1]
    # Parse every mapped row up front so a truncated file fails immediately,
    # independent of which report dates end up being selected.
    columns = [(key, lines[line_no].split()[1:-1])
               for key, line_no in _BALANCE_SHEET_FIELDS]

    result_json = []
    for idx, report_date in enumerate(reportDate):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue

        # Filter on the period before looking up the event date, so the
        # lookup is only done for rows that can still be returned.
        if report_period and not is_same_date(report_period, report_date):
            continue

        reportEventDate = get_report_event_date(security_item,
                                                report_date=report_date)

        # use report_event_date to drop reports published after it, so that
        # no future data leaks into the result
        if report_event_date and pd.Timestamp(
                report_event_date) < pd.Timestamp(reportEventDate):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": reportEventDate,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        for key, values in columns:
            the_json[key] = to_float(values[idx])

        # Reaching this point with a report_period means the date matched:
        # the caller asked for exactly one report, so return it directly.
        if report_period:
            return the_json

        result_json.append(the_json)

    result_json.sort(key=lambda item: pd.Timestamp(item['reportDate']))
    return result_json
Exemplo n.º 24
0
def get_income_statement_items(security_item, start_date=None, report_period=None, report_event_date=None):
    """
    Parse the downloaded income statement file of a security into json items.

    Every row of the file is split on whitespace; the first and the last token
    of each row are dropped and the remaining tokens are read as one value per
    report date (row 0 holds the report dates themselves).

    Parameters
    ----------
    security_item : mapping
        the security item; its ``id`` and ``code`` are copied into each item
    start_date : TimeStamp str or TimeStamp
        reports dated before it are skipped
    report_period : TimeStamp str or TimeStamp
        when given, only the report of that period is looked up
    report_event_date : TimeStamp str or TimeStamp
        reports whose event date (from ``get_report_event_date``) is later
        than it are skipped, so that no future data is returned

    Returns
    -------
    dict
        the single matching item when ``report_period`` is given and found
    list of dict
        ``[]`` when the file does not exist or nothing matches, otherwise
        the items sorted by ``reportDate``

    """
    # (json key, row index in the file), listed in output order
    value_rows = (
        # /* total operating revenue */
        # operating revenue
        ("operatingRevenue", 2),
        # /* total operating costs */
        ("OperatingTotalCosts", 4),
        # operating costs
        ("OperatingCosts", 5),
        # business taxes and surcharges
        ("businessTaxesAndSurcharges", 6),
        # selling expenses
        ("sellingExpenses", 7),
        # administrative expenses
        ("ManagingCosts", 8),
        # financial expenses
        ("financingExpenses", 9),
        # asset impairment loss
        ("assetsDevaluation", 10),
        # gains from changes in fair value
        ("incomeFromChangesInFairValue", 11),
        # investment income
        ("investmentIncome", 12),
        # of which: investment income from associates and joint ventures
        ("investmentIncomeFromRelatedEnterpriseAndJointlyOperating", 13),
        # exchange gains
        ("exchangeGains", 14),
        # /* operating profit */
        ("operatingProfit", 15),
        # plus: non-operating income
        ("nonOperatingIncome", 16),
        # less: non-operating expenditure
        ("nonOperatingExpenditure", 17),
        # of which: loss on disposal of non-current assets
        ("disposalLossOnNonCurrentLiability", 18),
        # /* total profit */
        ("totalProfits", 19),
        # less: income tax expense
        ("incomeTaxExpense", 20),
        # /* net profit */
        ("netProfit", 21),
        # net profit attributable to owners of the parent company
        ("netProfitAttributedToParentCompanyOwner", 22),
        # minority interest income
        ("minorityInterestIncome", 23),
        # /* earnings per share */
        # basic earnings per share (yuan per share)
        ("EPS", 25),
        # diluted earnings per share (yuan per share)
        ("dilutedEPS", 26),
        # /* other comprehensive income */
        ("otherComprehensiveIncome", 27),
        # /* total comprehensive income */
        ("accumulatedOtherComprehensiveIncome", 28),
        # total comprehensive income attributable to owners of the parent company
        ("attributableToOwnersOfParentCompany", 29),
        # total comprehensive income attributable to minority shareholders
        ("attributableToMinorityShareholders", 30),
    )

    path = get_income_statement_path(security_item)
    if not os.path.exists(path):
        return []
    encoding = settings.DOWNLOAD_TXT_ENCODING or detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    def row_values(row):
        # drop the leading label token and the trailing token of the row
        # NOTE(review): the trailing token is presumably not a report value - confirm against the file format
        return lines[row].split()[1:-1]

    reportDate = row_values(0)
    columns = [(key, row_values(row)) for key, row in value_rows]

    result_json = []
    for idx, report_date in enumerate(reportDate):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue

        if report_period and not is_same_date(report_period, report_date):
            continue

        reportEventDate = get_report_event_date(security_item, report_date=report_date)

        # use report_event_date to filter the reportEventDate before it for not getting future data
        if report_event_date and pd.Timestamp(report_event_date) < pd.Timestamp(reportEventDate):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": reportEventDate,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        # every value column is converted with to_float, including the
        # associates/joint-ventures investment income that used to be left as a raw string
        for key, values in columns:
            the_json[key] = to_float(values[idx])

        # non-matching periods were skipped above, so this is the requested report
        if report_period:
            return the_json

        result_json.append(the_json)

    return sorted(result_json, key=lambda x: pd.Timestamp(x['reportDate']))
    def download_day_k_data(self, response):
        """
        Merge the day k data carried by ``response`` into the csv at ``response.meta['path']``.

        The payload is taken from ``response.text`` (the span from the first
        ``{`` to the first ``}``), and its ``data`` rows are converted to k data
        records for ``response.meta['item']``. Rows whose high/low are in the
        wrong position are repaired, turnover is estimated, and the records are
        appended to the rows already stored in the csv. The result is
        de-duplicated on ``timestamp`` (last one wins), sorted by timestamp and
        written back without the index.

        Any exception is logged together with its traceback and is not raised.
        """
        path = response.meta['path']
        item = response.meta['item']

        try:
            # csv data that has already been saved
            if os.path.exists(path):
                df_current = pd.read_csv(path, dtype=str)
                # back-fill the name column for older files
                if 'name' not in df_current.columns:
                    df_current['name'] = item['name']
            else:
                df_current = pd.DataFrame()

            tmp_str = response.text

            json_str = tmp_str[tmp_str.index('{'):tmp_str.index('}') + 1]
            the_datas = json.loads(json_str)['data']

            the_jsons = []
            pre_json = None

            for the_data in the_datas:
                # raw row layout: timestamp, open, high, close, low, volume, change pct
                the_json = {'code': item['code'],
                            'securityId': item['id'],
                            'name': item['name'],
                            'timestamp': to_time_str(the_data[0]),
                            'open': the_data[1],
                            'high': the_data[2],
                            'close': the_data[3],
                            'low': the_data[4],
                            'volume': the_data[5],
                            'changePct': the_data[6]}

                # some rows have the values in the wrong position: swap 'high' with
                # the first of close/open/low that actually holds the maximum
                raw_prices = the_data[1:5]
                real_high = max(raw_prices)
                if the_json['high'] != real_high:
                    for field in ('close', 'open', 'low'):
                        if the_json[field] == real_high:
                            the_json[field], the_json['high'] = the_json['high'], the_json[field]
                            break

                # same repair for 'low', checked against close/open/high
                real_low = min(raw_prices)
                if the_json['low'] != real_low:
                    for field in ('close', 'open', 'high'):
                        if the_json[field] == real_low:
                            the_json[field], the_json['low'] = the_json['low'], the_json[field]
                            break

                # turnover is an estimate: average of the four prices times volume
                avgPrice = (the_json['open'] + the_json['high'] + the_json['close'] + the_json['low']) / 4
                the_json['turnover'] = avgPrice * the_json['volume']
                # the first row of the response has no previous row, so it gets no preClose/change
                if pre_json:
                    the_json['preClose'] = pre_json['close']
                    the_json['change'] = the_json['close'] - pre_json['close']

                # TODO: these values are not available yet, fill them in later
                the_json['turnoverRate'] = 0
                the_json['tCap'] = 0
                the_json['mCap'] = 0
                the_json['factor'] = 0

                pre_json = the_json
                the_jsons.append(the_json)

            # merge into the current csv; DataFrame.append no longer exists in pandas 2.x
            df_current = pd.concat([df_current, pd.DataFrame(the_jsons)], ignore_index=True)

            if item['type'] == 'index':
                df_current = df_current.dropna(subset=KDATA_INDEX_COLUMN_163)
                # keep the column order stable
                df_current = df_current.loc[:, KDATA_COLUMN_INDEX]
            else:
                df_current = df_current.dropna(subset=KDATA_COLUMN_163)
                # keep the column order stable
                df_current = df_current.loc[:, KDATA_COLUMN_STOCK]

            df_current = df_current.drop_duplicates(subset='timestamp', keep='last')
            df_current = df_current.set_index(df_current['timestamp'], drop=False)
            df_current.index = pd.to_datetime(df_current.index)
            df_current = df_current.sort_index()
            df_current.to_csv(path, index=False)
        except Exception as e:
            # best effort: a failed download must not stop the crawl, but keep the traceback
            self.logger.exception('error when getting k data url={} error={}'.format(response.url, e))
Exemplo n.º 26
0
def get_income_statement_items(security_item,
                               start_date=None,
                               report_period=None,
                               report_event_date=None,
                               return_type='json'):
    """
    get income statement items.

    The income statement file of the security is read as GB2312 text. Every
    row is split on whitespace; the first and the last token of each row are
    dropped and the remaining tokens are read as one value per report date
    (row 0 holds the report dates themselves).

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code
    start_date : TimeStamp str or TimeStamp
        start date; reports dated before it are skipped
    report_period : TimeStamp str or TimeStamp
        the finance report period,eg.'20170331'; when given, only that
        report is looked up
    report_event_date : TimeStamp str or TimeStamp
        the finance report published date; reports whose event date (from
        ``get_report_event_date``) is later than it are skipped
    return_type : str
        {'json','doc'},default: 'json'

    Returns
    -------
    IncomeStatement or json
        the single matching item when ``report_period`` is given and found
    list of IncomeStatement or list of json
        ``[]`` when the file does not exist or nothing matches, otherwise
        the items sorted by ``reportDate``

    """
    # (json key, row index in the file), listed in output order
    value_rows = (
        # /* total operating revenue */
        # operating revenue
        ("operatingRevenue", 2),
        # /* total operating costs */
        ("operatingTotalCosts", 4),
        # operating costs
        ("operatingCosts", 5),
        # business taxes and surcharges
        ("businessTaxesAndSurcharges", 6),
        # selling expenses
        ("sellingExpenses", 7),
        # administrative expenses
        ("ManagingCosts", 8),
        # financial expenses
        ("financingExpenses", 9),
        # asset impairment loss
        ("assetsDevaluation", 10),
        # gains from changes in fair value
        ("incomeFromChangesInFairValue", 11),
        # investment income
        ("investmentIncome", 12),
        # of which: investment income from associates and joint ventures
        ("investmentIncomeFromRelatedEnterpriseAndJointlyOperating", 13),
        # exchange gains
        ("exchangeGains", 14),
        # /* operating profit */
        ("operatingProfit", 15),
        # plus: non-operating income
        ("nonOperatingIncome", 16),
        # less: non-operating expenditure
        ("nonOperatingExpenditure", 17),
        # of which: loss on disposal of non-current assets
        ("disposalLossOnNonCurrentLiability", 18),
        # /* total profit */
        ("totalProfits", 19),
        # less: income tax expense
        ("incomeTaxExpense", 20),
        # /* net profit */
        ("netProfit", 21),
        # net profit attributable to owners of the parent company
        ("netProfitAttributedToParentCompanyOwner", 22),
        # minority interest income
        ("minorityInterestIncome", 23),
        # /* earnings per share */
        # basic earnings per share (yuan per share)
        ("EPS", 25),
        # diluted earnings per share (yuan per share)
        ("dilutedEPS", 26),
        # /* other comprehensive income */
        ("otherComprehensiveIncome", 27),
        # /* total comprehensive income */
        ("accumulatedOtherComprehensiveIncome", 28),
        # total comprehensive income attributable to owners of the parent company
        ("attributableToOwnersOfParentCompany", 29),
        # total comprehensive income attributable to minority shareholders
        ("attributableToMinorityShareholders", 30),
    )

    security_item = to_security_item(security_item)

    path = get_income_statement_path(security_item)
    if not os.path.exists(path):
        return []
    encoding = 'GB2312'

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    def row_values(row):
        # drop the leading label token and the trailing token of the row
        # NOTE(review): the trailing token is presumably not a report value - confirm against the file format
        return lines[row].split()[1:-1]

    reportDate = row_values(0)
    columns = [(key, row_values(row)) for key, row in value_rows]

    result_list = []
    for idx, report_date in enumerate(reportDate):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue

        if report_period and not is_same_date(report_period, report_date):
            continue

        reportEventDate = get_report_event_date(security_item,
                                                report_date=report_date)

        # use report_event_date to filter the reportEventDate before it for not getting future data
        if report_event_date and pd.Timestamp(
                report_event_date) < pd.Timestamp(reportEventDate):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": reportEventDate,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        # every value column is converted with to_float, including the
        # associates/joint-ventures investment income that used to be left as a raw string
        for key, values in columns:
            the_json[key] = to_float(values[idx])

        the_data = the_json

        if return_type == 'doc':
            the_data = IncomeStatement(meta={'id': the_json['id']})
            fill_doc_type(the_data, the_json)

        # non-matching periods were skipped above, so this is the requested report
        if report_period:
            return the_data

        result_list.append(the_data)

    if result_list:
        result_list = sorted(result_list,
                             key=lambda x: pd.Timestamp(x['reportDate']))
    return result_list
Exemplo n.º 27
0
def get_income_statement_items(security_item,
                               start_date=None,
                               report_period=None,
                               report_event_date=None):
    """
    Parse the downloaded income statement file of a security into json items.

    Every row of the file is split on whitespace; the first and the last token
    of each row are dropped and the remaining tokens are read as one value per
    report date (row 0 holds the report dates themselves).

    Parameters
    ----------
    security_item : mapping
        the security item; its ``id`` and ``code`` are copied into each item
    start_date : TimeStamp str or TimeStamp
        reports dated before it are skipped
    report_period : TimeStamp str or TimeStamp
        when given, only the report of that period is looked up
    report_event_date : TimeStamp str or TimeStamp
        reports whose event date (from ``get_report_event_date``) is later
        than it are skipped, so that no future data is returned

    Returns
    -------
    dict
        the single matching item when ``report_period`` is given and found
    list of dict
        ``[]`` when the file does not exist or nothing matches, otherwise
        the items sorted by ``reportDate``

    """
    # (json key, row index in the file), listed in output order
    value_rows = (
        # /* total operating revenue */
        # operating revenue
        ("operatingRevenue", 2),
        # /* total operating costs */
        ("OperatingTotalCosts", 4),
        # operating costs
        ("OperatingCosts", 5),
        # business taxes and surcharges
        ("businessTaxesAndSurcharges", 6),
        # selling expenses
        ("sellingExpenses", 7),
        # administrative expenses
        ("ManagingCosts", 8),
        # financial expenses
        ("financingExpenses", 9),
        # asset impairment loss
        ("assetsDevaluation", 10),
        # gains from changes in fair value
        ("incomeFromChangesInFairValue", 11),
        # investment income
        ("investmentIncome", 12),
        # of which: investment income from associates and joint ventures
        ("investmentIncomeFromRelatedEnterpriseAndJointlyOperating", 13),
        # exchange gains
        ("exchangeGains", 14),
        # /* operating profit */
        ("operatingProfit", 15),
        # plus: non-operating income
        ("nonOperatingIncome", 16),
        # less: non-operating expenditure
        ("nonOperatingExpenditure", 17),
        # of which: loss on disposal of non-current assets
        ("disposalLossOnNonCurrentLiability", 18),
        # /* total profit */
        ("totalProfits", 19),
        # less: income tax expense
        ("incomeTaxExpense", 20),
        # /* net profit */
        ("netProfit", 21),
        # net profit attributable to owners of the parent company
        ("netProfitAttributedToParentCompanyOwner", 22),
        # minority interest income
        ("minorityInterestIncome", 23),
        # /* earnings per share */
        # basic earnings per share (yuan per share)
        ("EPS", 25),
        # diluted earnings per share (yuan per share)
        ("dilutedEPS", 26),
        # /* other comprehensive income */
        ("otherComprehensiveIncome", 27),
        # /* total comprehensive income */
        ("accumulatedOtherComprehensiveIncome", 28),
        # total comprehensive income attributable to owners of the parent company
        ("attributableToOwnersOfParentCompany", 29),
        # total comprehensive income attributable to minority shareholders
        ("attributableToMinorityShareholders", 30),
    )

    path = get_income_statement_path(security_item)
    if not os.path.exists(path):
        return []
    encoding = settings.DOWNLOAD_TXT_ENCODING or detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    def row_values(row):
        # drop the leading label token and the trailing token of the row
        # NOTE(review): the trailing token is presumably not a report value - confirm against the file format
        return lines[row].split()[1:-1]

    reportDate = row_values(0)
    columns = [(key, row_values(row)) for key, row in value_rows]

    result_json = []
    for idx, report_date in enumerate(reportDate):
        if start_date and pd.Timestamp(report_date) < pd.Timestamp(start_date):
            continue

        if report_period and not is_same_date(report_period, report_date):
            continue

        reportEventDate = get_report_event_date(security_item,
                                                report_date=report_date)

        # use report_event_date to filter the reportEventDate before it for not getting future data
        if report_event_date and pd.Timestamp(
                report_event_date) < pd.Timestamp(reportEventDate):
            continue

        the_json = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": reportEventDate,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        # every value column is converted with to_float, including the
        # associates/joint-ventures investment income that used to be left as a raw string
        for key, values in columns:
            the_json[key] = to_float(values[idx])

        # non-matching periods were skipped above, so this is the requested report
        if report_period:
            return the_json

        result_json.append(the_json)

    if result_json:
        result_json = sorted(result_json,
                             key=lambda x: pd.Timestamp(x['reportDate']))
    return result_json
Exemplo n.º 28
0
def get_kdata(security_item,
              the_date=None,
              start_date=None,
              end_date=None,
              fuquan='bfq',
              dtype=None,
              source='163',
              level='day'):
    """
    get kdata.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code
    the_date : TimeStamp str or TimeStamp
        get the kdata for the exact date
    start_date : TimeStamp str or TimeStamp
        start date; when omitted, a stock starts at its ``listDate``
        ('2002-01-01' if that is missing) and anything else starts 30 days
        before today
    end_date : TimeStamp str or TimeStamp
        end date, default: today
    fuquan : str
        {"qfq","hfq","bfq"},default:"bfq"
    dtype : type
        the data type for the csv column,default: None
    source : str
        the data source,{'163','sina'},default: '163'
    level : str or int
        the kdata level,{1,5,15,30,60,'day','week','month'},default : 'day'
        (not used by this function body)

    Returns
    -------
    DataFrame
        the kdata indexed by timestamp; an empty DataFrame when the kdata
        file does not exist. When ``the_date`` is given the result is
        ``df.loc[the_date]`` if that date is in the index, otherwise an
        empty DataFrame.

    """

    security_item = to_security_item(security_item)

    # 163 data is already merged and carries the adjustment factor; it is all
    # stored under the 'bfq' directory, so read from that single place and
    # convert afterwards
    path_fuquan = 'bfq' if source == '163' else fuquan
    the_path = files_contract.get_kdata_path(security_item,
                                             source=source,
                                             fuquan=path_fuquan)

    if not os.path.isfile(the_path):
        return pd.DataFrame()

    if not dtype:
        dtype = {"code": str, 'timestamp': str}
    df = pd.read_csv(the_path, dtype=dtype)

    df['timestamp'] = df['timestamp'].apply(to_time_str)
    df = df.set_index(df['timestamp'], drop=False)
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    if the_date:
        if the_date in df.index:
            return df.loc[the_date]
        return pd.DataFrame()

    if not start_date:
        if security_item['type'] == 'stock':
            list_date = security_item['listDate']
            # pd.isna is False for any string and True for NaN/None, so a
            # missing list date never raises here
            start_date = '2002-01-01' if pd.isna(list_date) else list_date
        else:
            start_date = datetime.datetime.today() - datetime.timedelta(
                days=30)
    if not end_date:
        end_date = datetime.datetime.today()

    if start_date and end_date:
        df = df.loc[start_date:end_date]

    # only 163 stock data is price-adjusted here
    if source != '163' or security_item['type'] != 'stock':
        return df
    # nothing to adjust: unadjusted prices requested, no factor column, or
    # no rows in range (there is no latest factor to read then)
    if fuquan == 'bfq' or 'factor' not in df.columns or df.empty:
        return df

    # work on a copy so the adjustment never writes into a slice of the full frame
    df = df.copy()
    current_factor = df['factor'].iat[-1]
    price_columns = ('close', 'open', 'high', 'low')
    # backward-adjusted (hfq) prices never change afterwards
    for column in price_columns:
        df[column] = df[column] * df['factor']
    if fuquan == 'qfq':
        # forward-adjusted (qfq) prices are scaled back by the latest factor
        for column in price_columns:
            df[column] = df[column] / current_factor
    return df
Exemplo n.º 29
0
# Ordered (result key, zero-based line number) pairs describing where each
# balance-sheet field lives in the downloaded text file.  Line 0 holds the
# report dates; the line numbers absent from this table (1, 2, 20, 42, 60, 72)
# are not read -- presumably section headings, TODO confirm against a sample
# file.  The key spellings (including their inconsistent capitalisation) are
# part of the returned records and must not be changed.
_BALANCE_SHEET_FIELDS = (
    # --- current assets ---
    ("moneyFunds", 3),  # cash and cash equivalents
    ("heldForTradingFinancialAssets", 4),  # financial assets held for trading
    ("derivative", 5),  # derivative financial assets
    ("billsReceivable", 6),  # notes receivable
    ("accountsReceivable", 7),  # accounts receivable
    ("prepaidAccounts", 8),  # prepayments
    ("interestReceivable", 9),  # interest receivable
    ("dividendReceivable", 10),  # dividends receivable
    ("otherReceivables", 11),  # other receivables
    ("buyingBackTheSaleOfFinancialAssets", 12),  # assets bought under resale agreements
    ("inventory", 13),  # inventories
    ("assetsForSale", 14),  # assets classified as held for sale
    ("nonCurrentAssetsDueWithinOneYear", 15),  # non-current assets due within one year
    ("unamortizedExpenditures", 16),  # deferred (unamortized) expenses
    ("waitDealIntangibleAssetsLossOrIncome", 17),  # pending gains/losses on current assets
    ("otherCurrentAssets", 18),  # other current assets
    ("totalCurrentAssets", 19),  # total current assets
    # --- non-current assets ---
    ("loansAndPaymentsOnBehalf", 21),  # loans and advances granted
    ("availableForSaleFinancialAssets", 22),  # available-for-sale financial assets
    ("heldToMaturityInvestment", 23),  # held-to-maturity investments
    ("longTermReceivables", 24),  # long-term receivables
    ("longTermEquityInvestment", 25),  # long-term equity investments
    ("investmentRealEstate", 26),  # investment property
    ("NetfixedAssets", 27),  # net fixed assets
    ("constructionInProcess", 28),  # construction in progress
    ("engineerMaterial", 29),  # construction materials
    ("fixedAssetsInLiquidation", 30),  # fixed assets pending disposal
    ("productiveBiologicalAssets", 31),  # productive biological assets
    ("nonProfitLivingAssets", 32),  # public-welfare biological assets
    ("oilAndGasAssets", 33),  # oil and gas assets
    ("intangibleAssets", 34),  # intangible assets
    ("developmentExpenditure", 35),  # development expenditure
    ("goodwill", 36),  # goodwill
    ("longTermDeferredExpenses", 37),  # long-term deferred expenses
    ("deferredIncomeTaxAssets", 38),  # deferred income tax assets
    ("OtherNonCurrentAssets", 39),  # other non-current assets
    ("nonCurrentAssets", 40),  # total non-current assets
    ("totalAssets", 41),  # total assets
    # --- current liabilities ---
    ("shortTermBorrowing", 43),  # short-term borrowings
    ("transactionFinancialLiabilities", 44),  # financial liabilities held for trading
    ("billsPayable", 45),  # notes payable
    ("accountsPayable", 46),  # accounts payable
    ("accountsReceivedInAdvance", 47),  # advances from customers
    ("handlingChargesAndCommissionsPayable", 48),  # fees and commissions payable
    ("employeeBenefitsPayable", 49),  # employee compensation payable
    ("taxesAndSurchargesPayable", 50),  # taxes payable
    ("interestPayable", 51),  # interest payable
    ("dividendpayable", 52),  # dividends payable
    ("otherPayables", 53),  # other payables
    ("withholdingExpenses", 54),  # accrued expenses
    ("deferredIncomeWithinOneYear", 55),  # deferred income within one year
    ("shortTermDebenturesPayable", 56),  # short-term bonds payable
    ("nonCurrentLiabilitiesMaturingWithinOneYear", 57),  # non-current liabilities due within one year
    ("otherCurrentLiability", 58),  # other current liabilities
    ("totalCurrentLiabilities", 59),  # total current liabilities
    # --- non-current liabilities ---
    ("LongTermBorrowing", 61),  # long-term borrowings
    ("bondPayable", 62),  # bonds payable
    ("longTermPayables", 63),  # long-term payables
    ("longTermEmployeeBenefitsPayable", 64),  # long-term employee compensation payable
    ("specialPayable", 65),  # special payables
    ("expectedNonCurrentLiabilities", 66),  # estimated non-current liabilities
    ("deferredIncomeTaxLiabilities", 67),  # deferred income tax liabilities
    ("longTermDeferredRevenue", 68),  # long-term deferred income
    ("otherNonCurrentLiabilities", 69),  # other non-current liabilities
    ("totalNonCurrentLiabilities", 70),  # total non-current liabilities
    ("totalLiabilities", 71),  # total liabilities
    # --- owners' equity ---
    ("totalShareCapital", 73),  # paid-in capital (or share capital)
    ("capitalSurplus", 74),  # capital reserve
    ("treasuryStock", 75),  # less: treasury stock
    ("otherComprehensiveIncome", 76),  # other comprehensive income
    ("theSpecialReserve", 77),  # special reserve
    ("surplusReserves", 78),  # surplus reserve
    ("generalRiskPreparation", 79),  # general risk reserve
    ("undistributedProfits", 80),  # undistributed profits
    ("bookValue", 81),  # equity attributable to parent-company shareholders (net assets)
    ("minorityBookValue", 82),  # minority interests
    ("totalBookValue", 83),  # total owners' (shareholders') equity
    ("totalLiabilitiesAndOwnersEquity", 84),  # total liabilities and owners' equity
)


def get_balance_sheet_items(security_item, start_date=None, report_period=None, report_event_date=None):
    """Parse the downloaded balance-sheet text file of a security.

    The file is read line by line; every line is whitespace-split and the
    first and last tokens are discarded, leaving one value per report column.
    (The first token is presumably the row label; why the last token is
    dropped is not visible from this function -- TODO confirm.)

    Args:
        security_item: mapping with at least ``"id"`` and ``"code"``; it is
            also handed to ``get_balance_sheet_path`` and
            ``get_report_event_date``.
        start_date: if given, report columns dated before it are skipped.
        report_period: if given, only the report whose date matches (per
            ``is_same_date``) is considered and it is returned directly.
        report_event_date: if given, reports whose event date (as returned by
            ``get_report_event_date``) lies after it are skipped, so that no
            "future" data is returned.

    Returns:
        * ``[]`` when the balance-sheet file does not exist.
        * A single ``dict`` when ``report_period`` is given and a matching
          report passes the other filters.
        * Otherwise a list of ``dict`` records sorted by ``reportDate``
          ascending (empty when nothing passes the filters -- this includes
          the "``report_period`` given but not found" case).

    Raises:
        IndexError: if the file has fewer lines than the field table expects,
            or a field line has fewer columns than the report-date line.
        ValueError: if ``start_date`` or ``report_event_date`` is given but
            cannot be converted by ``pd.Timestamp``; both are converted before
            any report column is examined.
    """
    path = get_balance_sheet_path(security_item)
    if not os.path.exists(path):
        return []

    # Fall back to sniffing the encoding only when none is configured.
    encoding = settings.DOWNLOAD_TXT_ENCODING or detect_encoding(
        url='file://' + os.path.abspath(path)).get('encoding')

    # Only the raw lines are needed, so release the file handle immediately.
    with open(path, encoding=encoding) as fr:
        lines = fr.readlines()

    report_dates = lines[0].split()[1:-1]
    # Split every field line up front, in table order, so a truncated file
    # fails before any record is built.
    columns = [(key, lines[line_no].split()[1:-1])
               for key, line_no in _BALANCE_SHEET_FIELDS]

    # Loop-invariant conversions.
    start_ts = pd.Timestamp(start_date) if start_date else None
    cutoff_ts = pd.Timestamp(report_event_date) if report_event_date else None

    items = []
    for idx, report_date in enumerate(report_dates):
        if start_ts is not None and pd.Timestamp(report_date) < start_ts:
            continue

        # Cheap filter first: avoid looking up the event date of reports the
        # caller did not ask for.
        if report_period and not is_same_date(report_period, report_date):
            continue

        event_date = get_report_event_date(security_item, report_date=report_date)

        # Skip reports whose event date is after the requested cut-off so the
        # caller never sees data from the future.
        if cutoff_ts is not None and cutoff_ts < pd.Timestamp(event_date):
            continue

        item = {
            "id": '{}_{}'.format(security_item["id"], report_date),
            "reportDate": to_time_str(report_date),
            "reportEventDate": event_date,
            "securityId": security_item["id"],
            "code": security_item["code"],
        }
        for key, values in columns:
            item[key] = to_float(values[idx])

        # A specific period was requested and this column matched it (the
        # non-matching ones were skipped above): hand back the bare record.
        if report_period:
            return item

        items.append(item)

    return sorted(items, key=lambda x: pd.Timestamp(x['reportDate']))
Exemplo n.º 30
0
def get_subscription_triggered_topic(the_date):
    """Build the topic name for subscription-triggered messages of a date.

    ``the_date`` is rendered with ``to_time_str`` and appended to the
    ``subscription_triggered_`` prefix.
    """
    date_part = to_time_str(the_date)
    topic = 'subscription_triggered_{}'.format(date_part)
    return topic