Пример #1
0
 def get_fg_index_df(self, cdate):
     """
     Return the member codes of this object's block, stamped with a date.

     Reads the block file at ``ct.TONG_DA_XIN_FG_INDEX_PATH``, keeps the rows
     whose ``blockname`` equals ``self.name`` and returns their ``code``
     column together with a constant ``date`` column.

     :param cdate: value written into every row of the ``date`` column
     :return: DataFrame with columns ``code`` and ``date`` and a fresh
              0-based index
     """
     df = BlockReader().get_df(ct.TONG_DA_XIN_FG_INDEX_PATH)
     # Select rows and column in a single .loc call and take an explicit
     # copy, so that adding ``date`` below writes to an independent frame
     # instead of to a slice of ``df`` (chained-assignment hazard).
     members = df.loc[df.blockname == self.name, ['code']].copy()
     members['date'] = cdate
     return members.reset_index(drop=True)
Пример #2
0
    def __init__(self):
        """Create the daily-bar, minute-bar and block readers used by this object."""
        self.day_reader = TdxDailyBarReader()
        self.minline_reader = TdxLCMinBarReader()
        self.block_reader = BlockReader()

        # pandas display settings
        pd.set_option('display.max_columns', None)  # show all columns
Пример #3
0
    def block(self, group=False, custom=False):
        '''
        Load block data from ``block_zs.dat`` in the TDX directory.

        :param group: forwarded unchanged as the second argument of
                      ``BlockReader.get_df``
        :param custom: accepted for interface compatibility; not used here
        :return: pd.DataFrame, or None when the file does not exist
        '''
        symbol = os.path.join(self.tdxdir, 'block_zs.dat')

        # os.path.join always returns a string, so the former
        # ``symbol is not None`` guard could never fail and the documented
        # ``None`` result was unreachable. Check for the file itself instead.
        if not os.path.exists(symbol):
            return None

        return BlockReader().get_df(symbol, group)
Пример #4
0
    def index(self, symbol='incon.dat', group=False):
        '''
        Load index data from a file in the TDX directory.

        :param symbol: file name, resolved relative to ``self.tdxdir``
        :param group: forwarded unchanged as the second argument of
                      ``BlockReader.get_df``
        :return: pd.DataFrame, or None when the file does not exist
        '''
        path = os.path.join(self.tdxdir, symbol)

        # os.path.join always returns a string, so the former
        # ``symbol is not None`` guard could never fail and the documented
        # ``None`` result was unreachable. Check for the file itself instead.
        if not os.path.exists(path):
            return None

        return BlockReader().get_df(path, group)
Пример #5
0
def main(input, output, datatype):
    """
    Read a TDX data file.
    """
    # Reader class for each recognised datatype; any other value is read
    # with the minute-bar reader.
    reader_classes = {
        'daily': TdxDailyBarReader,
        'ex_daily': TdxExHqDailyBarReader,
        'lc': TdxLCMinBarReader,
        'gbbq': GbbqReader,
        'block': BlockReader,
    }
    reader = reader_classes.get(datatype, TdxMinBarReader)()

    # Any failure while reading or writing is reported by printing its
    # message; nothing is re-raised.
    try:
        frame = reader.get_df(input)
        if not output:
            print(frame)
        else:
            click.echo("写入到文件 : " + output)
            frame.to_csv(output)
    except Exception as err:
        print(str(err))
Пример #6
0
    def block(self, symbol='block', custom=False, group=False):
        '''
        Load block data from the TDX directory.
        Reference: http://blog.sina.com.cn/s/blog_623d2d280102vt8y.html

        :param custom: when true, read ``T0002/blocknew/<symbol>`` with
                       ``CustomerBlockReader``; otherwise read
                       ``T0002/hq_cache/<symbol>.dat`` with ``BlockReader``
        :param symbol: block file name (without ``.dat`` for built-in blocks)
        :param group: when true, request the ``TYPE_GROUP`` result layout
        :return: pd.DataFrame, or None when the file does not exist
        '''
        if custom:
            reader = CustomerBlockReader()
            vipdoc = os.path.join(self.tdxdir, 'T0002', 'blocknew',
                                  '{}'.format(symbol))
        else:
            reader = BlockReader()
            vipdoc = os.path.join(self.tdxdir, 'T0002', 'hq_cache',
                                  '{}.dat'.format(symbol))

        if not os.path.exists(vipdoc):
            logger.error('未找到所需的文件: {}'.format(vipdoc))
            return None

        # The previous code computed the layout into ``fmt`` but then passed
        # the builtin ``format`` function to get_df, so ``group`` never took
        # effect. Pass the group layout explicitly when asked for it.
        if group:
            return reader.get_df(vipdoc, TYPE_GROUP)

        # NOTE(review): for the non-grouped case the reader's own default
        # layout is used rather than forwarding ``None`` - confirm that the
        # default is the intended flat layout for both reader classes.
        return reader.get_df(vipdoc)
Пример #7
0
def main(input, output, datatype):
    """
    Read a TDX data file.
    """
    # Reader class for each recognised datatype ('hf' is an alias of
    # 'history_financial'); any other value is read with the minute-bar
    # reader.
    reader_classes = {
        'daily': TdxDailyBarReader,
        'ex_daily': TdxExHqDailyBarReader,
        'lc': TdxLCMinBarReader,
        'gbbq': GbbqReader,
        'block': BlockReader,
        'customblock': CustomerBlockReader,
        'history_financial': HistoryFinancialReader,
        'hf': HistoryFinancialReader,
    }
    reader_class = reader_classes.get(datatype, TdxMinBarReader)
    reader = reader_class()

    # Any failure while reading or writing is reported by printing its
    # message; nothing is re-raised.
    try:
        frame = reader.get_df(input)
        if not output:
            print(frame)
        else:
            click.echo("写入到文件 : " + output)
            frame.to_csv(output)
    except Exception as err:
        print(str(err))
Пример #8
0
def get_style_block():
    """
    Return the style-block membership table read from ``block_fg.dat``.

    Related TDX block files (per the original notes):
    block_zs.dat   index blocks
    block_gn.dat   concept blocks
    block_fg.dat   style blocks (e.g. margin trading, recent weakness)

    The result is indexed by ``code``, has one column per ``blockname`` and
    holds the ``block_type`` value in each cell.
    NOTE(review): the original notes say the value is 2 for a member stock -
    confirm against the reader's output.

    :return: pivoted DataFrame (code x blockname -> block_type)
    """
    # Build the path from components: the previous literal embedded
    # backslashes, which only forms a valid path on Windows.
    filename = os.path.join(TDX_DIR, 'T0002', 'hq_cache', 'block_fg.dat')
    return BlockReader().get_df(filename).pivot(index='code',
                                                columns='blockname',
                                                values='block_type')
Пример #9
0
    def _load_instruments(self, market,
                          freq) -> Dict[str, List[Tuple[str, str]]]:
        """
        Build the mapping from instrument to its list of (begin, end) spans.

        For the futures selectors ("future", "commodity", or one of the
        exchange names "czce", "dce", "shfe", "cffex") the instruments and
        their data files come from ``self.get_instrument_by_exchange``.
        For any other value the block file ``block_zs.dat`` is read: if
        ``market`` matches a ``blockname`` in it, only that block's codes are
        used, otherwise every code in the file is used. Codes starting with
        "6" or "9" are prefixed with "SH", all others with "SZ".

        ``get_begin_and_end`` is then run for every data file in a process
        pool; files whose task raises are collected and logged, not raised.

        :param market: futures selector or block name
        :param freq: frequency; normalised through ``str(Freq(freq))``
        :return -> dict: {instrument: [(begin, end), (begin, end), (begin, end)]}
        """
        freq = str(Freq(freq))

        logger.info(f"get instruments {market} begin......")

        # "czce":28, "dce":29, "shfe":30, "cffex":47
        if market in ["future", "commodity", "czce", "dce", "shfe", "cffex"]:
            if market == "future":
                exchanges = ["czce", "dce", "shfe", "cffex"]
            elif market == "commodity":
                exchanges = ["czce", "dce", "shfe"]
            else:
                # NOTE(review): here ``exchanges`` is a plain str while the
                # branches above pass a list - confirm that
                # get_instrument_by_exchange accepts both.
                exchanges = market

            instruments_list, files_list = self.get_instrument_by_exchange(
                exchanges, freq)
        else:
            file_path = self.block_dir.joinpath("block_zs.dat")
            df = BlockReader().get_df(file_path)

            # Unknown block names fall back to every code in the file.
            if market in df["blockname"].unique():
                instruments_series = df[df["blockname"] == market]["code"]
            else:
                instruments_series = df["code"]

            # Add the exchange prefix: 6xxxxx / 9xxxxx -> SH, everything else -> SZ.
            instruments_list = instruments_series \
                .apply(lambda x: "SH" + x if x.startswith("9") or x.startswith("6") else "SZ" + x) \
                .to_list()

            files_list = \
                [get_file_path_from_instrument(self.tdx_path, instrument, freq=freq)[1] for instrument in
                 instruments_list]

        # At least one worker, at most one per instrument.
        workers = max(min(C.get_kernels(freq), len(instruments_list)), 1)
        # This progress bar tracks task submission; leaving the executor's
        # ``with`` block waits for all submitted tasks to finish.
        with tqdm(total=len(instruments_list)) as p_bar:
            with ProcessPoolExecutor(max_workers=workers) as executor:
                futures = {}
                for instrument, file_path in zip(instruments_list, files_list):
                    futures[executor.submit(get_begin_and_end, file_path,
                                            False)] = instrument
                    p_bar.update()

        # Collect results; a failing task records its traceback under the
        # instrument name instead of aborting the whole load.
        error_code = {}
        _instruments = {}
        with tqdm(total=len(futures)) as p_bar:
            for _future in as_completed(futures):
                try:
                    _begin_time, _end_time = _future.result()
                    _instruments.setdefault(futures[_future], []).append(
                        (_begin_time, _end_time))
                except Exception:
                    error_code[futures[_future]] = traceback.format_exc()
                p_bar.update()
        if error_code:
            logger.info(f"instruments errors: {error_code}")

        logger.info(f"get instruments {market} end......")
        return _instruments
Пример #10
0
class TdxLocalHelper:
    """Read local TDX data files (daily bars, minute bars, blocks) and export them as CSV.

    Input and output locations are taken from the project ``config`` module.
    """

    # Values passed as the second argument of the block reader's get_df to
    # choose the result layout.
    BlockReader_TYPE_FLAT = 0
    BlockReader_TYPE_GROUP = 1

    # Size in bytes of one record as read by parse_fst_file:
    # 2 (time) + 4 (price) + 4 (average price) + 4 (volume) + 12 (reserved).
    _FST_RECORD_SIZE = 26

    def __init__(self):
        """Create the readers and widen the pandas column display."""
        self.day_reader = TdxDailyBarReader()
        self.minline_reader = TdxLCMinBarReader()
        self.block_reader = BlockReader()

        # pandas display settings
        pd.set_option('display.max_columns', None)  # show all columns
        #pd.set_option('display.max_rows', None)  # show all rows

    def read_tdx_local_day(self):
        """Read the daily-bar file ``sh000001.day`` and print it.

        Per the original notes the frame is indexed by date with columns
        open, high, low, close, amount, volume.
        """
        df = self.day_reader.get_df(config.tdx_local_sh_day + "sh000001.day")
        print(df)

    def format_date(self, date):
        """Return the first 10 characters of ``str(date)`` with '-' removed (e.g. 20200417)."""
        return ((str(date))[0:10]).replace('-', '')

    def _load_minline_frame(self, full_path, filename):
        """Read a minute-bar file and prepend the identification and time columns.

        :param full_path: path of the minute-bar file to read
        :param filename: file name such as ``sh600300.lc1``; characters 2-7
                         give the code and characters 0-1 the market prefix
        :return: ``(df, ts_code)`` where ``df`` starts with the columns
                 code, ts_code, trade_date, trade_time, time_index followed
                 by the reader's own columns
        """
        code = filename[2:8]
        ts_code = code + "." + filename[0:2].upper()

        df = self.minline_reader.get_df(full_path)

        # Each insert goes to position 0, so they are issued in reverse of
        # the final column order.
        df.insert(0, 'trade_time', df.index)
        df.insert(0, 'trade_date', df.index.floor('D'))
        df.insert(0, 'ts_code', ts_code)
        df.insert(0, 'code', code)
        df.reset_index(drop=True, inplace=True)
        # Placeholder that fixes the column position; the real value is
        # derived from trade_time just below.
        df.insert(4, 'time_index', df.index)
        df['trade_time'] = pd.to_datetime(df['trade_time'], format='%H:%M:%S').dt.strftime('%H:%M:%S')
        df['time_index'] = df['trade_time'].apply(datatime_util.stockTradeTime2Index)
        df['trade_date'] = df['trade_date'].apply(self.format_date)
        return df, ts_code

    @staticmethod
    def _round_columns(df, columns):
        """Round each listed column to 2 decimals in place, element by element."""
        for column in columns:
            df[column] = df[column].apply(lambda x: round(x, 2))

    @staticmethod
    def _write_csv(df, csv_filename):
        """Write ``df`` to ``csv_filename``, replacing any existing file."""
        # mode='w' truncates an existing file, so no prior os.remove is needed.
        df.to_csv(csv_filename, index=False, mode='w', header=True, sep=',', encoding="utf_8_sig")

    def read_tdx_local_minline_all(self, full_path, filename=""):
        """Export a 1-minute / 5-minute bar file to CSV with all price columns.

        CSV columns: code, ts_code, trade_date (compact form), trade_time,
        time_index, open, high, low, close, amount, volume.

        :param full_path: path of the minute-bar file to read
        :param filename: file name such as ``sh600300.lc1``
        """
        df, ts_code = self._load_minline_frame(full_path, filename)
        self._round_columns(df, ('open', 'high', 'low', 'close'))
        self._write_csv(df, config.tdx_csv_minline1_all + ts_code + ".csv")

    def read_tdx_local_minline_simple(self, full_path, filename=""):
        """Export a 1-minute / 5-minute bar file to CSV with price and volume only.

        The close price is kept (renamed to ``price``); open, high, low and
        amount are dropped.
        CSV columns: code, ts_code, trade_date (compact form), trade_time,
        time_index, price, volume.

        :param full_path: path of the minute-bar file to read
        :param filename: file name such as ``sh600300.lc1``
        """
        df, ts_code = self._load_minline_frame(full_path, filename)
        self._round_columns(df, ('close',))
        df.rename(columns={'close': 'price'}, inplace=True)
        df.drop(['open', 'high', 'low', 'amount'], axis=1, inplace=True)
        self._write_csv(df, config.tdx_csv_minline1_simple + ts_code + ".csv")

    def read_tdx_local_block(self):
        """Export every block file to CSV in both the flat and the grouped layout.

        Per the original notes the flat layout has the columns
        blockname, block_type, code_index, code.
        """
        # index blocks, style blocks, concept blocks, general blocks
        block_filename = ["block_zs.dat", "block_fg.dat", "block_gn.dat", "block.dat"]
        for block in block_filename:
            df = self.block_reader.get_df(config.tdx_local_block + block)   # default flat layout
            df_group = self.block_reader.get_df(config.tdx_local_block + block, self.BlockReader_TYPE_GROUP)   # grouped layout
            filename = config.tdx_csv_block + block[0:-4] + ".csv"
            filename_group = config.tdx_csv_block + block[0:-4] + "_group" + ".csv"

            # NOTE(review): when the flat CSV already exists both files are
            # appended to without a header, so re-running duplicates rows -
            # confirm this is intended.
            already_exported = os.path.isfile(filename)
            mode = 'a' if already_exported else 'w'
            header = not already_exported
            df.to_csv(filename, index=False, mode=mode, header=header, sep=',', encoding="utf_8_sig")
            df_group.to_csv(filename_group, index=False, mode=mode, header=header, sep=',', encoding="utf_8_sig")

    def read_tdx_local_fst(self):
        """Try to read an intraday file with the minute-bar reader and print the result."""
        reader = TdxMinBarReader()  # per the original note, this reader cannot parse intraday files
        df = reader.get_df(config.tdx_local_fst + "sh20200417.tfz")
        print(df)

    @staticmethod
    def _unpack_fst_record(record):
        """Split one 26-byte intraday record into its fields.

        :param record: exactly ``_FST_RECORD_SIZE`` bytes
        :return: ``(time, price, avg_price, volume, reserved)``; the first
                 four are 1-tuples as returned by ``unpack``, the last is
                 the raw 12 reserved bytes
        """
        # "=" selects standard sizes (h = 2 bytes, l = 4 bytes) in native
        # byte order. The plain native "l" is 8 bytes on LP64 platforms and
        # made unpack raise struct.error on the 4-byte fields.
        stock_date = unpack("=h", record[0:2])
        cur_price = unpack("=l", record[2:6])
        arr_price = unpack("=l", record[6:10])
        vol = unpack("=l", record[10:14])
        stock_reservation = record[14:26]
        return stock_date, cur_price, arr_price, vol, stock_reservation

    def parse_fst_file(self):
        """Exploratory dump of an intraday file; per the original note, parsing did not succeed.

        Format notes carried over from the original author (unverified):
        one day of data is 6508 bytes and each minute record is 26 bytes -
        2 bytes of time in minutes since midnight (570 means 9:30),
        4 bytes current price, 4 bytes average price, then the volume
        (originally 2 bytes, possibly 4 now) and the remaining bytes
        reserved. This method reads the volume as 4 bytes followed by
        12 reserved bytes.

        It prints the first two records, then character-detection results
        and the first 32 bytes (decoded as 'IIIIIfII') of the next line.
        """
        full_path = config.tdx_local_fst + "sh20200417.tfz"
        filesize = os.path.getsize(full_path)  # file size in bytes
        print("filesize为: %s" % (filesize))
        if filesize == 0:
            return

        with open(full_path, "rb") as file:
            for _ in range(2):
                print("游标位置:", file.tell())
                record = file.read(self._FST_RECORD_SIZE)
                if len(record) < self._FST_RECORD_SIZE:
                    # Truncated file: stop instead of failing inside unpack.
                    break
                for field in self._unpack_fst_record(record):
                    print(field)

            # Only the first remaining "line" of the binary file is inspected.
            for line in file:
                result = chardet.detect(line)
                print("code: ", result)

                buf_size = len(line)
                rec_count = buf_size // 32
                print("行内容:", line)
                print("buf_size:", buf_size)
                print("rec_count:", rec_count)
                if buf_size >= 32:
                    a = unpack('IIIIIfII', line[0:32])
                    print("解码后的数据0: %s" % (str(a[0])))
                    print("解码后的数据1: %s" % (str(a[1])))
                    print("解码后的数据2: %s" % (str(a[2])))
                break
Пример #11
0
def get_block_info2():
    """Read the index-block file with the reader's default layout and print it."""
    block_path = ".../T0002/hq_cache/block_zs.dat"
    # No layout argument is given, so the reader's default (flat, per the
    # original note) is used.
    flat_frame = BlockReader().get_df(block_path)
    print(flat_frame)
Пример #12
0
def _collect_block_edges(frame, name_column, block_nodes, next_code, edges):
    """Append one stock -> block edge per row of ``frame`` and register new blocks.

    :param frame: DataFrame with a ``code`` column and a ``name_column`` column
    :param name_column: column holding the block / area / industry name
    :param block_nodes: dict name -> numeric node id, updated in place
    :param next_code: id to give to the next previously unseen name
    :param edges: list of edge dicts, extended in place
    :return: the next unused node id
    """
    for block_name, stock_code in zip(frame[name_column], frame['code']):
        if block_name not in block_nodes:
            block_nodes[block_name] = next_code
            next_code += 1
        edges.append({
            'fromID': stock_code,
            'toID': str(block_nodes[block_name]),
            'type': '属于'
        })
    return next_code


def tdx_block_reader(tdx_dir="C:/new_tdx"):
    """Export stock/block membership as a node list and an edge list.

    Blocks are taken from the TDX index, style and concept block files and
    from the tushare area and industry classifications. Every distinct block
    name gets a numeric id starting at 900001, in order of first appearance.

    Writes ``stock_edges.csv`` (columns fromID, toID, type) and
    ``stock_nodes.csv`` (index code, column name; block nodes followed by
    the rows of ``ts.get_stock_basics()``) to the current directory.

    :param tdx_dir: TDX installation directory containing ``T0002/hq_cache``
    """
    # Rows are gathered in plain lists and turned into DataFrames once:
    # DataFrame.append copied the whole frame per row and no longer exists
    # in pandas 2.x.
    edges = []
    block_nodes = {}
    next_code = int(900001)

    # index blocks, style blocks, concept blocks
    for block_file in ('block_zs.dat', 'block_fg.dat', 'block_gn.dat'):
        block_df = BlockReader().get_df(
            '{}/T0002/hq_cache/{}'.format(tdx_dir, block_file))
        next_code = _collect_block_edges(block_df, 'blockname', block_nodes,
                                         next_code, edges)

    # areas
    next_code = _collect_block_edges(ts.get_area_classified(), 'area',
                                     block_nodes, next_code, edges)

    # industries
    next_code = _collect_block_edges(ts.get_industry_classified(), 'c_name',
                                     block_nodes, next_code, edges)

    # edges to csv
    stock_edges = pd.DataFrame(edges, columns=['fromID', 'toID', 'type'])
    stock_edges.to_csv('stock_edges.csv', index=None, encoding="utf-8")

    # nodes to csv
    stock_data = ts.get_stock_basics()
    nodes_df = pd.DataFrame(
        [{'code': str(node_id), 'name': block_name}
         for block_name, node_id in block_nodes.items()],
        columns=['code', 'name'])
    nodes_df = nodes_df.set_index('code')
    nodes_df = pd.concat([nodes_df, stock_data])
    nodes_df.to_csv('stock_nodes.csv', columns=['name'], encoding="utf-8")