Exemplo n.º 1
0
def download(trade_date):
    """Download the CZCE daily futures quotation file for one trade date.

    The response text is written to the path returned by
    ``get_future_price_downloaded_origin_file_path(trade_date,
    FuturesExchange.CZCE)``. When that file already exists nothing is
    fetched.

    Args:
        trade_date: Trade date as a ``YYYYMMDD`` string. Dates earlier than
            ``'20210104'`` are rejected: an error is logged and
            ``exit_now()`` is called.

    Returns:
        True when the file already existed or was downloaded and written,
        False when the server answered with a status code other than 200.

    Raises:
        requests.exceptions.RequestException: when the connection fails or
            the server does not answer within the timeout.
    """
    # Example:
    # http://www.czce.com.cn/cn/DFSStaticFiles/Future/2021/20210507/FutureDataDaily.txt
    url_model = 'http://www.czce.com.cn/cn/DFSStaticFiles/Future/%s/%s/FutureDataDaily.txt'
    # Seconds to wait for the server; without a timeout a stalled
    # connection would block the caller forever.
    request_timeout_seconds = 30
    # Download link of the futures quotation data.
    request_url = ''
    year = trade_date[:4]
    logger.info('year=%s trade_date=%s' % (year, trade_date))

    if trade_date >= '20210104':  # 20201231
        request_url = url_model % (year, trade_date)
    else:
        logger.error('期货行情数据下载日期有误请检查!')
        exit_now()
    logger.info('request_url = ' + request_url)
    # Download the futures quotation data unless it is already on disk.
    future_price_downloaded_origin_file_path = \
        get_future_price_downloaded_origin_file_path(trade_date, FuturesExchange.CZCE)
    if os.path.exists(future_price_downloaded_origin_file_path):
        logger.info('%s %s期货行情数据已下载:' %
                    (future_price_downloaded_origin_file_path, trade_date))
        return True
    res = requests.get(request_url, timeout=request_timeout_seconds)
    if res.status_code != 200:
        logger.error('response.status_code=%s, download %s failed' %
                     (str(res.status_code), request_url))
        return False
    with open(future_price_downloaded_origin_file_path, 'w',
              encoding='utf-8') as f:
        f.write(res.text)
    return True
 def get_future_price_contract_data_by_future_contract_name(
         future_variety_code, future_contract_name):
     """Load one contract's price file into a dict keyed by trade date.

     The file located by ``get_future_price_contract_file_path`` is read as
     comma-separated text. The first line is skipped (presumably a header
     row - confirm against the writer), and every following line is stored
     under its first field.

     Args:
         future_variety_code: Variety code passed on to the path helper.
         future_contract_name: Contract name passed on to the path helper.

     Returns:
         dict mapping the first field of each row (the trade date) to the
         full list of that row's fields.

     A trade date that appears twice is logged as an error and
     ``exit_now()`` is called.
     """
     contract_file = get_future_price_contract_file_path(
         future_variety_code, future_contract_name)
     with open(contract_file, 'r', encoding='utf-8') as handle:
         raw_rows = handle.read().strip().split('\n')
     rows_by_date = {}
     for raw_row in raw_rows[1:]:
         fields = raw_row.split(',')
         date_key = fields[0]
         if date_key in rows_by_date:
             logger.error('期货合约价格数据中有重复,请检查!' + date_key + ' | ' +
                          contract_file)
             exit_now()
         rows_by_date[date_key] = fields
     return rows_by_date
Exemplo n.º 3
0
def parse_one(trade_date):
    """Parse the downloaded CZCE holding file of a single trade date.

    The parser is chosen by date: ``'20150921'`` and later use
    ``parse_downloaded_origin_file_from_20150921``; dates from
    ``'20100818'`` to ``'20150918'`` (inclusive) use
    ``parse_downloaded_origin_file_from_20100818_to_20150918``. Any other
    date is logged as an error and ``exit_now()`` is called.

    Args:
        trade_date: Trade date as a ``YYYYMMDD`` string.

    Returns:
        False when the downloaded file does not exist, True otherwise.
    """
    origin_path = get_future_holding_downloaded_origin_file_path(
        trade_date, FuturesExchange.CZCE)
    if not os.path.exists(origin_path):
        logger.error(
            'future_holding_downloaded_origin_file_path not exists, ' +
            origin_path)
        return False

    # Pick the parser that matches the file layout used on that date.
    if trade_date >= '20150921':
        parser = parse_downloaded_origin_file_from_20150921
    elif '20150918' >= trade_date >= '20100818':
        parser = parse_downloaded_origin_file_from_20100818_to_20150918
    else:
        parser = None

    if parser is None:
        logger.error('parse_future_holding.parse(), 日期有误请检查!')
        exit_now()
    else:
        parser(trade_date, origin_path)
    return True
Exemplo n.º 4
0
def parse_video_id():
    """Extract the video ids from ``Andy_Lee.html`` and save them to disk.

    Reads ``Andy_Lee.html`` from ``andy_lee_work_path``, collects every
    ``<a>`` element whose ``id`` is ``video-title`` and takes the first 11
    characters of its ``href`` once ``/watch?v=`` has been removed. The
    distinct ids are written, one per line and in first-seen order, to
    ``andy_lee_all_video_ids_file_path``.

    An ``href`` that does not start with ``/watch?v=`` is logged as an
    error and ``exit_now()`` is called.

    Returns:
        False when no matching anchor is found; otherwise None after the
        id file has been written.
    """
    html_path = os.path.join(andy_lee_work_path, 'Andy_Lee.html')
    with open(html_path, 'r', encoding='utf-8') as f:
        content_html = f.read()
    bs_obj = BeautifulSoup(content_html, "lxml")
    items_list = bs_obj.find_all("a", {"id": "video-title"})
    if not items_list:
        logger.error('Andy_Lee.html没有解析出数据!')
        return False

    video_ids = []
    # Set used only for O(1) duplicate checks; the list keeps the order.
    seen_ids = set()
    for item in items_list:
        href = item['href']
        if not href.startswith('/watch?v='):
            logger.error('youtube video url error, %s' % href)
            exit_now()
        video_id = href.replace('/watch?v=', '')[:11]
        if video_id not in seen_ids:
            seen_ids.add(video_id)
            video_ids.append(video_id)
        # Every anchor is logged, including repeated ids.
        logger.info(video_id)
    logger.info('items length = %s' % len(video_ids))
    with open(andy_lee_all_video_ids_file_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(video_ids))
 def read_data(self):
     """Fill the per-day series of this object from the contract files.

     For every trade date of the price data (in iteration order) this
     appends to ``trade_date_list``, ``price_list`` (field 6, the closing
     price; ``None`` when it is 0), ``total_holding_list`` (field 9, the
     holding volume) and ``cordon_list`` (constant 0.5 warning line).

     When holding data exists for the date, the total buy / sell holdings
     are appended as a share of the total holding to ``buy_list`` /
     ``sell_list`` and their complement to 1 to ``individual_buy_list`` /
     ``individual_sell_list``; otherwise ``None`` is appended to all four.
     Per-member holdings are stored by day index, and members whose
     non-``None`` values sum to something other than 0 are copied into
     ``new_future_members_buy_holding_dict`` /
     ``new_future_members_sell_holding_dict``.

     A trade date missing from ``trade_calendar`` is logged as an error and
     ``exit_now()`` is called.
     """
     price_rows = self.get_future_price_contract_data_by_future_contract_name(
         self.future_variety_code, self.future_contract_name)
     holding_by_date = self.get_future_holding_contract_data_by_future_contract_name(
         self.future_variety_code, self.future_contract_name)
     day_count = len(price_rows.keys())
     member_buy_series = self.get_future_members_holding_dict(day_count)
     member_sell_series = self.get_future_members_holding_dict(day_count)

     for day_index, (trade_date, row) in enumerate(price_rows.items()):
         if trade_date not in self.trade_calendar:
             logger.error('交易日历有问题,请检查!')
             exit_now()
         self.trade_date_list.append(trade_date)

         # Closing price; a zero price is recorded as missing.
         close_price = float(row[6])
         self.price_list.append(None if close_price == 0 else close_price)

         # Holding volume.
         total_holding = float(row[9])
         self.total_holding_list.append(total_holding)

         # Warning line.
         self.cordon_list.append(0.5)

         if trade_date in holding_by_date:
             day_holding = holding_by_date[trade_date]
             buy_holding = float(day_holding['total_buy'][0])
             sell_holding = float(day_holding['total_sell'][0])
             # Same arithmetic order as before so the floats are identical.
             buy_ratio = (buy_holding / total_holding) * 100 / 100.0
             sell_ratio = (sell_holding / total_holding) * 100 / 100.0
             self.buy_list.append(buy_ratio)
             self.sell_list.append(sell_ratio)
             self.individual_buy_list.append(1 - buy_ratio)
             self.individual_sell_list.append(1 - sell_ratio)
             # Long positions per member.
             for member in day_holding['buy'].values():
                 member_buy_series[member[0]][day_index] = member[1]
             # Short positions per member.
             for member in day_holding['sell'].values():
                 member_sell_series[member[0]][day_index] = member[1]
         else:
             self.buy_list.append(None)
             self.sell_list.append(None)
             self.individual_buy_list.append(None)
             self.individual_sell_list.append(None)

     # Keep only the members whose known holdings do not sum to zero.
     for member_name, series in member_buy_series.items():
         if sum(v for v in series if v is not None) != 0:
             self.new_future_members_buy_holding_dict[member_name] = series
     for member_name, series in member_sell_series.items():
         if sum(v for v in series if v is not None) != 0:
             self.new_future_members_sell_holding_dict[member_name] = series
Exemplo n.º 6
0
# @Author   : 胡远
# @Github   : https://github.com/QuixoteHY
# @Email    : [email protected]
# @Describe :

from config.logger import logger
from config.utils import exit_now
from downloader.video.youtube import video_youtube_download_path, download_youtube_video_by_youtube_dl_in_cmd

import os
from bs4 import BeautifulSoup

# Working directory for the Andy_Lee downloads. It must already exist:
# when it is missing an error is logged and exit_now() is called at import.
andy_lee_work_path = os.path.join(video_youtube_download_path, 'Andy_Lee')
if not os.path.exists(andy_lee_work_path):
    logger.error('工作目录不存在!%s' % andy_lee_work_path)
    exit_now()
# Sub-directory for the video files; created at import time when missing.
andy_lee_video_path = os.path.join(andy_lee_work_path, 'video')
if not os.path.exists(andy_lee_video_path):
    os.makedirs(andy_lee_video_path)
# Text files inside the working directory. Judging by their names they hold
# the list of all video ids and the record of already downloaded ids.
andy_lee_all_video_ids_file_path = os.path.join(andy_lee_work_path, 'all_video_ids.txt')
andy_lee_downloaded_record_file_path = os.path.join(andy_lee_work_path, 'downloaded_video_id.txt')


def read_downloaded_video_ids():
    """Return the distinct video ids found in ``andy_lee_video_path``.

    The id of an entry is the slice ``file_name[9:20]`` of its name, i.e.
    the 11 characters that follow a 9-character prefix.

    Returns:
        list of ids without duplicates, in the order in which
        ``os.listdir`` first yields them.
    """
    downloaded_video_ids = []
    # Set used only for O(1) duplicate checks; the list keeps the order.
    seen_ids = set()
    for file_name in os.listdir(andy_lee_video_path):
        video_id = file_name[9:20]
        if video_id not in seen_ids:
            seen_ids.add(video_id)
            downloaded_video_ids.append(video_id)
    return downloaded_video_ids
Exemplo n.º 7
0
def parse_downloaded_origin_file_from_20100818_to_20150918(
        trade_date, future_holding_downloaded_origin_file_path):
    """Parse a downloaded holding file in the comma-separated layout.

    Going by its name this handles trade dates 20100818 to 20150918. The
    file text is cut into sections at every '合约' / '品种' marker. Each
    section is validated (total line, variety code, date), converted into
    a holding dict and handed to
    ``insert_one_day_future_holding_data_to_js``. Every validation failure
    is logged and followed by ``exit_now()``.

    Args:
        trade_date: Trade date as a ``YYYYMMDD`` string; it must equal the
            date found in each section header.
        future_holding_downloaded_origin_file_path: Path of the downloaded
            text file to parse.
    """
    with open(future_holding_downloaded_origin_file_path,
              'r',
              encoding='utf-8') as f:
        # Normalise both section markers to '品种', then split on it.
        items = f.read().replace('合约', '品种').split('品种')
    for index, item in enumerate(items):
        # Collapse doubled newlines and drop all spaces and tabs.
        lines = item.strip().replace('\n\n',
                                     '\n').replace(' ',
                                                   '').replace('\t',
                                                               '').split('\n')
        logger.info('>' * 10 + trade_date + '\t' + '\n'.join(lines))
        # Check the last line: it must be the total ('合计') row. Only the
        # text before the first marker (index 0) may lack it and is skipped.
        if '合计' not in lines[-1]:
            if index == 0:
                continue
            else:
                logger.error('缺少合计, ' + str(lines[-1]))
                exit_now()
        # Check the first line, e.g.
        #   :棉花日期:2010-08-18        :ER105日期:2010-08-18
        # Variety code -> name as it appears in the section header.
        __future_varietties_dict = {
            'CF': '棉花',
            'RI': '早籼',
            'OI': '菜油',
            'SR': '白糖',
            'TA': 'PTA',
            'WS': '强麦',
            'WT': '硬麦',
            'MA': '甲醇',
            'PM': '普麦',
            'FG': '玻璃',
            'RM': '菜粕',
            'RS': '菜籽',
            # 'ZC': '动力煤',
            'TC': '煤',
            'JR': '粳稻',
            'LR': '晚籼',
            'SF': '硅铁',
            'SM': '锰硅',
        }
        # Count how many codes / names occur in the header; the last match
        # wins as the variety code.
        check_res = 0
        future_variety_code = ''
        for _future_variety_code in FUTURE_VARIETIES.keys():
            if _future_variety_code in lines[0]:
                check_res += 1
                future_variety_code = _future_variety_code
        for _future_variety_code, _future_variety_name in __future_varietties_dict.items(
        ):
            if _future_variety_name in lines[0]:
                check_res += 1
                future_variety_code = _future_variety_code
        # Exactly one match is required, except for 'TA' (its name 'PTA'
        # contains the code, so a single header can match twice).
        if check_res != 1 and future_variety_code not in ('TA', ):
            logger.error('合约代码对不上, lines[0]: ' + str(lines[0]) +
                         '|future_variety_code=' + future_variety_code)
            exit_now()
        # Rename the contract  (:棉花日期:2010-08-18   :ER105日期:2010-08-18)
        future_contract_name = None
        # NOTE(review): both replace(':', '') calls are identical as
        # written; presumably one of them originally targeted the
        # full-width colon - confirm against the raw files.
        czce_future_contract_name, future_date_str = \
            lines[0].strip().replace('-', '').replace(':', '').replace(':', '').split('日期')
        # A header that is part of the variety's full name is the
        # variety-level table ('total'); otherwise it is a single contract.
        if czce_future_contract_name in FUTURE_VARIETIES[future_variety_code][
                '品种全称']:
            future_contract_name = 'total'
        elif future_variety_code in czce_future_contract_name:
            future_contract_name = \
                rename_czce_future_contract_name(trade_date, czce_future_contract_name, future_variety_code)
            logger.info(czce_future_contract_name + ' >>> ' +
                        future_contract_name)
        else:
            logger.error('合约代码有问题, lines[0]: ' + str(lines[0]) + ' | 品种全称=' +
                         FUTURE_VARIETIES[future_variety_code]['品种全称'])
            exit_now()
        # Check the date: 8 characters and equal to the requested date.
        if future_date_str != trade_date or len(future_date_str) != 8:
            logger.error('日期对不上, lines[0]: ' + str(lines[0]))
            exit_now()
        # Parse the data rows (everything between header and total row).
        future_holding_dict = get_future_holding_dict()
        future_holding_dict['code'] = future_contract_name
        future_holding_dict['date'] = trade_date
        for line in lines[1:-1]:
            # Every row must contain '.0'; presumably the numbers are
            # written as floats such as '123.0' - TODO confirm.
            if '.0' not in line:
                logger.error('格式不对请检查, line=' + line)
                exit_now()
            # Remove every '.0' so the fields can be passed to int().
            line = line.replace('.0', '')
            line = line.strip().split(',')
            logger.info('line=' + str(line))
            # Field 0 is the key; fields 1-3, 4-6 and 7-9 are the
            # volume, buy and sell triples. An empty first field of a
            # triple means that triple is skipped.
            if line[1] != '':
                future_holding_dict['volume'][int(
                    line[0])] = [line[1], int(line[2]),
                                 int(line[3])]
            if line[4] != '':
                future_holding_dict['buy'][int(
                    line[0])] = [line[4], int(line[5]),
                                 int(line[6])]
            if line[7] != '':
                future_holding_dict['sell'][int(
                    line[0])] = [line[7], int(line[8]),
                                 int(line[9])]
        # Total row, handled like a data row.
        line = lines[-1]
        if '.0' not in line:
            logger.error('格式不对请检查, line=' + line)
            exit_now()
        line = line.replace('.0', '')
        line = line.strip().split(',')
        # When field 1 is not empty an empty field is inserted so that the
        # indices below line up with the data rows.
        if line[1] != '':
            line.insert(1, '')
        logger.info('合计 line=' + str(line))
        future_holding_dict['total_volume'] = [int(line[2]), int(line[3])]
        future_holding_dict['total_buy'] = [int(line[5]), int(line[6])]
        future_holding_dict['total_sell'] = [int(line[8]), int(line[9])]
        # Write the parsed day to the contract's file.
        future_holding_contract_file_path = \
            get_future_holding_contract_file_path(future_variety_code, future_contract_name)
        insert_one_day_future_holding_data_to_js(
            future_holding_contract_file_path, future_holding_dict)
Exemplo n.º 8
0
def parse_downloaded_origin_file_from_20150921(
        trade_date, future_holding_downloaded_origin_file_path):
    """Parse a downloaded holding file in the '|'-separated layout.

    Going by its name this handles trade dates from 20150921 on. The file
    text is cut into sections at every '合约' / '品种' marker. Each section
    is validated (total line, table header, variety code, date), converted
    into a holding dict and handed to
    ``insert_one_day_future_holding_data_to_js``. Every validation failure
    is logged and followed by ``exit_now()``.

    Args:
        trade_date: Trade date as a ``YYYYMMDD`` string; it must equal the
            date found in each section header.
        future_holding_downloaded_origin_file_path: Path of the downloaded
            text file to parse.
    """
    with open(future_holding_downloaded_origin_file_path,
              'r',
              encoding='utf-8') as f:
        # Normalise both section markers to '品种', then split on it.
        items = f.read().replace('合约', '品种').split('品种')
    for index, item in enumerate(items):
        # Collapse doubled newlines and drop all spaces and commas.
        lines = item.strip().replace('\n\n',
                                     '\n').replace(' ',
                                                   '').replace(',',
                                                               '').split('\n')
        logger.info('>' * 10 + trade_date + '\t' + '\n'.join(lines))
        # Check the last line: it must be the total ('合计') row. Only the
        # text before the first marker (index 0) may lack it and is skipped.
        if '合计' not in lines[-1]:
            if index == 0:
                continue
            else:
                logger.error('缺少合计, ' + str(lines[-1]))
                exit_now()
        # Check the table header on the second line.
        if '名次' not in lines[1]:
            logger.error('缺少表头, ' + str(lines[1]))
            exit_now()
        # Extract the variety code from the first line, e.g.
        #   棉花CF日期::2015-09-21
        future_variety_code = None
        czce_future_contract_name = lines[0].replace(
            ':', '').split('日期')[0].strip()
        # First try an exact match against the full variety names.
        check_res = 0
        for __code, __item in FUTURE_VARIETIES.items():
            if czce_future_contract_name == __item['品种全称']:
                check_res += 1
                future_variety_code = __code
        if check_res > 1:
            logger.error('合约代码与品种全称匹配数大于1, czce_future_contract_name: %s, %s' %
                         (czce_future_contract_name, lines[0]))
            exit_now()
        # Otherwise the header must be a contract name: at least 4
        # characters ending in three characters from __digit; the variety
        # code is everything before those three characters.
        if future_variety_code is None:
            if len(czce_future_contract_name) < 4 or czce_future_contract_name[-1] not in __digit or \
                    czce_future_contract_name[-2] not in __digit or czce_future_contract_name[-3] not in __digit:
                logger.error('合约代码有问题, czce_future_contract_name: %s, %s' %
                             (czce_future_contract_name, lines[0]))
                exit_now()
            future_variety_code = czce_future_contract_name[:-3]
        # Rename the contract.
        future_contract_name = None
        # NOTE(review): both replace(':', '') calls are identical as
        # written; presumably one of them originally targeted the
        # full-width colon - confirm against the raw files.
        czce_future_contract_name, future_date_str = \
            lines[0].strip().replace('-', '').replace(':', '').replace(':', '').split('日期')
        # A header equal to the variety's full name is the variety-level
        # table ('total'); otherwise it is a single contract.
        if czce_future_contract_name == FUTURE_VARIETIES[future_variety_code][
                '品种全称']:
            future_contract_name = 'total'
        elif future_variety_code in czce_future_contract_name:
            future_contract_name = \
                rename_czce_future_contract_name(trade_date, czce_future_contract_name, future_variety_code)
            logger.info(czce_future_contract_name + ' >>> ' +
                        future_contract_name)
        else:
            logger.error('合约代码有问题, lines[0]: ' + str(lines[0]))
            exit_now()
        # Check the date: 8 characters and equal to the requested date.
        if future_date_str != trade_date or len(future_date_str) != 8:
            logger.error('日期对不上, lines[0]: ' + str(lines[0]))
            exit_now()
        # Parse the data rows (between the table header and the total row).
        future_holding_dict = get_future_holding_dict()
        future_holding_dict['code'] = future_contract_name
        future_holding_dict['date'] = trade_date
        for line in lines[2:-1]:
            line = line.strip().split('|')
            # Field 0 is the key; fields 1-3, 4-6 and 7-9 are the
            # volume, buy and sell triples. A '-' as first field of a
            # triple means that triple is skipped.
            if line[1] != '-':
                future_holding_dict['volume'][int(
                    line[0])] = [line[1], int(line[2]),
                                 int(line[3])]
            if line[4] != '-':
                future_holding_dict['buy'][int(
                    line[0])] = [line[4], int(line[5]),
                                 int(line[6])]
            if line[7] != '-':
                future_holding_dict['sell'][int(
                    line[0])] = [line[7], int(line[8]),
                                 int(line[9])]
        # Total row: same field positions as the data rows.
        line = lines[-1].strip().split('|')
        future_holding_dict['total_volume'] = [int(line[2]), int(line[3])]
        future_holding_dict['total_buy'] = [int(line[5]), int(line[6])]
        future_holding_dict['total_sell'] = [int(line[8]), int(line[9])]
        # Write the parsed day to the contract's file.
        future_holding_contract_file_path = \
            get_future_holding_contract_file_path(future_variety_code, future_contract_name)
        insert_one_day_future_holding_data_to_js(
            future_holding_contract_file_path, future_holding_dict)