示例#1
0
def download_financialzip_fromtdx():
    """Download missing or outdated financial archives through the TDX crawler.

    Every ``(filename, md5)`` pair returned by ``get_filename()`` is compared
    with the copy stored under ``download_path``. A file is fetched again via
    ``HistoryFinancialCrawler.fetch_and_parse`` when it is absent locally or
    when the MD5 of the local copy differs from the expected one.

    The ``download_path`` folder is created if it does not exist yet.

    Returns:
        list: names of the files that were downloaded or refreshed by this
        call, in the order ``get_filename()`` listed them.
    """
    os.makedirs(download_path, exist_ok=True)

    res = []
    for item, md5 in get_filename():
        # One path, built once, serves both the freshness check and the
        # download target.
        local_file = os.path.join(download_path, item)

        # Probe this single path instead of listing the whole folder for
        # every entry.
        if os.path.isfile(local_file) and md5 == QA_util_file_md5(local_file):
            print('FILE {} is already in {}'.format(item, download_path))
            continue

        print('CURRENTLY GET/UPDATE {}'.format(item[0:12]))
        datacrawler = HistoryFinancialCrawler()
        datacrawler.fetch_and_parse(reporthook=None,
                                    filename=item,
                                    path_to_download=local_file)
        res.append(item)
    return res
示例#2
0
def download_financialzip():
    """Download missing or outdated financial archives over HTTP.

    Every ``(filename, md5)`` pair returned by ``get_filename()`` is compared
    with the copy stored under ``download_path``. A file is requested from
    ``http://data.yutiansut.com/<filename>`` when it is absent locally or
    when the MD5 of the local copy differs from the expected one.

    The ``download_path`` folder is created if it does not exist yet.

    A response with an HTTP error status is reported and skipped: nothing is
    written for that file and it is not included in the result. Connection
    failures and timeouts raised by ``requests`` propagate to the caller.

    Returns:
        list: names of the files that were actually written by this call, in
        the order ``get_filename()`` listed them.
    """
    os.makedirs(download_path, exist_ok=True)

    res = []
    for item, md5 in get_filename():
        local_file = os.path.join(download_path, item)

        # Probe this single path instead of listing the whole folder for
        # every entry.
        if os.path.isfile(local_file) and md5 == QA_util_file_md5(local_file):
            print('FILE {} is already in {}'.format(item, download_path))
            continue

        print('CURRENTLY GET/UPDATE {}'.format(item[0:12]))
        # Without a timeout a stalled server would block this loop forever.
        r = requests.get('http://data.yutiansut.com/{}'.format(item),
                         timeout=60)
        if not r.ok:
            # Never store an HTTP error page as if it were the archive, and
            # never overwrite an existing local copy with one.
            print('FAILED TO GET {}: HTTP {}'.format(item, r.status_code))
            continue

        with open(local_file, "wb") as code:
            code.write(r.content)
        res.append(item)
    return res
示例#3
0
def QA_fecth_local_financial_report_cn(code, report_type):
    """Load one locally stored financial report CSV for a stock.

    The file is located at ``report_path[report_type] + code + '.csv'`` and
    read as a tab-separated, GB2312-encoded table without a header row. The
    table is transposed so the first CSV column supplies the column names,
    placeholder rows are removed, the row order is reversed, and the
    bookkeeping columns ``code``, ``更新日期`` (modification date of the file)
    and ``hash_md5`` (MD5 of the file) are added.

    Args:
        code: stock code; part of the file name and stored in the ``code``
            column.
        report_type: key into ``report_path`` selecting where to look.

    Returns:
        The prepared DataFrame, indexed by ``报表日期`` converted to
        datetimes while the ``报表日期`` column itself holds ``YYYY-MM-DD``
        text; ``None`` when the file yields no rows.
    """
    csv_file = report_path[report_type] + code + '.csv'
    raw = pd.read_csv(csv_file, sep='\t', encoding='GB2312', header=None)
    if len(raw) < 1:
        print("{} report of stock {} cannot be loaded".format(report_type, code))
        return None

    # The first CSV column carries the field names: use it as the index and
    # flip the table so each remaining CSV column becomes one row.
    report = raw.set_index(0).transpose()

    # Comparing with False (instead of negating) is deliberate: rows whose
    # date is missing, such as a trailing all-NA row, evaluate to False and
    # are dropped together with the 19700101 placeholder rows.
    is_real_period = report['报表日期'].str.contains('19700101') == False
    report = report[is_real_period]

    # Reverse the row order.
    report = report[::-1]

    def _dash_date(compact):
        # 'YYYYMMDD' -> 'YYYY-MM-DD'
        return '{}-{}-{}'.format(compact[:4], compact[4:6], compact[6:])

    report['报表日期'] = report['报表日期'].apply(_dash_date)
    report['code'] = code

    modified_at = datetime.datetime.fromtimestamp(os.path.getmtime(csv_file))
    report['更新日期'] = modified_at.date().isoformat()
    report['hash_md5'] = QA_util_file_md5(csv_file)

    # Index by the report date as a real datetime (handy for date lookups
    # and comparisons) while keeping the column itself as plain text.
    report['报表日期'] = pd.to_datetime(report['报表日期'])
    report = report.set_index('报表日期', drop=False)
    report['报表日期'] = report['报表日期'].apply(lambda stamp: str(stamp)[0:10])
    return report