示例#1
0
def validate_data():
    """Print how many ``sh_stock`` entries listed before 2017 lack ``fzb`` rows.

    Loads the ``sh_stock`` and ``fzb`` collections from the ``list_company``
    database, keeps the ``sh_stock`` rows whose ``_id`` does not occur in the
    ``机构ID`` column of ``fzb`` and whose ``A股上市日期`` is not null, reduces
    that value to its first four characters, and prints the per-column
    count of the rows whose four-character string sorts before ``"2017"``.

    The download of the missing data is currently disabled, so the function
    has no effect other than the database reads and the printed count.

    Returns:
        None.
    """
    db = mongodb_utility.connect_db(db_name="list_company")
    sh_df = mongodb_utility.export2df(database=db, collection="sh_stock")
    fzb_df = mongodb_utility.export2df(database=db, collection="fzb")

    missing_fzb_sh = sh_df[~sh_df["_id"].isin(fzb_df["机构ID"])]
    # .copy() yields an independent frame, so the column assignment below
    # does not target a slice of sh_df (avoids SettingWithCopyWarning).
    missing_sh_a = missing_fzb_sh.loc[
        missing_fzb_sh["A股上市日期"].notnull(), ["_id", "A股上市日期"]
    ].copy()
    # Convert to str before stripping so non-string values do not raise.
    missing_sh_a["A股上市日期"] = missing_sh_a["A股上市日期"].apply(
        lambda x: str(x).strip()[:4])
    # String comparison: relies on the four-character prefix being a year.
    missing_sh_2017 = missing_sh_a[missing_sh_a["A股上市日期"] < "2017"].copy()
    print(missing_sh_2017.count())

    # Each row of the download list becomes [_id, "2016", "2016"].
    missing_sh_2017["max_year"] = "2016"
    missing_sh_2017["A股上市日期"] = "2016"
    download_list = missing_sh_2017.values.tolist()

    # NOTE: downloading the missing data is disabled; url and download_list
    # are kept so the step can be re-enabled with:
    # frDownloader.getFR(url=url, stock_list=download_list)
    url = 'http://www.cninfo.com.cn/cninfo-new/data/download'
    return
示例#2
0
def validate_stock(data_base=None, collection="fzb", stock_table="sh_stock"):
    """Return the stocks in *stock_table* that have no rows in *collection*.

    Args:
        data_base: Database handle forwarded to
            ``mongodb_utility.export2df`` for both reads.
        collection: Name of the report collection; its ``机构ID`` column is
            matched against the stock table's ``_id`` column.
        stock_table: Name of the stock-list collection.

    Returns:
        A DataFrame with the columns ``_id`` and ``A股上市日期`` for every
        stock whose ``_id`` is absent from the report collection and whose
        ``A股上市日期`` is not null. ``A股上市日期`` is returned as a
        whitespace-stripped string.
    """
    stock_df = mongodb_utility.export2df(database=data_base,
                                         collection=stock_table)
    report_df = mongodb_utility.export2df(database=data_base,
                                          collection=collection)

    missing_stock = stock_df[~stock_df["_id"].isin(report_df["机构ID"])]
    # Exclude B stock: only rows that have an A-share listing date are kept.
    # .copy() yields an independent frame, so the assignment below does not
    # target a slice of stock_df (avoids SettingWithCopyWarning).
    missing_stock_a = missing_stock.loc[
        missing_stock["A股上市日期"].notnull(), ["_id", "A股上市日期"]
    ].copy()
    # Convert to str before stripping so non-string values do not raise.
    missing_stock_a["A股上市日期"] = missing_stock_a["A股上市日期"].apply(
        lambda x: str(x).strip())
    return missing_stock_a
示例#3
0
def validate_reports_download(table1='fzb', table2='lrb', auto_download=False):
    """List reports present in *table1* but missing from *table2*.

    Both collections are read from the ``list_company`` database and
    compared on their ``_id`` column.

    Args:
        table1: Name of the collection treated as the reference.
        table2: Name of the collection checked for missing rows; also passed
            to ``frDownloader.getFR`` as the only entry of ``types``.
        auto_download: When true, the resulting list is handed to
            ``frDownloader.getFR`` to fetch the missing data.

    Returns:
        A list of ``[机构ID, 报告年度, max_year]`` rows, de-duplicated on
        ``(机构ID, 报告年度)``. ``报告年度`` is cut down to its first four
        characters and ``max_year`` repeats that value.
    """
    db = mongodb_utility.connect_db(db_name="list_company")
    df1 = mongodb_utility.export2df(database=db, collection=table1)
    df2 = mongodb_utility.export2df(database=db, collection=table2)

    # .copy() yields an independent frame, so the assignment below does not
    # target a slice of df1 (avoids SettingWithCopyWarning).
    t1_notin_t2 = df1.loc[
        ~df1['_id'].isin(df2['_id']), ['机构ID', '报告年度']
    ].copy()
    # Convert to str before stripping so non-string values do not raise.
    t1_notin_t2['报告年度'] = t1_notin_t2['报告年度'].apply(
        lambda x: str(x).strip()[:4])

    missing_t2 = t1_notin_t2.drop_duplicates(['机构ID', '报告年度']).copy()
    missing_t2['max_year'] = missing_t2['报告年度']
    download_list = missing_t2.values.tolist()
    print(len(download_list))

    if auto_download:
        url = 'http://www.cninfo.com.cn/cninfo-new/data/download'
        frDownloader.getFR(url=url, stock_list=download_list, types=[table2])
    return download_list
示例#4
0
def update_sh_b_data(update_year="2016"):
    """Download data for the ``sh_stock`` rows that have no ``A股上市日期``.

    The original inline notes describe these rows as "sh B stock". The
    ``sh_stock`` collection is read from the ``list_company`` database, and
    every row with a null ``A股上市日期`` is turned into
    ``[str(int(_id)), update_year, update_year]`` and passed to
    ``frDownloader.getFR``.

    Args:
        update_year: Value used for both the ``min_year`` and ``max_year``
            entries of every row.

    Returns:
        None.
    """
    url = 'http://www.cninfo.com.cn/cninfo-new/data/download'

    # Get the stock list.
    db = mongodb_utility.connect_db(db_name="list_company")
    sh_df = mongodb_utility.export2df(database=db, collection="sh_stock")

    # Select the B-stock rows (no A-share listing date). The column is used
    # directly instead of being re-wrapped in Series(...), and .copy() keeps
    # the assignments below off a slice of sh_df (SettingWithCopyWarning).
    sh_df_b = sh_df.loc[sh_df['A股上市日期'].isnull(), ["_id"]].copy()
    sh_df_b["_id"] = sh_df_b["_id"].apply(lambda x: str(int(x)))
    sh_df_b["min_year"] = update_year
    sh_df_b["max_year"] = update_year

    shb_list = sh_df_b.values.tolist()
    frDownloader.getFR(url=url, stock_list=shb_list)
示例#5
0
def validate_bw_reports():
    """Cross-check the ``fzb``, ``llb`` and ``lrb`` collections by ``_id``.

    For each ordered pair of collections, the rows of the first whose
    ``_id`` does not occur in the second are written (columns ``_id``,
    ``机构ID`` and ``报告年度``) to ``missing_<first>_<second>.csv`` in the
    current working directory. The ``fzb``-versus-``lrb`` result is also
    printed before it is written.

    Returns:
        None.
    """
    db = mongodb_utility.connect_db(db_name="list_company")
    # Loaded in the order fzb, llb, lrb.
    frames = {
        name: mongodb_utility.export2df(database=db, collection=name)
        for name in ("fzb", "llb", "lrb")
    }

    report_columns = ['_id', '机构ID', '报告年度']
    # (collection to scan, collection to look up in, output file), in the
    # order the files are written.
    comparisons = (
        ("fzb", "lrb", 'missing_fzb_lrb.csv'),
        ("fzb", "llb", 'missing_fzb_llb.csv'),
        ("lrb", "fzb", 'missing_lrb_fzb.csv'),
        ("llb", "fzb", 'missing_llb_fzb.csv'),
        ("lrb", "llb", 'missing_lrb_llb.csv'),
        ("llb", "lrb", 'missing_llb_lrb.csv'),
    )

    for position, (source, other, csv_name) in enumerate(comparisons):
        scanned = frames[source]
        found_in_other = scanned['_id'].isin(frames[other]['_id'])
        orphans = scanned[~found_in_other][report_columns]
        if position == 0:
            # Only the first comparison (fzb vs. lrb) is echoed to stdout.
            print(orphans)
        orphans.to_csv(csv_name)