def empty_value_handle_work():
    """Fill empty cells of the work-copyright table and encode its category column.

    Steps, in order:
      1. Read the table, fill missing values per column (category column
         with 9, the three date columns with '1000-01-01') and write the
         table back.
      2. Call ``dcu.merge_status`` on the category column with seven groups
         of label spellings paired with codes 1-7, the filler value 9 paired
         with 9, and ``others=8``.

    :return: None
    """
    table = u'作品著作权'
    category_col = u'作品著作权类别'.encode('utf-8')

    frame = file_utils.read_file_to_df(clean_data_temp_file_url, table)
    fill_map = {category_col: 9}
    for date_col in (u'作品著作权登记日期', u'作品著作权创作完成日期', u'作品著作权首次发布日期'):
        fill_map[date_col.encode('utf-8')] = '1000-01-01'
    frame = frame.fillna(fill_map)
    file_utils.write_file(frame, clean_data_temp_file_url, table)

    # (code, spellings) pairs; 9 is the filler written by the fillna above.
    category_codes = [
        (1, [u'A 文字', u'文字', u'文字作品']),
        (2, [u'B 音乐', u'音乐', u'音乐作品']),
        (3, [u'F 美术', u'美术', u'美术作品']),
        (4, [u'G 摄影', u'摄影', u'摄影作品']),
        (5, [u'H 电影', u'电影', u'电影作品和类似摄制电影的方法创造的作品', u'电影和类似摄制电影方法创作的作品',
             u'I 类似摄制电影方法创作作品', u'类似摄制电影方法创作的作品']),
        (6, [u'J 工程设计图、产品设计图', u'工程设计图、产品设计图', u'工程设计图、产品设计图作品', u'建筑']),
        (7, [u'K 地图、示意图', u'地图、示意图', u'图形']),
        (9, [9]),
    ]
    groups = [spellings for _code, spellings in category_codes]
    codes = [code for code, _spellings in category_codes]

    dcu.merge_status(table, category_col, groups, codes, others=8)

    # TODO Other columns
Exemplo n.º 2
0
def status_of_announcement(file_name, column_name):
    """Pass the announcement-type label groups and their codes to ``dcu.merge_status``.

    Nineteen label groups are paired with codes 1-19 and the 'Unknown'
    placeholder (invalid category) with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    code_table = [
        (1, [u'中标']),
        (2, [u'公开招标']),
        (3, [u'其他', u'其它']),
        (4, [u'单一']),
        (5, [u'变更']),
        (6, [u'合同']),
        (7, [u'废标']),
        (8, [u'成交']),
        (9, [u'招标']),
        (10, [u'拟建']),
        (11, [u'流标']),
        (12, [u'竞争性谈判']),
        (13, [u'竞价']),
        (14, [u'竞谈']),
        (15, [u'结果变更']),
        (16, [u'询价']),
        (17, [u'违规']),
        (18, [u'预告']),
        (19, [u'验收']),
        (-1, ['Unknown']),  # invalid category
    ]
    groups = [labels for _code, labels in code_table]
    codes = [code for code, _labels in code_table]
    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 3
0
def reason_log_out(file_name, column_name):
    """Pass the deregistration-reason groups and their codes to ``dcu.merge_status``.

    All five listed reasons share code 1; the '-1' placeholder is paired
    with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    known_reasons = [
        u'其他原因',
        u'决议解散',
        u'因公司合并或分立',
        u'宣告破产',
        u'章程规定的解散事由出现',
    ]
    groups = [known_reasons, ['-1']]
    codes = [1, -1]
    dcu.merge_status(file_name, column_name, groups, codes)
def pre_clean():
    """Pre-clean the general-taxpayer table.

    First calls ``dcu.merge_status`` once per column with empty group/code
    lists and a per-column ``empty_mask`` placeholder, then calls
    ``dcu.drop_columns`` once for each of five columns.

    :return: None
    """
    table = u'一般纳税人'

    # (column, placeholder handed over as empty_mask), processed in this order.
    empty_masks = [
        (u'认定日期', '0000-00-00'),
        (u'纳税人状态', 'Unknown'),
        (u'出口状态备案状态', 'Unknown'),
        (u'登记注册类型', 'Unknown'),
        (u'纳税人资格', 'Unknown'),
    ]
    for column, mask in empty_masks:
        dcu.merge_status(table, column.encode('utf-8'), [], [], empty_mask=mask)

    unwanted = [
        u'有效日期期起',
        u'有效截止日期',
        u'是否具有一般纳税人资格',
        u'扣缴义务',
        u'是否按季申报',
    ]
    for column in unwanted:
        dcu.drop_columns(table, column.encode('utf-8'))
Exemplo n.º 5
0
def industry_category(file_name, column_name):
    """Pass single-letter industry codes and their integer codes to ``dcu.merge_status``.

    Each letter forms its own one-element group; the groups are paired with
    1..19 in list order and the 'Unknown' placeholder (invalid category)
    with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    # NOTE(review): 'G' appears twice (7th and 11th position), so K..R are
    # paired with 12..19 rather than 11..18. Kept as-is because changing it
    # would renumber existing codes - confirm the intended letter.
    letters = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
        'G', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
    ]
    groups = [[letter] for letter in letters]
    groups.append(['Unknown'])  # invalid category

    codes = list(range(1, len(letters) + 1))
    codes.append(-1)

    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 6
0
def industry(file_name, column_name):
    """Digitize an industry-keyword column through ``dcu.merge_status``.

    Eight keyword groups are paired with codes 1-8; the placeholder group
    ('Unknown', 'Others', u'未知轮次') is paired with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    sectors = [
        (1, [u'金融', u'企业服务', u'区块链', u'黄金', u'保险']),
        (2, [u'汽车交通', u'交通运输', u'物流', u'养护']),
        (3, [u'消费', u'旅游', u'文化娱乐', u'酒店', u'生活服务', u'食品饮料',
             u'电商', u'体育', u'社交', u'消费生活', u'文娱传媒', u'会员',
             u'商业地产', u'乳制品', u'服饰', u'VRAR', u'乐器', u'VR·AR',
             u'视频']),
        (4, [u'医疗', u'教育', u'环保', u'公用事业', u'医疗健康', u'中药研发']),
        (5, [u'传统行业', u'能源矿产', u'钢铁', u'生产制造', u'工具', u'轮胎',
             u'材料', u'能源矿产', u'水泥', u'化工', u'污水处理', u'开采',
             u'水处理', u'锅炉', u'太阳能光伏', u'压缩机', u'零配件', u'燃气',
             u'石油', u'液压', u'供水', u'工程机械', u'电机', u'数控机床',
             u'自动化', u'纤维', u'能源开采', u'园林绿化', u'产品研发', u'阀门',
             u'pvc', u'冷链', u'制冷', u'纺织', u'机电', u'风机', u'变压器',
             u'压缩机', u'混凝土', u'开采', u'面料', u'电气', u'印刷', u'橡胶',
             u'机床']),
        (6, [u'建筑施工', u'房地产', u'工程施工', u'地产建筑', u'建筑材料',
             u'房产家居', u'建筑装饰']),
        (7, [u'智能硬件', u'人工智能', u'智能终端', u'航空', u'先进制造', u'硬件',
             u'无人机', u'光电', u'大数据', u'工业安全监控', u'火箭', u'物联网',
             u'智能淋浴', u'智慧城市', u'机器人', u'生态系统', u'智慧办公',
             u'云服务', u'激光', u'核电', u'数字视频']),
        (8, [u'农业', u'农药', u'种子', u'兽药', u'农机研发', u'饲料']),
        (-1, ['Unknown', 'Others', u'未知轮次']),  # invalid category
    ]
    groups = [keywords for _code, keywords in sectors]
    codes = [code for code, _keywords in sectors]
    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 7
0
def clean_gdscjytddy():
    """Clean the land-mortgage table (u'购地-市场交易-土地抵押').

    Runs, in order: one column drop, empty-value masking of four numeric
    columns and the end-date column, ``time_rearranged``/``time_split`` on
    the start and end dates, ``land_usage`` on both usage columns, and
    keyword extraction followed by ``land_status`` on the ownership column.

    :return: None
    """
    table = u'购地-市场交易-土地抵押'

    dcu.drop_columns(table, u'土地抵押人性质'.encode('utf-8'))

    # (column, empty_mask) pairs, handled in the listed order.
    empty_masks = [
        (u'抵押面积(公顷)', '0'),
        (u'土地面积', '0'),
        (u'评估金额(万元)', '0'),
        (u'抵押金额(万元)', '0'),
        (u'土地抵押登记结束时间', '0000-00-00'),
    ]
    for column, mask in empty_masks:
        dcu.merge_status(table, column.encode('utf-8'), [], [], empty_mask=mask)

    # Each date column is rearranged with index i, then the matching
    # 'time<i>' column is split with the same index.
    date_steps = [
        (u'土地抵押登记起始时间', 'time0'),
        (u'土地抵押登记结束时间', 'time1'),
    ]
    for position, (date_column, time_column) in enumerate(date_steps):
        time_rearranged(table, date_column.encode('utf-8'), i=position)
        time_split(table, time_column.encode('utf-8'), i=position)

    for usage_column in (u'土地用途', u'抵押土地用途'):
        land_usage(table, usage_column.encode('utf-8'))

    ownership_column = u'抵押土地权属性质与使用权类型'.encode('utf-8')
    dcu.extract_keyword(table, ownership_column, [u'国有', u'集体'],
                        empty_mask='-1', others_mask='3')
    # Per the original note: 1 = u'国有', 2 = u'集体', 3 = others, -1 = Unknown.
    land_status(table, ownership_column)
Exemplo n.º 8
0
def log_out_custom(file_name, column_name):
    """Pass the normal/deregistered label groups and their codes to ``dcu.merge_status``.

    u'正常' is paired with 1, u'注销' with 2 and the 'Unknown' placeholder
    (invalid category) with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    groups = [
        [u'正常'],
        [u'注销'],
        ['Unknown'],  # invalid category
    ]
    codes = [1, 2, -1]
    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 9
0
def bidding_or_tendering(file_name, column_name):
    """Pass the bid/tender label groups and their codes to ``dcu.merge_status``.

    u'中标' is paired with 1, u'招标' with 2 and the 'Unknown' placeholder
    (invalid category) with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    status_1 = [u'中标']
    status_2 = [u'招标']
    status_no = ['Unknown']  # invalid category
    status_list = [status_1, status_2, status_no]
    # One code per group. The code list used to be [1, 2, 3, 4, 5, -1]
    # (six codes for three groups), which lined 'Unknown' up with 3 instead
    # of the -1 used for it by every other encoder in this file.
    status_after = [1, 2, -1]
    dcu.merge_status(file_name, column_name, status_list, status_after)
    return
Exemplo n.º 10
0
def whether_on_stock_market(file_name, column_name):
    """Pass the N/Y flag groups and their codes to ``dcu.merge_status``.

    'N' is paired with 1, 'Y' with 2 and the 'Unknown' placeholder
    (invalid category) with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    groups = [
        ['N'],
        ['Y'],
        ['Unknown'],  # invalid category
    ]
    codes = [1, 2, -1]
    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 11
0
def kind_of_company(file_name, column_name):
    """Pass the company-kind label groups and their codes to ``dcu.merge_status``.

    u'外企' is paired with 1, u'民营' with 2 and the 'Unknown' placeholder
    (invalid category) with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    groups = [
        [u'外企'],
        [u'民营'],
        ['Unknown'],  # invalid category
    ]
    codes = [1, 2, -1]
    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 12
0
def money_kind(file_name, column_name):
    """Pass the currency label groups and their codes to ``dcu.merge_status``.

    u'人民币' is paired with 1, u'美元' with 2 and the placeholder group
    ('Unknown', '-') with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    groups = [
        [u'人民币'],
        [u'美元'],
        ['Unknown', '-'],  # invalid category
    ]
    codes = [1, 2, -1]
    dcu.merge_status(file_name, column_name, groups, codes)
def status_export(file_name, column_name):
    """Pass the export-status label groups and their codes to ``dcu.merge_status``.

    u'出口退(免)税企业' is paired with 1, u'非出口退(免)税企业' with 2 and the
    'Unknown' placeholder with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    groups = [
        [u'出口退(免)税企业'],
        [u'非出口退(免)税企业'],
        ['Unknown'],
    ]
    codes = [1, 2, -1]
    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 14
0
def land_status(file_name, column_name):
    """Pass the land-ownership label groups and their codes to ``dcu.merge_status``.

    u'国有' is paired with 1, u'集体' with 2 and the '-1' placeholder
    (invalid category) with -1; ``empty_mask='-1'`` is forwarded as well.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    groups = [
        [u'国有'],
        [u'集体'],
        ['-1'],  # invalid category
    ]
    codes = [1, 2, -1]
    dcu.merge_status(file_name, column_name, groups, codes, empty_mask='-1')
Exemplo n.º 15
0
def clean_competing_products():
    """Clean the competing-products table (u'竞品').

    Runs, in order:
      1. ``dcu.merge_status`` with empty group/code lists and a per-column
         ``empty_mask`` for five columns.
      2. ``dcu.extract_keyword`` on the address column (city names) and on
         the tag column (industry keywords), both with
         ``empty_mask='Unknown'`` and ``others_mask='Others'``.
      3. The encoders ``industry``, ``round`` (this file's function, which
         shadows the builtin), ``status`` and ``address``.
      4. ``time_split`` on the founding-date column.

    :return: None
    """
    file_name = u'竞品'

    # (column, empty_mask) pairs, handled in the listed order.
    empty_masks = [
        (u'竞品的行业', 'Unknown'),
        (u'竞品详细地址', 'Unknown'),
        (u'竞品运营状态', 'Unknown'),
        (u'竞品成立时间', '0000-00-00'),
        (u'竞品轮次', 'Unknown'),
    ]
    for column, mask in empty_masks:
        dcu.merge_status(file_name, column.encode('utf-8'), [], [],
                         empty_mask=mask)

    dcu.extract_keyword(file_name,
                        u'竞品详细地址'.encode('utf-8'), [
                            u'北京', u'上海', u'广州', u'深圳', u'成都', u'杭州', u'南京',
                            u'武汉', u'天津', u'西安', u'重庆', u'青岛', u'沈阳', u'长沙',
                            u'大连', u'厦门', u'无锡', u'福州', u'济南',
                        ],
                        empty_mask='Unknown',
                        others_mask='Others')

    # Industry keywords looked for in the tag column.
    # Three separating commas were missing (after u'未知轮次',
    # u'工业安全监控' and u'能源开采'); adjacent string literals are
    # concatenated by Python, so those neighbours were fused into single
    # keywords and u'大数据', u'会员', u'纤维' etc. were never passed on
    # their own, although industry() lists them individually.
    dcu.extract_keyword(
        file_name,
        u'竞品的标签'.encode('utf-8'), [
            u'金融', u'汽车交通', u'旅游', u'企业服务', u'传统行业', u'能源矿产', u'生活服务', u'交通运输',
            u'建筑施工', u'物流', u'房地产', u'钢铁', u'智能硬件', u'农业', u'医疗', u'文化娱乐',
            u'人工智能', u'工程施工', u'酒店', u'智能终端', u'地产建筑', u'食品饮料', u'消费', u'电商',
            u'生产制造', u'教育', u'航空', u'先进制造', u'硬件', u'体育', u'社交', u'建筑材料',
            u'环保', u'房产家居', u'工具', u'轮胎', u'农药', u'消费生活', u'材料', u'能源矿产',
            u'水泥', u'化工', u'种子', u'无人机', u'光电', u'公用事业', u'文娱传媒', u'医疗健康',
            u'污水处理', u'未知轮次',
            u'大数据', u'工业安全监控',
            u'会员', u'开采', u'水处理', u'兽药', u'锅炉', u'太阳能光伏', u'压缩机', u'火箭',
            u'农机研发', u'零配件', u'区块链', u'燃气', u'商业地产', u'石油', u'液压', u'供水',
            u'工程机械', u'乳制品', u'电机', u'机床', u'中药研发', u'黄金', u'自动化', u'能源开采',
            u'纤维', u'保险', u'园林绿化', u'物联网', u'冷链', u'智能淋浴', u'智慧城市', u'产品研发',
            u'机器人', u'服饰', u'阀门', u'养护', u'智慧办公', u'pvc', u'制冷', u'生态系统',
            u'饲料', u'纺织', u'机电', u'风机', u'云服务', u'激光', u'变压器', u'VRAR', u'压缩机',
            u'混凝土', u'核电', u'开采', u'面料', u'电气', u'乐器', u'VR·AR', u'建筑装饰',
            u'印刷', u'数字视频', u'橡胶', u'视频',
        ],
        empty_mask='Unknown',
        others_mask='Others')

    industry(file_name, u'竞品的标签'.encode('utf-8'))
    round(file_name, u'竞品轮次'.encode('utf-8'))
    status(file_name, u'竞品运营状态'.encode('utf-8'))
    address(file_name, u'竞品详细地址'.encode('utf-8'))

    time_split(file_name, u'竞品成立时间'.encode('utf-8'), i=0)

    return
Exemplo n.º 16
0
def clean_financing():
    """Clean the financing table (u'融资信息') and normalise the amount column.

    Runs, in order:
      1. Empty-value masking of the date and round columns, then
         ``time_rearranged`` on the date and ``round`` (this file's function,
         which shadows the builtin) on the round column.
      2. Reads the workbook, fills missing amounts with 'Unknown', replaces
         amounts that start with u'数' or u'未披露' by '0', and writes the
         workbook.
      3. Converts amounts ending in a known unit suffix to a plain number
         (multiplied by 10 ** 4, and by a hard-coded exchange rate for USD
         and HKD amounts), then writes the workbook again.

    Values with no recognised prefix or suffix are left unchanged.

    :return: None
    :raises ValueError: if the text left after removing a unit suffix is not
        a valid float literal
    """
    file_name = u'融资信息'
    dcu.merge_status(file_name, u'融资日期'.encode('utf-8'), [], [], empty_mask='0000-00-00')
    dcu.merge_status(file_name, u'轮次'.encode('utf-8'), [], [], empty_mask='Unknown')

    time_rearranged(file_name, u'融资日期'.encode('utf-8'))

    round(file_name, u'轮次'.encode('utf-8'))

    column_name = u'投资金额'.encode('utf-8')
    wr1 = fu.read_file_to_df(clean_data_temp_file_url, u'融资信息.xlsx',
                             sheet_name='Sheet')
    # fillna returns a new frame unless inplace=True; the result used to be
    # discarded, which made the empty-value handling a no-op.
    wr1 = wr1.fillna({column_name: 'Unknown'})

    # Pass 1: replace values starting with u'数' or u'未披露' by '0'.
    # Positional labels 0..len-1 are assumed, as in the original loop.
    for index in range(0, len(wr1)):
        content = str(wr1.at[index, column_name])
        if content.startswith(u'数') or content.startswith(u'未披露'):
            # .at replaces DataFrame.set_value, which newer pandas removed.
            wr1.at[index, column_name] = '0'
    fu.write_file(wr1, clean_data_temp_file_url, u'融资信息', ext='.xlsx',
                  sheet_name='Sheet', index=False)

    # Pass 2: (unit suffix, exchange rate) checked in this order, so the
    # bare u'万' only catches what the longer suffixes did not. A rate of
    # None means no currency conversion. Rates are the hard-coded March 24
    # values from the original code.
    unit_rates = (
        (u'万美元', 6.72),
        (u'万港币', 0.856),
        (u'万人民币', None),
        (u'万', None),
    )
    for index in range(0, len(wr1)):
        content = str(wr1.at[index, column_name])
        for suffix, rate in unit_rates:
            if not content.endswith(suffix):
                continue
            amount = float(content.replace(suffix, u'')) * (10 ** 4)
            if rate is not None:
                amount = amount * rate
            wr1.at[index, column_name] = amount
            break

    fu.write_file(wr1, clean_data_temp_file_url, u'融资信息', ext='.xlsx',
                  sheet_name='Sheet', index=False)

    return
def status_taxers(file_name, column_name):
    """Pass the taxpayer-status label groups and their codes to ``dcu.merge_status``.

    (u'是', u'正常') is paired with 1, u'报验' with 2, (u'核销报验', u'注销')
    with 3 and the 'Unknown' placeholder with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    code_table = [
        (1, [u'是', u'正常']),
        (2, [u'报验']),
        (3, [u'核销报验', u'注销']),
        (-1, ['Unknown']),
    ]
    groups = [labels for _code, labels in code_table]
    codes = [code for code, _labels in code_table]
    dcu.merge_status(file_name, column_name, groups, codes)
def numeric_patent():
    """Pass the patent-type label groups and their codes to ``dcu.merge_status``.

    Works on column u'专利类型' of table u'专利': the invention labels are
    paired with 0, the design labels with 1 and the utility-model labels
    with 2.

    :return: None
    """
    invention = [u'发明专利', u'发明公布', u'发明公布更正', u'发明授权', u'发明授权更正']
    design = [u'外观设计', u'外观设计更正']
    utility_model = [u'实用新型', u'实用新型更正']

    dcu.merge_status(
        u'专利',
        u'专利类型'.encode('utf-8'),
        [invention, design, utility_model],
        [0, 1, 2],
    )
def empty_value_handle_trademark():
    """Handle empty and dirty values in the trademark table (u'商标').

    What this function does, in order:
      1. Reads the table, fills missing values in three columns (see below)
         and writes the table back.
      2. Drops the two exclusive-right date columns via ``dcu.drop_columns``.
      3. Calls ``dcu.merge_status`` on the usage-period column, pairing the
         bare separator u'至' with u'1000-01-01至1000-01-01'.

    Per-column notes carried over from the original analysis
    (141312 rows in total):

    商标状态
        0.2597% empty (367 rows); filled with 'Unknown'.
    申请日期
        0.3637% empty (514 rows); filled with '1000-01-01'. Other values
        are well formatted.
    专用权期限开始日期
        Entirely empty; dropped.
    专用权期限结束日期
        21.4922% empty (30371 rows); the value can be extracted from
        商标使用期限时间段, so the column is dropped.
    商标使用期限时间段
        1.5915% empty (2249 rows); filled with '1000-01-01至1000-01-01'.
        Other values are well formatted except some that are just '至'.

    The original notes also describe dropping rows with more than 3 empties
    among 8 financial columns; no such step is performed in this function.

    :return: None
    """
    table = u'商标'
    period_col = u'商标使用期限时间段'.encode('utf-8')
    empty_period = u'1000-01-01至1000-01-01'

    frame = file_utils.read_file_to_df(clean_data_temp_file_url, table)
    frame = frame.fillna({
        u'商标状态'.encode('utf-8'): 'Unknown',
        u'申请日期'.encode('utf-8'): '1000-01-01',
        period_col: empty_period,
    })
    file_utils.write_file(frame, clean_data_temp_file_url, table)

    for obsolete in (u'专用权期限开始日期', u'专用权期限结束日期'):
        dcu.drop_columns(table, [obsolete.encode('utf-8')])

    # A period consisting only of the separator gets the same placeholder
    # used for empty cells.
    dcu.merge_status(table, period_col, [[u'至']], [empty_period])
Exemplo n.º 20
0
def level_of_credit(file_name, column_name):
    """Pass the credit-level label groups and their codes to ``dcu.merge_status``.

    The four credit levels are paired with 1-4 in the listed order and the
    'Unknown' placeholder (invalid category) with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    status_1 = [u'失信企业']
    status_2 = [u'一般信用企业']
    status_3 = [u'一般认证企业']
    status_4 = [u'高级认证企业']
    status_no = ['Unknown']  # invalid category
    status_list = [status_1, status_2, status_3, status_4, status_no]
    # One code per group. The code list used to be [1, 2, 3, 4, 5, -1]
    # (six codes for five groups), which lined 'Unknown' up with 5 instead
    # of the -1 used for it by every other encoder in this file.
    status_after = [1, 2, 3, 4, -1]
    dcu.merge_status(file_name, column_name, status_list, status_after)
    return
Exemplo n.º 21
0
def status_of_company(file_name, column_name):
    """Pass the company-status label groups and their codes to ``dcu.merge_status``.

    The operating/existing spellings are paired with 1, u'在业' with 2,
    u'迁出' with 3 and the 'Unknown' placeholder with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    # The last entry repeats the third one; kept exactly as in the original.
    operating = [
        u'在营',
        u'存续',
        u'存续(在营、开业、在册)',
        u'开业',
        u'在营(开业)企业',
        u'存续(在营、开业、在册)',
    ]
    groups = [operating, [u'在业'], [u'迁出'], ['Unknown']]
    codes = [1, 2, 3, -1]
    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 22
0
def status(file_name, column_name):
    """Digitize an operating-status column through ``dcu.merge_status``.

    The four status labels are paired with 1-4 in the listed order and the
    placeholder group ('Unknown', u'未知轮次') with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    labels = [u'停止更新', u'已关闭', u'融资中', u'运营中']

    groups = [[label] for label in labels]
    groups.append(['Unknown', u'未知轮次'])  # invalid category

    codes = list(range(1, len(labels) + 1))
    codes.append(-1)

    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 23
0
def ranking_of_co(file_name, column_name):
    """Pass the company-rating label groups and their codes to ``dcu.merge_status``.

    'A', 'AA-', 'AA', 'AA+' and 'AAA' are paired with 1-5 in that order and
    the placeholder group ('Unknown', '-') with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    ratings = ['A', 'AA-', 'AA', 'AA+', 'AAA']

    groups = [[rating] for rating in ratings]
    groups.append(['Unknown', '-'])  # invalid category

    codes = list(range(1, len(ratings) + 1))
    codes.append(-1)

    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 24
0
def status_of_annual_report(file_name, column_name):
    """Pass the annual-report status label groups and their codes to ``dcu.merge_status``.

    The five status labels are paired with 1-5 in the listed order and the
    'Unknown' placeholder (invalid category) with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    labels = [u'超期未报送', u'超期报送', u'未报送', u'已报送', u'不需要']

    groups = [[label] for label in labels]
    groups.append(['Unknown'])  # invalid category

    codes = list(range(1, len(labels) + 1))
    codes.append(-1)

    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 25
0
def interest_pay(file_name, column_name):
    """Pass the interest-payment label groups and their codes to ``dcu.merge_status``.

    The six payment-mode labels are paired with 1-6 in the listed order and
    the placeholder group ('Unknown', '-') with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    labels = [
        u'到期一次还本付息',
        u'按季付息',
        u'半年付息',
        u'按年付息',
        u'附息式固定利率',
        u'附息式浮动利率',
    ]

    groups = [[label] for label in labels]
    groups.append(['Unknown', '-'])  # invalid category

    codes = list(range(1, len(labels) + 1))
    codes.append(-1)

    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 26
0
def kind_of_tax_company(file_name, column_name):
    """Pass the customs-company kind label groups and their codes to ``dcu.merge_status``.

    Six groups are paired with 1-6 in the listed order (group 5 also
    contains the literal '1') and the 'Unknown' placeholder (invalid
    category) with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    code_table = [
        (1, [u'临时注册企业']),
        (2, [u'保税仓库']),
        (3, [u'加工生产企业']),
        (4, [u'报关企业']),
        (5, ['1', u'进出口收发货人']),
        (6, [u'进出口运输工具负责人']),
        (-1, ['Unknown']),  # invalid category
    ]
    groups = [labels for _code, labels in code_table]
    codes = [code for code, _labels in code_table]
    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 27
0
def kind_of_bond(file_name, column_name):
    """Pass the bond-kind label groups and their codes to ``dcu.merge_status``.

    The seven bond kinds are paired with 1-7 in the listed order and the
    placeholder group ('Unknown', '-') with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    labels = [
        u'中央企业债',
        u'企业债券',
        u'公司、企业债',
        u'地方企业债',
        u'沪企债',
        u'深企债',
        u'银行间企债',
    ]

    groups = [[label] for label in labels]
    groups.append(['Unknown', '-'])  # invalid category

    codes = list(range(1, len(labels) + 1))
    codes.append(-1)

    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 28
0
def ranking_of_bond(file_name, column_name):
    """Pass the bond-rating label groups and their codes to ``dcu.merge_status``.

    'C', 'CC', 'B', 'AA-', 'AA', 'AA+' and 'AAA' are paired with 1-7 in
    that order and the 'Unknown' placeholder (invalid category) with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    ratings = ['C', 'CC', 'B', 'AA-', 'AA', 'AA+', 'AAA']

    groups = [[rating] for rating in ratings]
    groups.append(['Unknown'])  # invalid category

    codes = list(range(1, len(ratings) + 1))
    codes.append(-1)

    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 29
0
def round(file_name, column_name):
    """Digitize a financing-round column through ``dcu.merge_status``.

    The 34 round labels are paired with 1-34 in the listed order and the
    placeholder group ('Unknown', u'未知轮次') with -1.

    NOTE: this function shadows the builtin ``round`` inside this module;
    the name is kept because other functions in the file call it.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    rounds = [
        u'未融资',
        u'种子轮',
        u'天使轮',
        u'Pre-A轮',
        u'A轮',
        u'A+轮',
        u'Pre-B轮',
        u'B轮',
        u'B+轮',
        u'C轮',
        u'C+轮',
        u'D轮',
        u'E轮',
        u'E轮及以后',
        u'F轮',
        u'ICO',
        u'Pre-IPO',
        u'IPO',
        u'IPO后',
        u'上市',
        u'新三板',
        u'新三板定增',
        u'新四板',
        u'股权转让',
        u'债权融资',
        u'并购',
        u'战略合并',
        u'扶持基金',
        u'战略投资',
        u'被收购',
        u'后期阶段',
        u'私有化',
        u'众筹',
        u'主板定向增发',
    ]

    groups = [[label] for label in rounds]
    groups.append(['Unknown', u'未知轮次'])  # invalid category

    codes = list(range(1, len(rounds) + 1))
    codes.append(-1)

    dcu.merge_status(file_name, column_name, groups, codes)
Exemplo n.º 30
0
def kind_of_range(file_name, column_name):
    """Pass the economic-zone label groups and their codes to ``dcu.merge_status``.

    The eight zone labels are paired with 1-8 in the listed order and the
    'Unknown' placeholder (invalid category) with -1.

    :param file_name: table name forwarded to ``dcu.merge_status``
    :param column_name: column name forwarded to ``dcu.merge_status``
    :return: None
    """
    zones = [
        u'一般经济区域',
        u'保税区',
        u'保税港区、综合保税区',
        u'保税物流园区',
        u'经济技术开发全区',
        u'经济技术开发区',
        u'经济特区',
        u'高新技术产业开发区',
    ]

    groups = [[zone] for zone in zones]
    groups.append(['Unknown'])  # invalid category

    codes = list(range(1, len(zones) + 1))
    codes.append(-1)

    dcu.merge_status(file_name, column_name, groups, codes)