def combine_industry_and_acquisition(): ''' 匹配合并并购表和行业表 ''' company_industry_tables = re.excel_table_to_OrderedDict_bySheetName(file='company_industry_filter_sample.xls', by_name=u'sub_data') M_A_subsample_tables = re.excel_table_to_OrderedDict_bySheetName(file='merger_and_acquisition_filter_sample.xls', by_name=u'data') print('len of company_industry_tables: %d' % len(company_industry_tables)) print('len of M_A_subsample_tables: %d' % len(M_A_subsample_tables)) result = [] for i in range(len(M_A_subsample_tables)): row_M_A = M_A_subsample_tables[i] for j in range(len(company_industry_tables)): row_C_I = company_industry_tables[j] if row_M_A['Symbol'] == row_C_I['Stkcd']: row_M_A.update(row_C_I) print(row_M_A) result.append(row_M_A) break; we.write_excel('M&A_industry_combine.xls', 'combine_data', result) print(len(result))
def filter_ceo_data(): tables = [] for i in range(1, 10): # table = re.excel_table_byname(file='CG_Director_new.xls', by_name=u'CEO' + str(i)) table = re.excel_table_to_OrderedDict_bySheetName(file='CG_Director_ALL.xls', by_name=u'CEO' + str(i)) print(len(table)) tables.extend(table) print('len of tables: %d' % len(tables)) return tables result = [] CMList = ['10', '11', '13'] GMList = ['30', '31', '32', '37'] for item in tables: zhiwei = str(item['D0201a']) isCM = str(zhiwei[0:2]) isGM1 = str(zhiwei[2:4]) isGM2 = str(zhiwei[4:6]) # if isCM == '11' or '30' in isGM or '32' in isGM or '37' in isGM: # if '副' not in item['D0201b']: # print(item) # result.append(item) if isCM in CMList or isGM1 in GMList or isGM2 in GMList: # print(item) result.append(item) print(len(result)) return result
def add_ceoAge_to_GMTenure(): table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_with_Param.xls', by_name=u'data') CEOAge = RWTXT.txt_to_dict_list('CG_Director_ALL.txt') CEOAge.sort(key=itemgetter('Stkcd'), reverse=True) lenCEO = len(CEOAge) print(len(CEOAge)) CEOAge1 = CEOAge[0:int(lenCEO / 2)] CEOAge2 = CEOAge[int(lenCEO / 2):int(lenCEO)] for item in table: app = OrderedDict() startT = time.clock() Ceo_Age = [] if item['GM_Name'] != '': Ceo_Age = common_lib.Group_Search_dictList(CEOAge, item['StockId'], item['Year'], item['GM_Name']) if len(Ceo_Age) > 0: app['CEOAge'] = Ceo_Age[0]['D0401b'] app['CEOSex'] = 1 if Ceo_Age[0]['D0301b'] == '男' else 0 else: app['CEOAge'] = '' app['CEOSex'] = '' print(Ceo_Age) endT = time.clock() print('Running time:%s Seconds' % ((endT - startT))) item.update(app) print(table[0]) print(len(table)) we.write_excel('GM_Tenure_with_Param(WithCeoAge_V2).xls', 'data', table)
def write_txt(): start = time.clock() print(start) tables = [] for i in range(1, 10): # table = re.excel_table_byname(file='CG_Director_new.xls', by_name=u'CEO' + str(i)) table = re.excel_table_to_OrderedDict_bySheetName( file='CG_Director_ALL.xls', by_name=u'CEO' + str(i)) print(len(table)) tables.extend(table) print('len of tables: %d' % len(tables)) tables.sort(key=itemgetter('Stkcd')) end = time.clock() print(end) print('read excel Running time:%s Seconds' % ((end - start))) startX = time.clock() output = open('CG_Director_ALL.txt', 'w', encoding='utf-8') titles = '' titleList = list(tables[0].keys()) titles = '|'.join(titleList) output.writelines(titles) for item in tables: row = list(OrderedDict(item).values()) print(row) row = [str(i) if type(i) == float else i for i in row] # print(row) data = '|'.join(row) output.write('\n') output.write(data) output.close() endX = time.clock() print('write txt Running time:%s Seconds' % ((endX - startX)))
def add_ceoAge_to_GMTenure_V3(): GM_Table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_with_Param.xls', by_name=u'data') mongoClient = mdb.connect_to_db() CG_Director_List = mdb.Get_CG_Director_ALL(mongoClient) for item in GM_Table: app = OrderedDict() startT = time.clock() Ceo_Age = [] if item['GM_Name'] != '': Ceo_Age = mdb.Search_CG_Director_ALL(CG_Director_List, item['StockId'], item['Year'], item['GM_Name']) # Ceo_Age=CG_Director_List.find({'Stkcd': item['StockId'], 'Reptdt': item['Year']+'-12-31', 'D0101b': item['GM_Name']}) if len(Ceo_Age) > 0: app['CEOAge'] = Ceo_Age[0]['D0401b'] app['CEOSex'] = 1 if Ceo_Age[0]['D0301b'] == '男' else 0 else: app['CEOAge'] = '' app['CEOSex'] = '' print(Ceo_Age) endT = time.clock() print('Running time:%s Seconds' % ((endT - startT))) item.update(app) print(GM_Table[5]) print(len(GM_Table))
def sort_ceo_final_data(): table = re.excel_table_to_OrderedDict_bySheetName(file='CEO_final_V2.xls', by_name=u'data') result = sorted(table, key=itemgetter('Stkcd', 'Reptdt')) for item in result: print(item) we.write_excel('CEO_final_V2.xls', 'data', result) print(len(result))
def add_Assets0101_to_GMTenure(): table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_with_Param(WithCeoAge_VFinal).xls', by_name=u'data') print(len(table)) FirmAssets = re.excel_table_to_OrderedDict_bySheetName(file='company_assets_Filter(01-01).xls', by_name=u'data') print(len(FirmAssets)) for item in table: app = OrderedDict() asset = [x for x in FirmAssets if x['Stkcd'] == item['StockId'] and str(x['Accper']).split('-')[0] == item['Year']] print(asset) # app['TotalAsset'] = asset[0]['A001000000'] app['TotalAsset(01-01)'] = float(max(asset, key=itemgetter('A001000000'))['A001000000']) item.update(app) print(table[0]) print(len(table)) we.write_excel('GM_Tenure_with_Param(WithCeoAge_VFinal).xls', 'data', table)
def filter_merge_acquisition(): ''' 过滤并购数据: BusinessID [业务编码]:包含A=资产、B=股权, TradingPositionID [上市公司交易地位编码]:S3101=买方、S3106=既是买方又是标的方 ''' tables = re.excel_table_to_OrderedDict_bySheetName(file='merger_and_acquisition.xls', by_name=u'STK_MA_TRADINGMAIN') tables.extend( re.excel_table_to_OrderedDict_bySheetName(file='merger_and_acquisition.xls', by_name=u'STK_MA_TRADINGMAIN1')) print(len(tables)) result = [] for i in range(len(tables)): row = tables[i] if 'A' in row['BusinessID'] or 'B' in row['BusinessID']: if 'S3101' in row['TradingPositionID'] or 'S3106' in row['TradingPositionID']: print(row) result.append(row) we.write_excel('merger_and_acquisition_filter_sample.xls', 'data', result) print(len(result))
def add_ceoAge_to_GMTenure_V2(): GM_Table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_with_Param.xls', by_name=u'data') CEOAge = filter_ceo_data() CEOAge.sort(key=itemgetter('Stkcd')) lenCEO = len(CEOAge) print(lenCEO) CEOAge1 = CEOAge[0:int(lenCEO / 2)] CEOAge2 = CEOAge[int(lenCEO / 2):int(lenCEO)] for item in GM_Table: app = OrderedDict() startT = time.clock() # Ceo_Age = [x for x in CEOAge1 if x['Stkcd'] == item['StockId'] \ # and str(x['Reptdt']).split('-')[0] == item['Year'] \ # and x['D0101b'] == item['GM_Name']] # if len(Ceo_Age) > 0: # app['CEOAge'] = Ceo_Age[0]['D0401b'] # app['CEOSex'] = 1 if Ceo_Age[0]['D0301b'] == '男' else 0 # else: # Ceo_Age = [x for x in CEOAge2 if x['Stkcd'] == item['StockId'] \ # and str(x['Reptdt']).split('-')[0] == item['Year'] \ # and x['D0101b'] == item['GM_Name']] # if len(Ceo_Age) > 0: # app['CEOAge'] = Ceo_Age[0]['D0401b'] # app['CEOSex'] = 1 if Ceo_Age[0]['D0301b'] == '男' else 0 # else: # app['CEOAge'] = '' # app['CEOSex'] = '' if item['StockId'] == '600002': a = 1 b = 2 Ceo_Age = [] if item['GM_Name'] != '': Ceo_Age = common_lib.Binary_Search_dictList(CEOAge, item['StockId'], item['Year'], item['GM_Name']) if len(Ceo_Age) > 0: app['CEOAge'] = Ceo_Age[0]['D0401b'] app['CEOSex'] = 1 if Ceo_Age[0]['D0301b'] == '男' else 0 else: app['CEOAge'] = '' app['CEOSex'] = '' print(Ceo_Age) endT = time.clock() print('Running time:%s Seconds' % ((endT - startT))) item.update(app) print(GM_Table[0]) print(len(GM_Table)) we.write_excel('GM_Tenure_with_Param(WithCeoAge).xls', 'data', GM_Table)
def add_M_A_to_GMTenure(): table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_with_Param(WithCeoAge_VFinal).xls', by_name=u'data') print(len(table)) MACount = re.excel_table_to_OrderedDict_bySheetName(file='M&A_TimeCount.xls', by_name=u'data') print(len(MACount)) for item in table: app = OrderedDict() count = [x for x in MACount if x['StockId'] == item['StockId'] and x['Year'] == item['Year']] print(count) if len(count) > 0: app['MATimes'] = count[0]['Times'] app['MATotalExpenseValue'] = count[0]['TotalExpenseValue'] else: app['MATimes'] = '' app['MATotalExpenseValue'] = '' item.update(app) print(table[0]) print(len(table)) we.write_excel('GM_Tenure_with_Param(WithCeoAge_VFinal2).xls', 'data', table)
def filter_company_industry(): ''' 过滤公司行业数据: 剔除B股、ST股、金融行业(行业代码J开头) ''' tables = re.excel_table_to_OrderedDict_bySheetName(file='company-industry.xls', by_name=u'CG_Co') print(len(tables)) result = [] for i in range(len(tables)): row = tables[i] if 'B' not in row['Stktype'] and 'ST' not in row['Stknme'] and 'J' not in row['Nnindcd']: print(row) result.append(row) we.write_excel('company_industry_filter_sample.xls', 'sub_data', result) print(len(result))
def merge_M_A_by_SameTime(): table = re.excel_table_to_OrderedDict_bySheetName(file='M&A_MergeTime.xls', by_name=u'SucceedAndNonRelevance') print(len(table)) result = [] for Symbol, items1 in groupby(table, key=itemgetter('Symbol')): for FirstDeclareDate, items2 in groupby(items1, key=itemgetter('FirstDeclareDate')): app = OrderedDict() app['StockId'] = Symbol app['FirstDeclareDate'] = FirstDeclareDate app['Year'] = str(FirstDeclareDate).split('-')[0] ExpenseValue = 0 for i in items2: ExpenseValue += float(i['ExpenseValue']) app['ExpenseValue'] = str(ExpenseValue) print(app) result.append(app) we.write_excel('M&A_MergeTime.xls', 'data', result)
def calc_M_A_Times(): table = re.excel_table_to_OrderedDict_bySheetName(file='M&A_MergeTime.xls', by_name=u'data') print(len(table)) result = [] for StockId, items1 in groupby(table, key=itemgetter('StockId')): for Year, items2 in groupby(items1, key=itemgetter('Year')): app = OrderedDict() app['StockId'] = StockId app['Year'] = Year times = 0 TotalExpenseValue = 0 for i in items2: times += 1 TotalExpenseValue += float(i['ExpenseValue']) app['Times'] = str(times) app['TotalExpenseValue'] = str(TotalExpenseValue) print(app) result.append(app) we.write_excel('M&A_TimeCount.xls', 'data', result)
def group_M_A_by_industry(): table = re.excel_table_to_OrderedDict_bySheetName(file='M&A_industry_combine_except2017.xls', by_name=u'data') result1 = [] # Sort by the desired field first table.sort(key=itemgetter('Nnindcd')) # Iterate in groups for Nnindcd, items in groupby(table, key=itemgetter('Nnindcd')): app = OrderedDict() app['IndustryId'] = Nnindcd print(Nnindcd) lenItem = 0 industry_name = '' for i in items: lenItem += 1 industry_name = dict(i)['Nnindnme'] # print(' ', i) app['Times'] = lenItem app['IndustryName'] = industry_name result1.append(app) print(' ', lenItem, '---' + industry_name) we.write_excel('M&A_groupby_industry.xls', 'data', result1)
def group_M_A_by_year(): table = re.excel_table_to_OrderedDict_bySheetName(file='M&A_industry_combine_except2017.xls', by_name=u'data') for item in table: item['FirstDeclareYear'] = str(item['FirstDeclareDate']).split('-')[0] result = [] # Sort by the desired field first table.sort(key=itemgetter('FirstDeclareYear')) # Iterate in groups for FirstDeclareYear, items in groupby(table, key=itemgetter('FirstDeclareYear')): app = {} app['Year'] = FirstDeclareYear print(FirstDeclareYear) lenItem = 0 for i in items: lenItem += 1 # print(' ', i) app['Times'] = lenItem result.append(app) print(' ', lenItem) we.write_excel('M&A_groupby_year.xls', 'data', result)
def deal_change_tenure_times(): table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_VFinal.xls', by_name=u'data') print(len(table)) ceo_change_times = [] tenure_one_year = [] tenure_under_two_year = [] for StockId, items1 in groupby(table, key=itemgetter('StockId')): change_time = 0 for GM_Name, items2 in groupby(items1, key=itemgetter('GM_Name')): change_time += 1 temp_list = [] for i in items2: temp_list.append(OrderedDict(i)) if GM_Name != '' and len(temp_list) == 1: tenure_one_year.extend(temp_list) if GM_Name != '' and len(temp_list) <= 2: tenure_under_two_year.extend(temp_list) app_change_time = OrderedDict() app_change_time['StockId'] = StockId app_change_time['ChangeTime'] = change_time - 1 ceo_change_times.append(app_change_time) print(len(ceo_change_times)) print(len(tenure_one_year)) print(len(tenure_under_two_year)) tenure_without_one_year = [i for i in table if i not in tenure_one_year] tenure_without_under_two_year = [i for i in table if i not in tenure_under_two_year] print(len(tenure_without_one_year)) print(len(tenure_without_under_two_year)) we.write_excel('CEO_Change_Time.xls', 'data', ceo_change_times) we.write_excel('GM_Tenure_OneYear.xls', 'data', tenure_one_year) we.write_excel('GM_Tenure_UnderTwoYear.xls', 'data', tenure_under_two_year) we.write_excel('GM_Tenure_Without_OneYear.xls', 'data', tenure_without_one_year) we.write_excel('GM_Tenure_Without_UnderTwoYear.xls', 'data', tenure_without_under_two_year)
def add_ceoAge_to_GMTenure_V4(): table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_with_Param.xls', by_name=u'data') CEOAge = RWTXT.txt_to_dict_list('CG_Director_Age.txt') print(len(CEOAge)) for item in table: app = OrderedDict() startT = time.clock() Ceo_Age = [] if item['GM_Name'] != '': Ceo_Age = [x for x in CEOAge if x['StockId'] == item['StockId'] and x['Name'] == item['GM_Name']] if len(Ceo_Age) > 0: app['CEOAge'] = str(int(item['Year']) - int(Ceo_Age[0]['BirthYear'])) app['CEOSex'] = Ceo_Age[0]['Sex'] else: app['CEOAge'] = '' app['CEOSex'] = '' print(Ceo_Age) endT = time.clock() print('Running time:%s Seconds' % ((endT - startT))) item.update(app) print(table[0]) print(len(table)) we.write_excel('GM_Tenure_with_Param(WithCeoAge_VFinal3).xls', 'data', table)
def filter_data(): tables = re.excel_table_to_OrderedDict_bySheetName( file='GM_Tenure_with_Param(WithCeoAge_VFinal2).xls', by_name=u'data') tables = [i for i in tables if i['CashRatio'] != '' and i['GM_Name'] != ''] return tables