def visualization_ttest(eff, df):
    """Plot t-statistics comparing Malmquist indices of two firm groups.

    The two groups are the first two values returned by
    ``preda.malm_by_efflevel(eff, df)``. For each index family
    (``pc_*``, ``tc_*``, ``ec_*``; suffixes 11 through 18) the yearly
    columns of each group are pooled into a single sample, missing values
    are dropped, and ``stats.ttest_ind`` is run with the "near" group as
    the first sample and the "further" group as the second.

    Parameters
    ----------
    eff, df
        Forwarded unchanged to ``preda.malm_by_efflevel``.
        NOTE(review): presumably efficiency scores and the firm-level data
        frame for one sector -- confirm against ``Preprocessing_data``.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the bar chart (one bar per index family) with
        horizontal reference lines at 0 and +/-1.645.
    """
    # Use the ``eff`` argument; the third return value is not needed here.
    malm_near, malm_further, _ = preda.malm_by_efflevel(eff, df)

    years = range(11, 19)
    t_stats = []
    for prefix in ('pc', 'tc', 'ec'):
        columns = ['{}_{}'.format(prefix, year) for year in years]
        # melt() stacks the yearly columns; column 1 holds the values.
        near_sample = pd.melt(malm_near.loc[:, columns]).iloc[:, 1].dropna()
        further_sample = pd.melt(
            malm_further.loc[:, columns]).iloc[:, 1].dropna()
        # Element 0 of the test result is the t-statistic.
        t_stats.append(stats.ttest_ind(near_sample, further_sample)[0])

    ttest = pd.DataFrame(data=t_stats)
    # Bar labels follow the pc / tc / ec order used above.
    ttest.index = ['tfpch', 'techch', 'effch']

    # Draw on the axes created here: without ``ax=`` pandas opens a second
    # figure and the 10x8 one is left empty.
    _, ax = plt.subplots(figsize=(10, 8))
    ttest.plot(kind='bar', color='blue', legend=False, ax=ax)

    # +/-1.645 reference lines around zero.
    # NOTE(review): 1.645 looks like the normal critical value for a 10%
    # two-sided (5% one-sided) test -- confirm the intended level.
    ax.axhline(1.645, linestyle='--', color='grey', linewidth=2)
    ax.axhline(0, color='black', linewidth=2)
    ax.axhline(-1.645, linestyle='--', color='grey', linewidth=2)

    ax.set_xlabel('Malmquist Indices')
    ax.set_ylabel('T-Statistic')
    ax.set_title('Malmquist indices differences for laggard and leader firms')
    return ax
def independent_ttest_malm(eff_chem, df):
    """Run per-column t-tests between the "further" and "near" groups.

    The groups are the first two values returned by
    ``preda.malm_by_efflevel(eff_chem, df)``. The first 24 columns of both
    frames are compared position by position with ``stats.ttest_ind``
    (``equal_var=False``), the "further" group being the first sample;
    missing values are dropped per column.

    Returns
    -------
    pandas.DataFrame
        One column per index/year label (``pc_11`` ... ``ec_18``). Rows are
        the fields of the test result plus ``pvalue_one_tail`` (the
        two-sided p-value halved). All values are rounded to 3 decimals.
    """
    malm_near, malm_further, _ = preda.malm_by_efflevel(eff_chem, df)

    outcomes = [
        stats.ttest_ind(
            malm_further.iloc[:, position].dropna(),
            malm_near.iloc[:, position].dropna(),
            equal_var=False,
        )
        for position in range(24)
    ]

    table = pd.DataFrame(data=outcomes)
    table['pvalue_one_tail'] = table['pvalue'] / 2
    table = table.T

    # Labels are assigned by position: pc, tc, ec for each of 11..18.
    table.columns = [
        '{}_{}'.format(family, year)
        for year in range(11, 19)
        for family in ('pc', 'tc', 'ec')
    ]
    return table.round(3)
eff_elec_dmu.loc[eff_elec_dmu['firm_533'] != 0,'firm_533'], color='purple', linewidth=1, marker='o', markerfacecolor='violet', markersize=6) plt.plot(eff_elec_dmu.loc[eff_elec_dmu['year'] >=2015, 'year'], eff_elec_dmu.loc[eff_elec_dmu['firm_643'] != 0,'firm_643'], color='darkred', linewidth=1, marker='o', markerfacecolor='red', markersize=6) plt.title('Efficient newborn firms in electronic sector', color='blue', fontsize=20) plt.ylabel('efficiency score', color='blue', fontsize=14) ''' ################################## MACHINERY ################################## import pandas as pd import numpy as np import Preprocessing_data as preda import matplotlib.pyplot as plt #import DEA index = preda.get_index() mac = preda.get_raw_data('Machinery_raw.xls') mac = preda.clean_datemac(mac) old_mac, young_mac, newborn_mac = preda.get_f_wrt_age(mac) newborn_mac = preda.get_data_newborn(newborn_mac) '''dataframe of machinery for the whole period''' total_mac = pd.concat([old_mac,young_mac,newborn_mac], axis =0, ignore_index = True) '''drop some firms that have unreasonable entry, firms with multi-industry, and data for the whole cooperation around the world''' total_mac = total_mac.drop(total_mac.index[total_mac['ID'].isin([271])]).reset_index() total_mac = total_mac.drop('index', axis = 1) total_mac.columns = ['ID', 'name', 'fa_18', 'fa_17', 'fa_16', 'fa_15', 'fa_14', 'fa_13', 'fa_12', 'fa_11', 'fa_10', 'em_18', 'em_17', 'em_16', 'em_15', 'em_14','em_13', 'em_12', 'em_11', 'em_10', 'ec_18', 'ec_17', 'ec_16', 'ec_15', 'ec_14', 'ec_13', 'ec_12', 'ec_11', 'ec_10', 'mc_18', 'mc_17', 'mc_16','mc_15', 'mc_14', 'mc_13','mc_12', 'mc_11', 'mc_10',
# try:
#     input_data = pd.read_csv(inputFileString)
#     break
# except:
#     print("There is no file name.")
#     data = input("Try again! Input your data here: ")

# Understand data
# understand = UnderstandData(data)
# df_columns = input_data.columns.tolist()
# for i in df_columns:
#     if input_data[i].dtypes == object:
#         print(understand.describe_nominal(input_data[i]))
#     else:
#         print(understand.describe_numeric(input_data[i]))


def formatParamString(inputFileString, outputFileString, logFileString):
    """Append the expected file extensions to the three base names.

    Returns a tuple ``(input, output, log)`` where the input and output
    names get a ``.csv`` suffix and the log name gets a ``.txt`` suffix.
    The suffix is always appended, even if the name already carries one.
    """
    return (
        inputFileString + '.csv',
        outputFileString + '.csv',
        logFileString + '.txt',
    )


# Turn the base names into full file names, then run the discretization.
inputFileString, outputFileString, logFileString = formatParamString(
    inputFileString, outputFileString, logFileString)

discretize = Preprocessing_data(inputFileString, outputFileString,
                                logFileString)
discretize.discretize()
# -*- coding: utf-8 -*- """ Created on Tue Jun 16 13:17:54 2020 @author: overall result """ import pandas as pd import numpy as np import Preprocessing_data as preda import matplotlib.pyplot as plt import seaborn as sns '''Report for DEA model''' title_list = ['eff_score_chem.csv', 'eff_score_elec.csv', 'eff_score_mac.csv'] eff_chem, eff_elec, eff_mac = [ preda.read_eff_score(title) for title in title_list ] sales = ['sales_chem.csv', 'sales_elec.csv', 'sales_mac.csv'] sales_chem, sales_elec, sales_mac = [ pd.read_csv(title, sep='|').iloc[:, 1:] for title in sales ] eff_chem_dmu = preda.eff_dmu(eff_chem) eff_elec_dmu = preda.eff_dmu(eff_elec) eff_mac_dmu = preda.eff_dmu(eff_mac) """with geometric average""" summary_chem = preda.efficiency_sum_stats(eff_chem) summary_elec = preda.efficiency_sum_stats(eff_elec) summary_mac = preda.efficiency_sum_stats(eff_mac) """with weighted average"""
# -*- coding: utf-8 -*- """ Created on Mon Apr 20 22:37:21 2020 @author: Quinn @ver1. 20.04 """ import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns import Preprocessing_data as preda chemicals_raw=preda.get_data_chem() chemicals_raw.head() chemicals_raw['year_cor'].isnull().sum() chemicals_raw['year_cor'].unique() chemicals_raw['year_cor']=chemicals_raw['year_cor'].replace('NaT', np.nan) chemicals_raw[chemicals_raw['year_cor'].isnull()]['company'] '''ULTRABIO WERKE GMBH 2010 MORSA WACHSWARENFABRIK SALLINGER GMBH RARO PLASTICS GMBH null ADVANCE PHARMA GMBH 1320 duplicate vs 1319 1527 ATEKU GMBH & CO. KG 2009 1948 null value 2242 null 2413 bỏ 2724 bỏ bỏ
# -*- coding: utf-8 -*- """ Created on Tue May 5 20:31:56 2020 @author: DELL """ import pandas as pd import numpy as np import Preprocessing_data as preda import matplotlib.pyplot as plt # index for deflated data index = preda.get_index() elec = preda.get_raw_data('Electronics_raw.xls') elec = preda.clean_dateelec(elec) old_elec, young_elec, newborn_elec = preda.get_f_wrt_age(elec) newborn_elec = preda.get_data_newborn(newborn_elec) '''dataframe of electronic for the whole period''' total_elec = pd.concat([old_elec, young_elec, newborn_elec], axis=0, ignore_index=True) '''drop some firms that have unreasonable entry, firms with multi-industry, and data for the whole cooperation around the world''' total_elec = total_elec.drop(total_elec.index[total_elec['ID'].isin( [6, 11, 23, 423, 98, 85])]).reset_index() total_elec = total_elec.drop('index', axis=1) total_elec.columns = [ 'ID', 'name', 'fa_18', 'fa_17', 'fa_16', 'fa_15', 'fa_14', 'fa_13', 'fa_12', 'fa_11', 'fa_10', 'em_18', 'em_17', 'em_16', 'em_15', 'em_14',
# -*- coding: utf-8 -*- """ Created on Tue May 5 16:39:17 2020 @author: DELL """ import pandas as pd import numpy as np import Preprocessing_data as preda import matplotlib.pyplot as plt import seaborn as sns # index for deflated data index = preda.get_index() # manipulate raw data, prepare data for main analysis chemicals_raw = preda.get_raw_data('Chemicals_raw.xls') chem =preda.clean_datechem(chemicals_raw) old_chem, young_chem, newborn_chem = preda.get_f_wrt_age(chem) newborn_chem = preda.get_data_newborn(newborn_chem) '''dataframe of chemicals for the whole period''' total_chem = pd.concat([old_chem,young_chem,newborn_chem], axis = 0, ignore_index = True) '''drop some firms that have unreasonable entry, firms with multi-industry, and data for the whole cooperation around the world''' total_chem = total_chem.drop(total_chem.index[total_chem['ID'].isin([8,21,51,54,156])]).reset_index() total_chem = total_chem.drop('index', axis = 1) total_chem.columns = ['ID', 'name', 'fa_18', 'fa_17', 'fa_16', 'fa_15', 'fa_14', 'fa_13', 'fa_12', 'fa_11', 'fa_10', 'em_18', 'em_17', 'em_16', 'em_15', 'em_14','em_13', 'em_12', 'em_11', 'em_10', 'ec_18', 'ec_17', 'ec_16', 'ec_15', 'ec_14', 'ec_13', 'ec_12', 'ec_11', 'ec_10', 'mc_18', 'mc_17', 'mc_16','mc_15', 'mc_14', 'mc_13','mc_12', 'mc_11', 'mc_10', 's_18', 's_17', 's_16', 's_15', 's_14','s_13','s_12', 's_11', 's_10', 'year_cor']