def visualization_ttest(eff, df):
    """Bar-plot pooled t-statistics comparing two firm groups on Malmquist indices.

    The two groups come from ``preda.malm_by_efflevel(eff, df)`` (first and
    second return value; the third one is not used here).  For each index
    family -- columns ``pc_11``..``pc_18``, ``tc_11``..``tc_18`` and
    ``ec_11``..``ec_18`` -- the eight yearly columns of a group are stacked
    into one sample, missing values are dropped, and the statistic returned
    by ``stats.ttest_ind(first_group, second_group)`` is plotted as one bar.

    Parameters
    ----------
    eff, df
        Forwarded unchanged to ``preda.malm_by_efflevel``.

    Returns
    -------
    The matplotlib axes holding the bar chart.
    """
    # The original body read ``eff_chem``/``malm_n``/``malm_f``, none of which
    # are defined in this function; use the parameter and the names bound here.
    malm_near, malm_further, _unused = preda.malm_by_efflevel(eff, df)

    years = range(11, 19)
    # (column prefix, bar label) in plotting order.
    components = [('pc', 'tfpch'), ('tc', 'techch'), ('ec', 'effch')]

    t_stats = []
    for prefix, _label in components:
        cols = ['{}_{}'.format(prefix, year) for year in years]
        # melt() stacks the yearly columns; 'value' holds the stacked numbers.
        near = pd.melt(malm_near.loc[:, cols])['value'].dropna()
        further = pd.melt(malm_further.loc[:, cols])['value'].dropna()
        t_stats.append(stats.ttest_ind(near, further)[0])

    ttest = pd.DataFrame(data=t_stats,
                         index=[label for _prefix, label in components])

    # plt.subplots returns (figure, axes); draw on that axes so the requested
    # figure size is actually used instead of leaving an empty extra figure.
    _fig, ax = plt.subplots(figsize=(10, 8))
    ttest.plot(kind='bar', color='blue', legend=False, ax=ax)

    # +/-1.645 are the standard-normal critical values for a two-sided 10 %
    # (one-sided 5 %) test; the solid line marks zero.
    ax.axhline(1.645, linestyle='--', color='grey', linewidth=2)
    ax.axhline(0, color='black', linewidth=2)
    ax.axhline(-1.645, linestyle='--', color='grey', linewidth=2)

    ax.set_xlabel('Malmquist Indices')
    ax.set_ylabel('T-Statistic')
    ax.set_title('Malmquist indices differences for laggard and leader firms')
    return ax
def independent_ttest_malm(eff_chem, df):
    """Tabulate per-column unequal-variance t-tests between the two firm groups.

    The groups are the first two values returned by
    ``preda.malm_by_efflevel(eff_chem, df)``.  The first 24 columns of both
    frames are compared position by position (second group against first
    group, NaNs dropped, ``equal_var=False``).  A halved p-value is added as
    ``pvalue_one_tail``, the table is transposed so that each test becomes a
    column, the columns are labelled ``pc_11, tc_11, ec_11, ..., ec_18`` and
    all values are rounded to three decimals.
    """
    malm_near, malm_further, _ = preda.malm_by_efflevel(eff_chem, df)

    # NOTE(review): the labels below assume the frames' columns are ordered
    # pc/tc/ec within each year 11..18 -- confirm against malm_by_efflevel.
    labels = [
        'pc_11', 'tc_11', 'ec_11', 'pc_12', 'tc_12', 'ec_12',
        'pc_13', 'tc_13', 'ec_13', 'pc_14', 'tc_14', 'ec_14',
        'pc_15', 'tc_15', 'ec_15', 'pc_16', 'tc_16', 'ec_16',
        'pc_17', 'tc_17', 'ec_17', 'pc_18', 'tc_18', 'ec_18',
    ]

    tests = [
        stats.ttest_ind(
            malm_further.iloc[:, pos].dropna(),
            malm_near.iloc[:, pos].dropna(),
            equal_var=False,
        )
        for pos in range(24)
    ]

    table = pd.DataFrame(data=tests)
    table['pvalue_one_tail'] = table['pvalue'] / 2

    table = table.T
    table.columns = labels
    return table.round(3)
Пример #3
0
         eff_elec_dmu.loc[eff_elec_dmu['firm_533'] != 0,'firm_533'],
         color='purple', linewidth=1, marker='o', markerfacecolor='violet', markersize=6)
plt.plot(eff_elec_dmu.loc[eff_elec_dmu['year'] >=2015, 'year'], 
         eff_elec_dmu.loc[eff_elec_dmu['firm_643'] != 0,'firm_643'],
         color='darkred', linewidth=1, marker='o', markerfacecolor='red', markersize=6)
plt.title('Efficient newborn firms in electronic sector', color='blue', fontsize=20)
plt.ylabel('efficiency score', color='blue', fontsize=14)
'''

################################## MACHINERY ##################################
import pandas as pd
import numpy as np
import Preprocessing_data as preda
import matplotlib.pyplot as plt
#import DEA
index = preda.get_index()

# Load the raw machinery workbook and apply the machinery date cleaning.
mac = preda.clean_datemac(preda.get_raw_data('Machinery_raw.xls'))

# Split the firms by age group, then post-process the newborn group.
old_mac, young_mac, newborn_mac = preda.get_f_wrt_age(mac)
newborn_mac = preda.get_data_newborn(newborn_mac)

# Dataframe of machinery for the whole period.
total_mac = pd.concat(
    [old_mac, young_mac, newborn_mac],
    axis=0,
    ignore_index=True,
)

# Drop firms with an unreasonable entry, multi-industry firms, and records
# that cover a whole worldwide corporation; then renumber the rows.
total_mac = (
    total_mac
    .drop(total_mac.index[total_mac['ID'].isin([271])])
    .reset_index()
    .drop('index', axis=1)
)
total_mac.columns = ['ID', 'name',
                      'fa_18', 'fa_17', 'fa_16', 'fa_15', 'fa_14', 'fa_13', 'fa_12', 'fa_11', 'fa_10',
                      'em_18', 'em_17', 'em_16', 'em_15', 'em_14','em_13', 'em_12', 'em_11', 'em_10',
                      'ec_18', 'ec_17', 'ec_16', 'ec_15', 'ec_14', 'ec_13', 'ec_12', 'ec_11', 'ec_10',
                      'mc_18', 'mc_17', 'mc_16','mc_15', 'mc_14', 'mc_13','mc_12', 'mc_11', 'mc_10',
#     try:
#         input_data = pd.read_csv(inputFileString)
#         break
#     except:
#         print("There is no file name.")
#         data = input("Try again! Input your data here: ")


# Understand data
# understand = UnderstandData(data)
# df_columns = input_data.columns.tolist()
# for i in df_columns:
#     if input_data[i].dtypes == object:
#         print(understand.describe_nominal(input_data[i]))
#     else:
#         print(understand.describe_numeric(input_data[i]))
def formatParamString(inputFileString, outputFileString, logFileString):
    """Append the expected file extensions to the three base names.

    Returns a tuple ``(input, output, log)`` where ``.csv`` has been appended
    to the input and output names and ``.txt`` to the log name.
    """
    return (
        inputFileString + '.csv',
        outputFileString + '.csv',
        logFileString + '.txt',
    )


# Turn the three base names into full file names (adds .csv / .csv / .txt).
(inputFileString,
 outputFileString,
 logFileString) = formatParamString(inputFileString,
                                    outputFileString,
                                    logFileString)

# Build the preprocessing object on those files and run its discretization.
discretize = Preprocessing_data(
    inputFileString,
    outputFileString,
    logFileString,
)
discretize.discretize()
Пример #5
0
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 16 13:17:54 2020

@author: overall result
"""
import pandas as pd
import numpy as np
import Preprocessing_data as preda
import matplotlib.pyplot as plt
import seaborn as sns
'''Report for DEA model'''

# Efficiency-score files, one per sector (chemicals, electronics, machinery).
title_list = ['eff_score_chem.csv', 'eff_score_elec.csv', 'eff_score_mac.csv']
eff_chem, eff_elec, eff_mac = map(preda.read_eff_score, title_list)

# Sales files are '|'-separated; the first column is discarded on load.
sales = ['sales_chem.csv', 'sales_elec.csv', 'sales_mac.csv']
sales_chem, sales_elec, sales_mac = (
    pd.read_csv(path, sep='|').iloc[:, 1:] for path in sales
)

# Per-DMU view of the efficiency scores for each sector.
eff_chem_dmu, eff_elec_dmu, eff_mac_dmu = (
    preda.eff_dmu(scores) for scores in (eff_chem, eff_elec, eff_mac)
)

# Summary statistics with geometric average.
summary_chem, summary_elec, summary_mac = (
    preda.efficiency_sum_stats(scores)
    for scores in (eff_chem, eff_elec, eff_mac)
)
"""with weighted average"""
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 20 22:37:21 2020

@author: Quinn
@ver1. 20.04
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import Preprocessing_data as preda
# Load the chemicals data and peek at the first rows.
chemicals_raw = preda.get_data_chem()
chemicals_raw.head()

# Inspect 'year_cor': number of missing entries and the distinct values.
chemicals_raw['year_cor'].isna().sum()
chemicals_raw['year_cor'].unique()

# Treat the literal string 'NaT' as a missing value.
chemicals_raw['year_cor'] = chemicals_raw['year_cor'].replace(
    to_replace='NaT', value=np.nan)

# Companies whose 'year_cor' is missing.
chemicals_raw.loc[chemicals_raw['year_cor'].isna(), 'company']
'''ULTRABIO WERKE GMBH 2010
   MORSA WACHSWARENFABRIK SALLINGER GMBH 
   RARO PLASTICS GMBH null
   ADVANCE PHARMA GMBH 
   1320 duplicate vs 1319
   1527 ATEKU GMBH & CO. KG 2009
   1948 null value
   2242 null
   2413 bỏ
   2724 bỏ
   bỏ
Пример #7
0
# -*- coding: utf-8 -*-
"""
Created on Tue May  5 20:31:56 2020

@author: DELL
"""

import pandas as pd
import numpy as np
import Preprocessing_data as preda
import matplotlib.pyplot as plt
# Index for deflated data.
index = preda.get_index()

# Load the raw electronics workbook and apply the electronics date cleaning.
elec = preda.clean_dateelec(preda.get_raw_data('Electronics_raw.xls'))

# Split the firms by age group, then post-process the newborn group.
old_elec, young_elec, newborn_elec = preda.get_f_wrt_age(elec)
newborn_elec = preda.get_data_newborn(newborn_elec)

# Dataframe of electronics for the whole period.
total_elec = pd.concat(
    [old_elec, young_elec, newborn_elec], axis=0, ignore_index=True)

# Drop firms with an unreasonable entry, multi-industry firms, and records
# that cover a whole worldwide corporation; then renumber the rows.
total_elec = (
    total_elec
    .drop(total_elec.index[total_elec['ID'].isin([6, 11, 23, 423, 98, 85])])
    .reset_index()
    .drop('index', axis=1)
)
total_elec.columns = [
    'ID', 'name', 'fa_18', 'fa_17', 'fa_16', 'fa_15', 'fa_14', 'fa_13',
    'fa_12', 'fa_11', 'fa_10', 'em_18', 'em_17', 'em_16', 'em_15', 'em_14',
Пример #8
0
# -*- coding: utf-8 -*-
"""
Created on Tue May  5 16:39:17 2020

@author: DELL
"""
import pandas as pd
import numpy as np
import Preprocessing_data as preda
import matplotlib.pyplot as plt
import seaborn as sns
# Index for deflated data.
index = preda.get_index()

# Manipulate raw data, prepare data for main analysis.
chemicals_raw = preda.get_raw_data('Chemicals_raw.xls')
chem = preda.clean_datechem(chemicals_raw)

# Split the firms by age group, then post-process the newborn group.
old_chem, young_chem, newborn_chem = preda.get_f_wrt_age(chem)
newborn_chem = preda.get_data_newborn(newborn_chem)

# Dataframe of chemicals for the whole period.
total_chem = pd.concat(
    [old_chem, young_chem, newborn_chem], axis=0, ignore_index=True)

# Drop firms with an unreasonable entry, multi-industry firms, and records
# that cover a whole worldwide corporation; then renumber the rows.
total_chem = (
    total_chem
    .drop(total_chem.index[total_chem['ID'].isin([8, 21, 51, 54, 156])])
    .reset_index()
    .drop('index', axis=1)
)

# Short column names: identifier columns, then one nine-column group per
# prefix with year suffixes running from 18 down to 10, then 'year_cor'.
total_chem.columns = [
    'ID', 'name',
    'fa_18', 'fa_17', 'fa_16', 'fa_15', 'fa_14', 'fa_13', 'fa_12', 'fa_11', 'fa_10',
    'em_18', 'em_17', 'em_16', 'em_15', 'em_14', 'em_13', 'em_12', 'em_11', 'em_10',
    'ec_18', 'ec_17', 'ec_16', 'ec_15', 'ec_14', 'ec_13', 'ec_12', 'ec_11', 'ec_10',
    'mc_18', 'mc_17', 'mc_16', 'mc_15', 'mc_14', 'mc_13', 'mc_12', 'mc_11', 'mc_10',
    's_18', 's_17', 's_16', 's_15', 's_14', 's_13', 's_12', 's_11', 's_10',
    'year_cor',
]