def visualization_ttest(eff, df):
    """Bar-plot the t-statistics comparing two firm groups on three indices.

    The two groups come from ``preda.malm_by_efflevel(eff, df)`` (presumably
    firms near to / further from the efficiency frontier, judging by the
    returned names -- confirm against that helper). For each column prefix
    ``pc``, ``tc`` and ``ec`` the ``_11`` .. ``_18`` columns are pooled into
    one sample per group and an independent two-sample t-test is run
    (first group vs. second group).

    Parameters
    ----------
    eff, df
        Forwarded unchanged to ``preda.malm_by_efflevel``.

    Returns
    -------
    matplotlib Axes
        The axes holding the bar chart, one bar per index, labelled
        ``tfpch``, ``techch`` and ``effch``.
    """
    # Use the `eff` argument (not a module-level global) and the names that
    # are actually bound by the unpacking.
    malm_near, malm_further, _ = preda.malm_by_efflevel(eff, df)

    ttest = pd.DataFrame(
        data=[
            _pooled_tstat(malm_near, malm_further, prefix)
            for prefix in ('pc', 'tc', 'ec')
        ]
    )
    ttest.index = ['tfpch', 'techch', 'effch']

    # plt.subplots returns (figure, axes); draw on that axes so the requested
    # figure size is applied and no stray empty figure is left open.
    _, ax = plt.subplots(figsize=(10, 8))
    ttest.plot(kind='bar', color='blue', legend=False, ax=ax)

    # Dashed reference lines at +/-1.645 (the 5% one-tailed critical value of
    # the standard normal) around a solid zero line.
    ax.axhline(1.645, linestyle='--', color='grey', linewidth=2)
    ax.axhline(0, color='black', linewidth=2)
    ax.axhline(-1.645, linestyle='--', color='grey', linewidth=2)

    ax.set_xlabel('Malmquist Indices')
    ax.set_ylabel('T-Statistic')
    ax.set_title('Malmquist indices differences for laggard and leader firms')
    return ax


def _pooled_tstat(first, second, prefix):
    """Return the t-statistic of `first` vs `second` for one column prefix.

    The ``<prefix>_11`` .. ``<prefix>_18`` columns of each frame are melted
    into a single value column, missing values are dropped, and the two
    pooled samples are compared with ``stats.ttest_ind``.
    """
    columns = ['{}_{}'.format(prefix, suffix) for suffix in range(11, 19)]
    # After pd.melt the second column holds the values.
    sample_first = pd.melt(first.loc[:, columns]).iloc[:, 1].dropna()
    sample_second = pd.melt(second.loc[:, columns]).iloc[:, 1].dropna()
    return stats.ttest_ind(sample_first, sample_second)[0]
def independent_ttest_malm(eff_chem, df):
    """Run a per-column unequal-variance t-test between the two firm groups.

    The two groups are the first two values returned by
    ``preda.malm_by_efflevel(eff_chem, df)``. The first 24 columns of both
    frames are compared position by position (second group vs. first group)
    after dropping missing values.

    Returns a DataFrame with rows ``statistic``, ``pvalue`` and
    ``pvalue_one_tail`` (the two-sided p-value halved) and one column per
    test, labelled ``pc_11, tc_11, ec_11, ..., pc_18, tc_18, ec_18``, with
    all values rounded to three decimals.

    NOTE(review): the labels assume the helper's frames are ordered
    pc/tc/ec within each year suffix -- confirm against
    ``preda.malm_by_efflevel``.
    """
    malm_near, malm_further, _ = preda.malm_by_efflevel(eff_chem, df)

    tests = [
        stats.ttest_ind(
            malm_further.iloc[:, position].dropna(),
            malm_near.iloc[:, position].dropna(),
            equal_var=False,
        )
        for position in range(24)
    ]

    table = pd.DataFrame(data=tests)
    table['pvalue_one_tail'] = table['pvalue'] / 2

    # One column per test, one row per reported quantity.
    table = table.T
    table.columns = [
        '{}_{}'.format(index_name, suffix)
        for suffix in range(11, 19)
        for index_name in ('pc', 'tc', 'ec')
    ]
    return table.round(3)
# Exploratory, notebook-style checks: element-wise comparisons between the
# "_near" and "_further" columns of `result`. The outcomes are not assigned,
# so they are only visible when run interactively.
# NOTE(review): `result` is not defined above this point in the visible
# file -- presumably it comes from preda.malm_by_efflevel; confirm.
result['tfpch_near'] < result['tfpch_further']
result['techch_near'] > result['techch_further']
result['effch_near'] < result['effch_further']

# Long-format view of `result` keyed by 'year' (value is discarded).
pd.melt(result, ['year'])
plt.subplots(figsize=(8,6))
# Line plot over 'year' of the columns at positions 5 and 6 of `result`;
# presumably position 0 is the 'year' column -- TODO confirm.
sns.lineplot(x='year', y='value', hue='variable',
                 data=pd.melt(result.iloc[:,[0,5,6]], ['year']), marker='o')
# NOTE(review): the title mentions efficiency scores by firm age -- confirm
# it matches the columns actually plotted.
plt.title('Average efficiency scores of chemical firms by ages', color='black')

'''Independent-samples t-test to confirm the statistical significance of the effect of distance from
the frontier on TFPCH, EFFCH, and TECHCH.'''

from scipy import stats

# Split the Malmquist results into the two groups returned by the project
# helper. Every other call in this file unpacks three values, so the stale
# two-name unpacking of the same call (which raises ValueError on a
# three-value return) has been dropped.
malm_near, malm_further, result = preda.malm_by_efflevel(eff_chem, df)

# Keep only the rows where all three `_15` indices are present in each group.
p7_11_n = malm_near.loc[:, ['pc_15', 'tc_15', 'ec_15']].dropna()
p7_11_f = malm_further.loc[:, ['pc_15', 'tc_15', 'ec_15']].dropna()

# Welch's (unequal-variance) t-test on the second selected column, `tc_15`.
# The result is not stored; it is only displayed when run interactively.
stats.ttest_ind(p7_11_n.iloc[:, 1], p7_11_f.iloc[:, 1], equal_var=False)

from scipy import stats
def independent_ttest_malm(eff_chem, df):
    result = []
    malm_near, malm_further, r = preda.malm_by_efflevel(eff_chem, df)
    for i in range(0,24):
        result.append(stats.ttest_ind(malm_further.iloc[:,i].dropna(),malm_near.iloc[:,i].dropna(),equal_var=False))
    result = pd.DataFrame(data=result)