Пример #1
0
 def test_score(self):
     """Fit the propensity model on one continuous and two ordinal
     confounders and check the recovered logit coefficients.

     True coefficients are 1 for z1 and z2; the ordinal z3 enters through
     levels valued 3/4/5, so the dummy contrasts are b-a = 1 and c-a = 2.
     """
     sample_size = 100000
     z1 = np.random.normal(size=sample_size)
     z2 = np.random.choice([0, 1], size=sample_size)
     z3 = np.random.choice(['a', 'b', 'c'], size=sample_size)
     level_to_value = {'a': 3, 'b': 4, 'c': 5}
     z3_numeric = [level_to_value[level] for level in z3]
     # Logistic assignment probability in the linear predictor.
     linear_predictor = z1 + z2 + z3_numeric
     p_assign = np.exp(linear_predictor) / (1. + np.exp(linear_predictor))
     assignment = np.random.binomial(1, p_assign)
     outcome = np.random.normal(assignment)
     matcher = PropensityScoreMatching()
     X = pd.DataFrame({
         'z1': z1,
         'z2': z2,
         'z3': z3,
         'assignment': assignment,
         'outcome': outcome
     })
     confounder_types = {'z1': 'c', 'z2': 'o', 'z3': 'o'}
     matcher.score(X, confounder_types, store_model_fit=True)
     assert 0.9 <= matcher.model_fit.params['z1'] <= 1.1
     assert 0.9 <= matcher.model_fit.params['z2'] <= 1.1
     assert 0.0 <= matcher.model_fit.params['z3_b'] <= 2.0
     assert 1.0 <= matcher.model_fit.params['z3_c'] <= 3.0
     assert 2.0 <= matcher.model_fit.params['intercept'] <= 4.0
Пример #2
0
def sxxp_womenBoard_tobin():
    '''
    Test the effect of having a large % of women on the board on tobins
    M&M:
        A large percentage of women in the board could also affect negatively the performance.
    '''
    data, types = getData("sxxp_fboard", "corp_gov_causal")
    # NOTE(review): controlFor is never used below (a hard-coded confounder
    # set is passed instead) — kept in case stageAlgo has side effects.
    controlFor = stageAlgo(types)
    treatment = 'X..Women.on.Bd'
    target = 'Tobins.Q'

    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(
        data, treatment, target,
        {'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'},
        bootstrap=True)

    # Persist the result. Use a parameterized query instead of %-string
    # interpolation: the old form broke on any quote inside the values and
    # was injection-prone. close() is guarded so a failed insert cannot
    # leak the connection.
    conn = MySQLdb.connect(host="localhost",
                           user="******",
                           passwd="",
                           db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s, %s, %s, %s, %s, %s)",
            (now, "sxxp", treatment, target, str(ATE_results),
             "A large percentage of women in the board could also affect negatively the performance."))
        conn.commit()
    finally:
        conn.close()
    print("Done")
Пример #3
0
def spx_indepDirFinlL_azs():
    '''
    Test the effect of having a lead indep director and fincl leverage > 2.5
    M&M:
        ...but also the presence of an independent lead director in the company
        along with a financial leverage higher than 2.5 incur a higher risk of bankruptcy.
    Latest Results:
        (-0.47855609106276292, -0.4343301267327499, -0.3864914259963988)
    Comments:
        So this 'treatment' causes quite a dip in AZS, which is what MM are saying
    '''
    data, types = getData("spx_indepdirfincl", "corp_gov_causal")
    # NOTE(review): controlFor is never used below (a hard-coded confounder
    # set is passed instead) — kept in case stageAlgo has side effects.
    controlFor = stageAlgo(types)
    treatment = 'Indep.Lead.Dir.Fincl..l'
    target = 'AZS'

    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(
        data, treatment, target,
        {'P.EBITDA': 'c', 'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'},
        bootstrap=True)

    # Persist the result. Parameterized query instead of %-interpolation:
    # the old form broke on quotes inside the values and was injection-prone.
    conn = MySQLdb.connect(host="localhost",
                           user="******",
                           passwd="",
                           db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s, %s, %s, %s, %s, %s)",
            (now, "spx", treatment, target, str(ATE_results),
             "...but also the presence of an independent lead director in the company along with a financial leverage higher than 2.5 incur a higher risk of bankruptcy."))
        conn.commit()
    finally:
        conn.close()
    print("Done")
Пример #4
0
def spx_fceo_tobin():
    '''
    Test the effect of having a female ceo on tobins
    M&M:
        This is my own
    Latest Results:
        (-0.48556936715099608, -0.3878325746547393, -0.29127847792553346)
    Comments:
        Non-zero influence?
    '''
    data, types = getData("spx_fceo", "corp_gov_causal")
    # NOTE(review): controlFor is never used below (a hard-coded confounder
    # set is passed instead) — kept in case stageAlgo has side effects.
    controlFor = stageAlgo(types)
    treatment = 'Feml.CEO.or.Equiv'
    target = 'Tobins.Q'

    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(
        data, treatment, target,
        {'P.EBITDA': 'c', 'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'},
        bootstrap=True)

    # Persist the result. Parameterized query instead of %-interpolation:
    # the old form broke on quotes inside the values and was injection-prone.
    conn = MySQLdb.connect(host="localhost",
                           user="******",
                           passwd="",
                           db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s, %s, %s, %s, %s, %s)",
            (now, "spx", treatment, target, str(ATE_results),
             "This is my own"))
        conn.commit()
    finally:
        conn.close()
    print("Done")
Пример #5
0
    def test_match(self):
        """The single treated unit (score 3) should be matched to its three
        nearest controls by propensity score: 2, 3 and 4."""
        matcher = PropensityScoreMatching()
        frame = pd.DataFrame({
            'assignment': [1, 0, 0, 0, 0, 0],
            'propensity score': [3, 1, 2, 3, 5, 4],
        })

        treated, control = matcher.match(frame, n_neighbors=3)
        assert set(control['propensity score'].values) == {2, 3, 4}
Пример #6
0
def eebp_finlL_tobins():
    '''
    Test the effect of financial leverage on tobins
    M&M:
        ...and that a financial leverage less than 4 is needed in order to be on the upper side of the Tobin’s Q ratio
    '''
    data, types = getData("eebp_fl", "corp_gov_causal")
    # NOTE(review): controlFor is never used below (a hard-coded confounder
    # set is passed instead) — kept in case stageAlgo has side effects.
    controlFor = stageAlgo(types)
    treatment = 'Fincl.l.treatment'
    target = 'Tobins.Q'

    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(
        data, treatment, target,
        {'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'},
        bootstrap=True)

    # Persist the result. Parameterized query instead of %-interpolation:
    # the old form broke on quotes inside the values and was injection-prone.
    conn = MySQLdb.connect(host="localhost",
                           user="******",
                           passwd="",
                           db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s, %s, %s, %s, %s, %s)",
            (now, "eebp", treatment, target, str(ATE_results),
             "...and that a financial leverage less than 4 is needed in order to be on the upper side of the Tobins Q ratio"))
        conn.commit()
    finally:
        conn.close()
    print("Done")
Пример #7
0
def psm(tdf, i):
    """Estimate the bootstrap ATE of each bias/misinfo cause on 'moderated'
    via propensity score matching, writing one CSV of samples per cause.

    tdf: dataframe of observations; i: integer used to suffix output files.
    """
    start_time = time.time()
    causes = [
        "bias_party", "bias_degree", "misinfo_factcheck", "misinfo_veracity"
    ]
    # Confounders shared by every run; 'o' = ordinal, 'u' = unordered.
    base_confounders = {
        "meta_like": "o",
        "meta_dislike": "o",
        "meta_view": "o",
        "linguist_swear": "u",
        "linguist_laugh": "u",
        "linguist_emoji": "u",
        "linguist_fake": "u",
        "linguist_administration": "u",
        "linguist_american": "u",
        "linguist_nation": "u",
        "linguist_personal": "u"
    }
    for cause in causes:
        # Every other candidate cause is controlled for as a confounder.
        confounders = dict(base_confounders)
        for other in causes:
            if other != cause:
                confounders[other] = "u"
        matcher = PropensityScoreMatching()
        samples = matcher.estimate_ATE(tdf, cause, "moderated", confounders,
                                       bootstrap=True)
        out_file = os.path.join(ate_path, cause + "_per" + str(i) + ".csv")
        samples.to_csv(out_file, index=False)
        print(time.time() - start_time)
Пример #8
0
    def test_at_estimators(self):
        """Recover the known ATE (= 1) from synthetic data with logistic
        selection into treatment driven by three confounders."""
        sample_size = 1000

        # Confounders that drive both the treatment propensity and outcome.
        z1 = 0.5 * np.random.normal(size=sample_size)
        z2 = 0.5 * np.random.normal(size=sample_size)
        z3 = 0.5 * np.random.normal(size=sample_size)

        linear_predictor = (z1 + z2 + z3 + np.random.normal(size=sample_size))
        # Logistic propensity P(d=1|z).
        p = np.exp(linear_predictor) / (1. + np.exp(linear_predictor))
        d = np.random.binomial(1, p)

        # Unit-level effect is (z1 + z2 + z3 + 1), so the true ATE is 1.
        y = np.random.normal(size=sample_size) + (z1 + z2 + z3 + 1.) * d

        frame = pd.DataFrame({'d': d, 'z1': z1, 'z2': z2,
                              'z3': z3, 'y': y, 'p': p})

        matcher = PropensityScoreMatching()
        estimate = matcher.estimate_ATE(frame, 'd', 'y',
                                        {'z1': 'c', 'z2': 'c', 'z3': 'c'})
        assert 0.9 <= estimate <= 1.1
Пример #9
0
    def test_at_estimators(self):
        """Average ATT, ATC and their mean (ATE) over 100 replications;
        with a true effect of 3.1 all three means should land in [3, 4]."""
        ate_samples = []
        atc_samples = []
        att_samples = []
        for _ in range(100):
            n = 1000
            z = np.random.choice([0.25, 0.75], size=n)
            frame = pd.DataFrame(z, columns=['Z'])
            frame.loc[:, 'assignment'] = np.random.binomial(1, p=frame['Z'])
            frame.loc[:, 'outcome'] = np.random.normal(
                3.1 * frame['assignment'] + 2.0 * frame['Z'])

            matcher = PropensityScoreMatching()
            att = matcher.estimate_ATT(frame, 'assignment', 'outcome',
                                       {'Z': 'c'}, n_neighbors=10)
            # Flipping the treatment indicator turns estimate_ATT into an
            # estimate of the ATC.
            frame.loc[:, 'inverted assignment'] = (frame['assignment'] + 1) % 2
            atc = matcher.estimate_ATT(frame, 'inverted assignment', 'outcome',
                                       {'Z': 'c'}, n_neighbors=10)

            att_samples.append(att)
            atc_samples.append(atc)
            ate_samples.append((att + atc) / 2.)
        summary = pd.DataFrame({'att': att_samples, 'ate': ate_samples,
                                'atc': atc_samples})
        assert (3.0 <= summary.mean()).all()
        assert (summary.mean() <= 4.0).all()
Пример #10
0
def eebp_ageRange_tobins():
    '''
    Test the effect of age range in board on tobins
    M&M:
        we found that a smaller age range for the board members is positively related with the companies’ performance
    '''
    data, types = getData("eebp_agerange", "corp_gov_causal")
    # NOTE(review): controlFor is never used below (a hard-coded confounder
    # set is passed instead) — kept in case stageAlgo has side effects.
    controlFor = stageAlgo(types)
    treatment = 'BOD.Age.Rng'
    target = 'Tobins.Q'

    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(
        data, treatment, target,
        {'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'},
        bootstrap=True)

    # Persist the result. Parameterized query instead of %-interpolation:
    # the old form broke on quotes inside the values and was injection-prone.
    conn = MySQLdb.connect(host="localhost",
                           user="******",
                           passwd="",
                           db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s, %s, %s, %s, %s, %s)",
            (now, "eebp", treatment, target, str(ATE_results),
             "we found that a smaller age range for the board members is positively related with the companies performance"))
        conn.commit()
    finally:
        conn.close()
    print("Done")
Пример #11
0
def sxxp_indepDirFormerCEOBoard_tobin():
    '''
    Test the effect of having a lead indep director or former ceo on board on tobins Q
    M&M:
        the presence of an independent lead director or a former CEO in the board could be a sign of weaker performances, being negatively correlated with Tobin’s Q
    Latest Results:
        (0.012699075182737657, 0.05961530907139483, 0.099317124249211436)
    Comments:
        Nothing much, but is positive contrary to what MM say
    '''
    data, types = getData("sxxp_indepdirfceo", "corp_gov_causal")
    # NOTE(review): controlFor is never used below (a hard-coded confounder
    # set is passed instead) — kept in case stageAlgo has side effects.
    controlFor = stageAlgo(types)
    treatment = 'Indep.Lead.Dir.Feml.CEO.or.Equiv'
    target = 'Tobins.Q'

    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(
        data, treatment, target,
        {'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'},
        bootstrap=True)

    # Persist the result. Parameterized query instead of %-interpolation:
    # the old form broke on quotes inside the values and was injection-prone.
    conn = MySQLdb.connect(host="localhost",
                           user="******",
                           passwd="",
                           db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s, %s, %s, %s, %s, %s)",
            (now, "sxxp", treatment, target, str(ATE_results),
             "the presence of an independent lead director or a former CEO in the board could be a sign of weaker performances, being negatively correlated with Tobins Q"))
        conn.commit()
    finally:
        conn.close()
    print("Done")
Пример #12
0
    def test_match(self):
        """One treated unit with propensity score 3 against five controls:
        the three nearest control scores (2, 3, 4) must be selected."""
        matcher = PropensityScoreMatching()
        frame = pd.DataFrame({'assignment': [1, 0, 0, 0, 0, 0],
                              'propensity score': [3, 1, 2, 3, 5, 4]})

        treated, control = matcher.match(frame, n_neighbors=3)
        assert set(control['propensity score'].values) == {2, 3, 4}
Пример #13
0
    def test_match(self):
        """Each treated unit's stored match indices must point at its three
        nearest controls by propensity score."""
        matcher = PropensityScoreMatching()
        frame = pd.DataFrame({
            "assignment": [1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
            "propensity score": [1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
        })

        treated, control = matcher.match(frame, n_neighbors=3)

        # Treated score -> expected set of matched control scores.
        for score, expected in ((2, {1, 2, 3}), (4, {3, 4, 5})):
            matches = treated[treated["propensity score"] == score]["matches"].values[0][0]
            assert set(control.iloc[matches]["propensity score"].values) == expected
Пример #14
0
    def test_match(self):
        """Match indices for treated scores 2 and 4 must hit their three
        nearest controls by propensity score."""
        matcher = PropensityScoreMatching()
        frame = pd.DataFrame({
            'assignment': [1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
            'propensity score': [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
        })

        treated, control = matcher.match(frame, n_neighbors=3)

        def matched_scores(score):
            # Control propensity scores matched to the treated row `score`.
            idx = treated[treated['propensity score'] == score]['matches'].values[0][0]
            return set(control.iloc[idx]['propensity score'].values)

        assert matched_scores(2) == {1, 2, 3}
        assert matched_scores(4) == {3, 4, 5}
Пример #15
0
 def test_score(self):
     """Fit the propensity model with one continuous and one ordinal
     confounder; levels valued 3/4/5 give dummy contrasts b-a = 1, c-a = 2."""
     sample_size = 5000
     z1 = np.random.normal(size=sample_size)
     z2 = np.random.choice(['a', 'b', 'c'], size=sample_size)
     level_to_value = {'a': 3, 'b': 4, 'c': 5}
     z2_numeric = [level_to_value[level] for level in z2]
     linear_predictor = z1 + z2_numeric
     p_assign = np.exp(linear_predictor) / (1. + np.exp(linear_predictor))
     assignment = np.random.binomial(1, p_assign)
     outcome = np.random.normal(assignment)
     matcher = PropensityScoreMatching()
     X = pd.DataFrame({'z1': z1, 'z2': z2,
                       'assignment': assignment, 'outcome': outcome})
     confounder_types = {'z1': 'c', 'z2': 'o'}
     matcher.score(X, confounder_types, store_model_fit=True)
     assert 0.7 <= matcher.propensity_score_model.params['z1'] <= 1.3
     assert 0.0 <= matcher.propensity_score_model.params['z2_b'] <= 2.0
     assert 1.0 <= matcher.propensity_score_model.params['z2_c'] <= 3.0
     assert 2.0 <= matcher.propensity_score_model.params['intercept'] <= 4.0
Пример #16
0
def eebp_indepChaFCEO_azs():
    '''
    Test the effect of having an independent chairperson or a female CEO on
    the (binary) Altman Z-score class.
    M&M:
        ...to be on the “safe” zone of the Altman Z-score it is important to have an independent chairperson or even a woman as CEO.
    '''
    data, types = getData("eebp_indepChFmlCEO", "corp_gov_causal")
    # Hand-picked confounder set; the automatic stageAlgo(types) selection
    # was deliberately disabled here.
    controlFor = {
        'P.B': 'c',
        'Fincl..l': 'c',
        'Asset': 'c',
        'Tax': 'c',
        'P.E': 'c',
        'OPM.T12M': 'c',
        'P.EBITDA': 'c',
        'EV.EBITDA.T12M': 'c',
        'ROC': 'c',
        'ROE': 'c',
        'BOD.Age.Rng': 'c',
        'Norm.NI.to.NI.for.Cmn..': 'c',
        'Cash.Gen.Cash.Reqd': 'c',
        'Bd.Avg.Age': 'c',
        'X5Yr.Avg.Adj.ROE': 'c',
        # 'Dvd.Yld': 'c',
        'EBITDA.Sh': 'c',
        'Net.Debt.to.EBITDA': 'c'
    }
    treatment = 'Indep.Chrprsn.Feml.CEO.or.Equiv'
    target = 'AZS.class.Binary'

    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(data, treatment, target, controlFor, bootstrap=True)

    # Persist the result. Parameterized query instead of %-interpolation:
    # the old form broke on quotes inside the values and was injection-prone.
    conn = MySQLdb.connect(host="localhost",
                           user="******",
                           passwd="",
                           db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s, %s, %s, %s, %s, %s)",
            (now, "eebp", treatment, target, str(ATE_results),
             "...to be on the safe zone of the Altman Z-score it is important to have an independent chairperson or even a woman as CEO."))
        conn.commit()
    finally:
        conn.close()
    print("Done")
Пример #17
0
    def test_at_estimators(self):
        """Estimate the ATE on confounded synthetic data; the truth is 1."""
        n = 1000

        # Confounders influencing both treatment propensity and outcome.
        z1 = 0.5 * np.random.normal(size=n)
        z2 = 0.5 * np.random.normal(size=n)
        z3 = 0.5 * np.random.normal(size=n)

        arg = z1 + z2 + z3 + np.random.normal(size=n)
        p = np.exp(arg) / (1. + np.exp(arg))  # logistic P(d=1|z)
        d = np.random.binomial(1, p)

        # Unit-level effect (z1 + z2 + z3 + 1) averages to a true ATE of 1.
        y = np.random.normal(size=n) + (z1 + z2 + z3 + 1.) * d

        data = pd.DataFrame({'d': d, 'z1': z1, 'z2': z2,
                             'z3': z3, 'y': y, 'p': p})

        estimate = PropensityScoreMatching().estimate_ATE(
            data, 'd', 'y', {'z1': 'c', 'z2': 'c', 'z3': 'c'})
        assert 0.9 <= estimate <= 1.1
Пример #18
0
 def test_score(self):
     """Check the fitted logit coefficients for one continuous and two
     ordinal confounders (levels 3/4/5 give contrasts b-a = 1, c-a = 2)."""
     n = 100000
     z1 = np.random.normal(size=n)
     z2 = np.random.choice([0, 1], size=n)
     z3 = np.random.choice(["a", "b", "c"], size=n)
     codes = {"a": 3, "b": 4, "c": 5}
     z3_numeric = [codes[value] for value in z3]
     linpred = z1 + z2 + z3_numeric
     p_assign = np.exp(linpred) / (1.0 + np.exp(linpred))
     assignment = np.random.binomial(1, p_assign)
     outcome = np.random.normal(assignment)
     matcher = PropensityScoreMatching()
     X = pd.DataFrame({"z1": z1, "z2": z2, "z3": z3,
                       "assignment": assignment, "outcome": outcome})
     confounder_types = {"z1": "c", "z2": "o", "z3": "o"}
     matcher.score(X, confounder_types, store_model_fit=True)
     params = matcher.model_fit.params
     assert 0.9 <= params["z1"] <= 1.1
     assert 0.9 <= params["z2"] <= 1.1
     assert 0.0 <= params["z3_b"] <= 2.0
     assert 1.0 <= params["z3_c"] <= 3.0
     assert 2.0 <= params["intercept"] <= 4.0
Пример #19
0
def spx_wmOnBoard_tobin():
    '''
    Test the effect of having women on the board of directors on the tobins Q score
    M&M:
        For the American companies inside the S&P 500 index,
        we found a positive correlation between the percentage higher
        than 20 % of women in the board and the Tobin’s Q ratio
    Latest Results:
        (-0.094388682158789469, -0.04962212239013655, -0.0068850052276448973)
    Comments:
        So no real causal influence here, wrong sign
    '''
    data, types = getData("spx_fboard", "corp_gov_causal")
    # NOTE(review): controlFor is never used below (a hard-coded confounder
    # set is passed instead) — kept in case stageAlgo has side effects.
    controlFor = stageAlgo(types)
    treatment = 'X..Women.on.Bd'
    target = 'Tobins.Q'

    # One confounder dict instead of five duplicated literals.
    confounders = {'P.EBITDA': 'c', 'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'}
    balance_types = dict(confounders, **{'propensity score': 'c'})

    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(data, treatment, target, confounders,
                                       bootstrap=True)

    matcher.check_support(data, treatment, confounders)

    print("")
    print("Balance before matching")
    print(matcher.assess_balance(data, treatment, balance_types))
    print("")

    data = matcher.score(data, assignment=treatment, confounder_types=confounders)
    treated, control = matcher.match(data, assignment=treatment)
    print("")
    print("Balance after matching")
    # DataFrame.append was deprecated and removed in pandas 2.0; pd.concat
    # is the supported way to stack the matched treated and control rows.
    print(matcher.assess_balance(pd.concat([treated, control]), treatment,
                                 balance_types))
    print("")

    # Persist the result. Parameterized query instead of %-interpolation:
    # the old form broke on quotes inside the values and was injection-prone.
    conn = MySQLdb.connect(host="localhost",
                           user="******",
                           passwd="",
                           db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s, %s, %s, %s, %s, %s)",
            (now, "spx", treatment, target, str(ATE_results),
             "For the American companies inside the S and P 500 index, we found a positive correlation between the percentage higher than 20pct of women in the board and the Tobins Q ratio"))
        conn.commit()
    finally:
        conn.close()
    print("Done")
Пример #20
0
    def test_at_estimators(self):
        """Average ATT/ATC/ATE over 100 replications; with a true effect of
        3.1 plus confounding, each mean should land between 3 and 4."""
        att_list = []
        atc_list = []
        ate_list = []
        for _ in range(100):
            n = 1000
            z = np.random.choice([0.25, 0.75], size=n)
            frame = pd.DataFrame(z, columns=["Z"])
            frame.loc[:, "assignment"] = np.random.binomial(1, p=frame["Z"])
            frame.loc[:, "outcome"] = np.random.normal(3.1 * frame["assignment"] + 2.0 * frame["Z"])

            matcher = PropensityScoreMatching()
            att = matcher.estimate_ATT(frame, "assignment", "outcome", {"Z": "c"}, n_neighbors=10)
            # Invert the treatment so estimate_ATT yields the ATC.
            frame.loc[:, "inverted assignment"] = (frame["assignment"] + 1) % 2
            atc = matcher.estimate_ATT(frame, "inverted assignment", "outcome", {"Z": "c"}, n_neighbors=10)

            att_list.append(att)
            atc_list.append(atc)
            ate_list.append((att + atc) / 2.0)
        summary = pd.DataFrame({"att": att_list, "ate": ate_list, "atc": atc_list})
        assert (3.0 <= summary.mean()).all()
        assert (summary.mean() <= 4.0).all()
    df = pd.read_csv(comments_path)

    cause_set = [
        "bias_party", "bias_degree", "misinfo_factcheck", "misinfo_veracity"
    ]
    for cause in cause_set:
        confound_dict = {
            "meta_like": "o",
            "meta_dislike": "o",
            "meta_view": "o",
            "linguist_swear": "u",
            "linguist_laugh": "u",
            "linguist_emoji": "u",
            "linguist_fake": "u",
            "linguist_administration": "u",
            "linguist_american": "u",
            "linguist_nation": "u",
            "linguist_personal": "u"
        }
        for other_cause in cause_set:
            if other_cause != cause:
                confound_dict[other_cause] = "u"
        matcher = PropensityScoreMatching()
        samples = matcher.estimate_ATE(df,
                                       cause,
                                       "moderated",
                                       confound_dict,
                                       bootstrap=True)
        samples.to_csv(os.path.join(ate_path, cause + ".csv"), index=False)
        print(cause, confound_dict)
Пример #22
0
import pandas as pd
import numpy as np
from causality.estimation.parametric import PropensityScoreMatching

# Demo: simulate logistic selection into treatment with a heterogeneous
# effect (y1 - y0 = z1 + z2 + z3, mean 0), then run PSM diagnostics.
N = 10000
z1 = np.random.normal(size=N)
z2 = np.random.normal(size=N)
z3 = np.random.normal(size=N)

# Logistic propensity in (z1 + z2 + z3) / 4.
p_d = 1. / (1. + np.exp(-(z1 + z2 + z3) / 4.))
d = np.random.binomial(1, p=p_d)

# Potential outcomes. NOTE(review): y0 is a single scalar draw shared by
# all units (np.random.normal() without size=N) — confirm this is intended
# rather than per-unit baseline noise.
y0 = np.random.normal()
y1 = y0 + z1 + z2 + z3

# Observed outcome selects the potential outcome for the received treatment.
y = (d == 1) * y1 + (d == 0) * y0
X = pd.DataFrame({'d': d, 'z1': z1, 'z2': z2, 'z3': z3,
                  'y': y, 'y0': y0, 'y1': y1, 'p': p_d})

print(X.head(10))

confounder_types = {'z1': 'c', 'z2': 'c', 'z3': 'c'}
matcher = PropensityScoreMatching()
print(matcher.estimate_ATE(X, 'd', 'y', confounder_types))

matcher.check_support(X, 'd', confounder_types)

print(matcher.assess_balance(X, 'd', confounder_types))