Пример #1
0
def test_concat_sort(data):
    """
    Check the column order produced by column-wise ``concat`` calls.

    Parameters
    ----------
    data
        Object exposing ``df1``, ``df2`` and ``s``. Presumably a pytest
        fixture; the expected labels below imply the columns A, B, C and
        the name D -- confirm against the fixture definition.
    """
    # ``axis`` is passed by keyword: pandas deprecated positional arguments
    # other than ``objs`` in 1.3 and rejects them from 2.0 onwards.
    a = concat([data.df1, data.df2], axis=1)
    b = concat([data.df1, data.df2, data.s], axis=1)
    c = concat([data.df1, data.df2, data.s], axis=1, sort=True)
    d = concat([data.df2, data.df1, data.s], axis=1, sort=False)
    # Column order follows the order of the inputs whatever ``sort`` is
    assert list(a.columns) == ['A', 'B', 'C']
    assert list(b.columns) == ['A', 'B', 'C', 'D']
    assert list(c.columns) == ['A', 'B', 'C', 'D']
    assert list(d.columns) == ['B', 'C', 'A', 'D']
Пример #2
0
def test_concat_sort(data):
    """
    Check the column order produced by column-wise ``concat`` calls.

    Parameters
    ----------
    data
        Object exposing ``df1``, ``df2`` and ``s``. Presumably a pytest
        fixture; the expected labels below imply the columns A, B, C and
        the name D -- confirm against the fixture definition.
    """
    # ``axis`` is passed by keyword: pandas deprecated positional arguments
    # other than ``objs`` in 1.3 and rejects them from 2.0 onwards.
    a = concat([data.df1, data.df2], axis=1)
    b = concat([data.df1, data.df2, data.s], axis=1)
    c = concat([data.df1, data.df2, data.s], axis=1, sort=True)
    d = concat([data.df2, data.df1, data.s], axis=1, sort=False)
    # Column order follows the order of the inputs whatever ``sort`` is
    assert list(a.columns) == ["A", "B", "C"]
    assert list(b.columns) == ["A", "B", "C", "D"]
    assert list(c.columns) == ["A", "B", "C", "D"]
    assert list(d.columns) == ["B", "C", "A", "D"]
def fit(request):
    """
    Fit the system model selected by ``request.param``.

    Parameters
    ----------
    request
        Object whose ``param`` attribute is the method name, which is also
        used as the key into ``results``. Presumably the pytest ``request``
        fixture of a parametrized fixture -- confirm against the decorator.

    Returns
    -------
    tuple
        ``(stata, res, rtol)``: the entry of ``results`` for the method, the
        fitted model result and the relative tolerance to compare them with.
    """
    method = request.param
    data = generate_simultaneous_data()
    if "ols" in method or "sur" in method:
        mod = SUR
        for key in data:
            temp = data[key]
            # Treat the endogenous variables as ordinary regressors. ``axis``
            # must be a keyword: pandas >= 2.0 rejects it positionally.
            temp["exog"] = concat([temp["exog"], temp["endog"]], axis=1)
            del temp["endog"]
            del temp["instruments"]
    else:
        mod = IV3SLS
    if "ols" in method or "2sls" in method:
        fit_method = "ols"
    else:
        fit_method = "gls"
    mod = mod(data)
    iterate = "ireg3" in method
    stata = results[method]
    debiased = method in ("ols", "2sls")
    # A looser tolerance is used for the iterated ("ireg3") methods
    decimal = 3 if "ireg3" in method else 5
    rtol = 10**-decimal
    res = mod.fit(
        cov_type="unadjusted",
        method=fit_method,
        debiased=debiased,
        iterate=iterate,
    )
    return stata, res, rtol
Пример #4
0
 def _get_series_property(self, name: str) -> DataFrame:
     """
     Collect the attribute ``name`` from every stored result.

     Parameters
     ----------
     name : str
         Attribute read from each value in ``self._results``.

     Returns
     -------
     DataFrame
         The collected values joined column-wise, with one column per key
         of ``self._results`` in iteration order.
     """
     labels: List[str] = []
     pieces: List[Series] = []
     for label, result in self._results.items():
         labels.append(label)
         pieces.append(getattr(result, name))
     values = concat(pieces, axis=1)
     # Relabel so that columns carry the result keys, not the series names
     values.columns = labels
     return values
Пример #5
0
def fit(request):
    """
    Fit the system model selected by ``request.param``.

    Parameters
    ----------
    request
        Object whose ``param`` attribute is the method name, which is also
        used as the key into ``results``. Presumably the pytest ``request``
        fixture of a parametrized fixture -- confirm against the decorator.

    Returns
    -------
    tuple
        ``(stata, res, rtol)``: the entry of ``results`` for the method, the
        fitted model result and the relative tolerance to compare them with.
    """
    method = request.param
    data = generate_simultaneous_data()
    if 'ols' in method or 'sur' in method:
        mod = SUR
        for key in data:
            temp = data[key]
            # Treat the endogenous variables as ordinary regressors. ``axis``
            # must be a keyword: pandas >= 2.0 rejects it positionally.
            temp['exog'] = concat([temp['exog'], temp['endog']], axis=1)
            del temp['endog']
            del temp['instruments']
    else:
        mod = IV3SLS
    if 'ols' in method or '2sls' in method:
        fit_method = 'ols'
    else:
        fit_method = 'gls'
    mod = mod(data)
    iterate = 'ireg3' in method
    stata = results[method]
    debiased = method in ('ols', '2sls')
    # A looser tolerance is used for the iterated ('ireg3') methods
    decimal = 2 if 'ireg3' in method else 5
    rtol = 10**-decimal
    res = mod.fit(cov_type='unadjusted',
                  method=fit_method,
                  debiased=debiased,
                  iterate=iterate)
    return stata, res, rtol
Пример #6
0
def data():
    """
    Build simulated data with an extra ``joined`` entry.

    Returns
    -------
    The object returned by ``generate_data`` with ``out['joined']`` set to
    ``out.factors`` and ``out.portfolios`` concatenated column-wise.
    """
    premia = np.array([.1, .1, .1])
    out = generate_data(nportfolio=10,
                        output='pandas',
                        alpha=True,
                        premia=premia)
    # ``axis`` must be a keyword: pandas >= 2.0 rejects it positionally
    out['joined'] = concat([out.factors, out.portfolios], axis=1)
    return out
Пример #7
0
def test_formula_equivalence_weights(data):
    """
    Check that weighted array and formula IVSystemGMM fits agree.

    A weighted model is built directly from copies of the equations in
    ``data.eqns`` and again from formulas over a single DataFrame holding
    every variable; the fitted parameters of both must match.

    Parameters
    ----------
    data
        Object exposing ``eqns``, a mapping from equation label to an entry
        with ``dependent``, ``exog``, ``endog`` and ``instruments``.
        Presumably a pytest fixture -- confirm against the test module.
    """
    weights = AttrDict()
    eqn_copy = AttrDict()
    for key in data.eqns:
        # Shallow copy so that adding "weights" leaves data.eqns untouched
        eqn = dict(data.eqns[key].items())
        nobs = eqn["dependent"].shape[0]
        w = np.random.chisquare(2, (nobs, 1)) / 2
        weights[key] = w
        eqn["weights"] = w
        eqn_copy[key] = eqn

    mod = IVSystemGMM(eqn_copy, weight_type="unadjusted")
    frames = []
    formulas = {}
    for i, key in enumerate(data.eqns):
        eqn = data.eqns[key]
        dep = DataFrame(eqn.dependent, columns=["dep_{0}".format(i)])
        ex = eqn.exog
        en = eqn.endog
        instr = eqn.instruments
        # A column of ones is written as "1" in the formula instead. The
        # slice assumes that column comes first -- TODO confirm.
        has_const = False
        if np.any(np.all(ex == 1, 0)):
            ex = ex[:, 1:]
            has_const = True
        ex = DataFrame(
            ex,
            columns=["ex_{0}_{1}".format(i, j) for j in range(ex.shape[1])])
        en = DataFrame(
            en,
            columns=["en_{0}_{1}".format(i, j) for j in range(en.shape[1])])
        # Name one column per instrument. Sizing the names from ``ex`` only
        # works when both happen to have the same number of columns.
        instr = DataFrame(
            instr,
            columns=["instr_{0}_{1}".format(i, j)
                     for j in range(instr.shape[1])])
        fmla = "".join(dep.columns) + " ~  "
        if has_const:
            fmla += " 1 + "
        fmla += " + ".join(ex.columns) + " + ["
        fmla += " + ".join(en.columns) + " ~ "
        fmla += " + ".join(instr.columns) + " ] "
        formulas[key] = fmla
        frames.extend([dep, ex, en, instr])
    # ``axis`` must be a keyword: pandas >= 2.0 rejects it positionally
    df = concat(frames, axis=1)
    formula_mod = IVSystemGMM.from_formula(formulas,
                                           df,
                                           weights=weights,
                                           weight_type="unadjusted")
    res = mod.fit(cov_type="unadjusted")
    formula_res = formula_mod.fit(cov_type="unadjusted")
    assert_allclose(res.params, formula_res.params)
Пример #8
0
def test_formula_equivalence(data):
    """
    Check that array and formula IVSystemGMM fits give equal parameters.

    The model is built directly from ``data.eqns`` and again from formulas
    over a single DataFrame holding every variable.

    Parameters
    ----------
    data
        Object exposing ``eqns``, a mapping from equation label to an entry
        with ``dependent``, ``exog``, ``endog`` and ``instruments``.
        Presumably a pytest fixture -- confirm against the test module.
    """
    mod = IVSystemGMM(data.eqns, weight_type="unadjusted")
    formula = []
    frames = []
    for i, key in enumerate(data.eqns):
        eqn = data.eqns[key]
        dep = DataFrame(eqn.dependent, columns=["dep_{0}".format(i)])
        ex = eqn.exog
        en = eqn.endog
        instr = eqn.instruments
        # A column of ones is written as "1" in the formula instead. The
        # slice assumes that column comes first -- TODO confirm.
        has_const = False
        if np.any(np.all(ex == 1, 0)):
            ex = ex[:, 1:]
            has_const = True
        ex = DataFrame(
            ex,
            columns=["ex_{0}_{1}".format(i, j) for j in range(ex.shape[1])])
        en = DataFrame(
            en,
            columns=["en_{0}_{1}".format(i, j) for j in range(en.shape[1])])
        # Name one column per instrument. Sizing the names from ``ex`` only
        # works when both happen to have the same number of columns.
        instr = DataFrame(
            instr,
            columns=["instr_{0}_{1}".format(i, j)
                     for j in range(instr.shape[1])])
        fmla = "".join(dep.columns) + " ~  "
        if has_const:
            fmla += " 1 + "
        fmla += " + ".join(ex.columns) + " + ["
        fmla += " + ".join(en.columns) + " ~ "
        fmla += " + ".join(instr.columns) + " ] "
        formula.append(fmla)
        frames.extend([dep, ex, en, instr])

    formulas = {"eq{0}".format(i): f for i, f in enumerate(formula)}
    # ``axis`` must be a keyword: pandas >= 2.0 rejects it positionally
    df = concat(frames, axis=1)
    formula_mod = IVSystemGMM.from_formula(formulas,
                                           df,
                                           weight_type="unadjusted")
    res = mod.fit(cov_type="unadjusted")
    formula_res = formula_mod.fit(cov_type="unadjusted")
    assert_allclose(res.params, formula_res.params)
Пример #9
0
def test_predict_formula_function(data, model_and_func):
    """
    Check predictions of a formula model that uses function transforms.

    Predictions made from explicitly built ``exog``/``endog`` frames must
    equal those made from ``data`` through the formula, both for the model
    class and for the companion function interface.

    Parameters
    ----------
    data
        Frame-like object with the columns named in the formula plus
        ``Intercept``. Presumably a pytest fixture -- confirm.
    model_and_func
        Pair ``(model, func)``: a class providing ``from_formula`` and a
        callable taking ``(formula, data)``; both yield objects with ``fit``.
    """
    model, func = model_and_func
    fmla = 'y ~ 1 + sigmoid(x3) + x4 + [x1 + x2 ~ z1 + z2 + z3] + np.exp(x5)'
    mod = model.from_formula(fmla, data)
    res = mod.fit()

    # Rebuild the exogenous regressors by hand, in formula order
    exog = [data[['Intercept']], sigmoid(data[['x3']]), data[['x4']],
            np.exp(data[['x5']])]
    # ``axis`` must be a keyword: pandas >= 2.0 rejects it positionally
    exog = concat(exog, axis=1)
    endog = data[['x1', 'x2']]
    pred = res.predict(exog, endog)
    pred2 = res.predict(data=data)
    assert_frame_equal(pred, pred2)
    assert_allclose(res.fitted_values, pred)

    res2 = func(fmla, data).fit()
    pred3 = res2.predict(exog, endog)
    pred4 = res2.predict(data=data)
    assert_frame_equal(pred, pred3)
    assert_frame_equal(pred, pred4)
Пример #10
0
def test_formula_function(data, model_and_func):
    """
    Check that formula function transforms match hand-built regressors.

    The formula model, a model built from explicit arrays and the function
    interface must all produce identical parameter values.

    Parameters
    ----------
    data
        Frame-like object with the columns named in the formula plus
        ``Intercept``. Presumably a pytest fixture -- confirm.
    model_and_func
        Pair ``(model, func)``: a class that can be built either with
        ``from_formula`` or from ``(dep, exog, endog, instr)``, and a
        callable taking ``(formula, data)``.
    """
    model, func = model_and_func
    fmla = 'y ~ 1 + sigmoid(x3) + x4 + [x1 + x2 ~ z1 + z2 + z3] + np.exp(x5)'
    mod = model.from_formula(fmla, data)
    res = mod.fit()

    dep = data.y
    # Rebuild the exogenous regressors by hand, in formula order
    exog = [data[['Intercept']], sigmoid(data[['x3']]), data[['x4']],
            np.exp(data[['x5']])]
    # ``axis`` must be a keyword: pandas >= 2.0 rejects it positionally
    exog = concat(exog, axis=1)
    endog = data[['x1', 'x2']]
    instr = data[['z1', 'z2', 'z3']]
    mod = model(dep, exog, endog, instr)
    res2 = mod.fit()
    assert_equal(res.params.values, res2.params.values)
    res3 = func(fmla, data).fit()
    assert_equal(res.params.values, res3.params.values)

    # The model built without a formula must refuse ``predict(data=...)``
    with pytest.raises(ValueError):
        res2.predict(data=data)
Пример #11
0
def test_formula_function(data, model_and_func):
    """
    Check that formula function transforms match hand-built regressors.

    The formula model, a model built from explicit arrays and the function
    interface must all produce identical parameter values.

    Parameters
    ----------
    data
        Frame-like object with the columns named in the formula plus
        ``Intercept``. Presumably a pytest fixture -- confirm.
    model_and_func
        Pair ``(model, func)``: a class that can be built either with
        ``from_formula`` or from ``(dep, exog, endog, instr)``, and a
        callable taking ``(formula, data)``.
    """
    model, func = model_and_func
    fmla = "y ~ 1 + sigmoid(x3) + x4 + [x1 + x2 ~ z1 + z2 + z3] + np.exp(x5)"
    mod = model.from_formula(fmla, data)
    res = mod.fit()

    dep = data.y
    # Rebuild the exogenous regressors by hand, in formula order
    exog = [
        data[["Intercept"]],
        sigmoid(data[["x3"]]),
        data[["x4"]],
        np.exp(data[["x5"]]),
    ]
    # ``axis`` must be a keyword: pandas >= 2.0 rejects it positionally
    exog = concat(exog, axis=1)
    endog = data[["x1", "x2"]]
    instr = data[["z1", "z2", "z3"]]
    mod = model(dep, exog, endog, instr)
    res2 = mod.fit()
    assert_equal(res.params.values, res2.params.values)
    res3 = func(fmla, data).fit()
    assert_equal(res.params.values, res3.params.values)

    # The model built without a formula must refuse ``predict(data=...)``
    with pytest.raises(ValueError):
        res2.predict(data=data)
Пример #12
0
def test_formula_equivalence(data):
    """
    Check that array and formula IVSystemGMM fits give equal parameters.

    The model is built directly from ``data.eqns`` and again from formulas
    over a single DataFrame holding every variable.

    Parameters
    ----------
    data
        Object exposing ``eqns``, a mapping from equation label to an entry
        with ``dependent``, ``exog``, ``endog`` and ``instruments``.
        Presumably a pytest fixture -- confirm against the test module.
    """
    from collections import OrderedDict

    mod = IVSystemGMM(data.eqns, weight_type='unadjusted')
    formula = []
    frames = []
    for i, key in enumerate(data.eqns):
        eqn = data.eqns[key]
        dep = DataFrame(eqn.dependent, columns=['dep_{0}'.format(i)])
        ex = eqn.exog
        en = eqn.endog
        instr = eqn.instruments
        # A column of ones is written as "1" in the formula instead. The
        # slice assumes that column comes first -- TODO confirm.
        has_const = False
        if np.any(np.all(ex == 1, 0)):
            ex = ex[:, 1:]
            has_const = True
        ex = DataFrame(ex, columns=['ex_{0}_{1}'.format(i, j)
                                    for j in range(ex.shape[1])])
        en = DataFrame(en, columns=['en_{0}_{1}'.format(i, j)
                                    for j in range(en.shape[1])])
        # Name one column per instrument. Sizing the names from ``ex`` only
        # works when both happen to have the same number of columns.
        instr = DataFrame(instr, columns=['instr_{0}_{1}'.format(i, j)
                                          for j in range(instr.shape[1])])
        fmla = ''.join(dep.columns) + ' ~  '
        if has_const:
            fmla += ' 1 + '
        fmla += ' + '.join(ex.columns) + ' + ['
        fmla += ' + '.join(en.columns) + ' ~ '
        fmla += ' + '.join(instr.columns) + ' ] '
        formula.append(fmla)
        frames.extend([dep, ex, en, instr])
    # Ordered mapping keeps the equations in the order they were generated
    formulas = OrderedDict()
    for i, f in enumerate(formula):
        formulas['eq{0}'.format(i)] = f
    # ``axis`` must be a keyword: pandas >= 2.0 rejects it positionally
    df = concat(frames, axis=1)
    formula_mod = IVSystemGMM.from_formula(formulas, df, weight_type='unadjusted')
    res = mod.fit(cov_type='unadjusted')
    formula_res = formula_mod.fit(cov_type='unadjusted')
    assert_allclose(res.params, formula_res.params)
Пример #13
0
# Rescale the weights so that they average to one
w = w / w.mean()

items = ['x' + str(i) for i in range(1, k + 1)]
items = ['intercept'] + items
major = pd.date_range('12-31-1999', periods=t, freq='A-DEC')
minor = ['firm.' + str(i) for i in range(1, n + 1)]

# Convert each array into a frame via panel_to_frame, then wrap in PanelData
x = panel_to_frame(x, items, major, minor, swap=True)
y = panel_to_frame(y[None, :], ['y'], major, minor, swap=True)
w = panel_to_frame(w[None, :], ['w'], major, minor, swap=True)

x = PanelData(x)
y = PanelData(y)
w = PanelData(w)

# ``axis`` must be a keyword: pandas >= 2.0 rejects it positionally
z = concat([x.dataframe, y.dataframe, w.dataframe], axis=1)
# Name the index levels when the index is built. Assigning to
# ``index.levels[i].name`` raises RuntimeError on pandas >= 1.0.
final_index = pd.MultiIndex.from_product([minor, major],
                                         names=['firm', 'time'])
z = z.reindex(final_index)

# Expose the index levels as columns and add an integer code for each firm
z = z.reset_index()
z['firm_id'] = z.firm.astype('category')
z['firm_id'] = z.firm_id.cat.codes

vars = ['y', 'x1', 'x2', 'x3', 'x4', 'x5']
missing = 0.05
for v in vars:
    locs = np.random.choice(n * t, int(n * t * missing))
    temp = z[v].copy()
Пример #14
0
 def _get_series_property(self, name):
     """
     Collect the attribute ``name`` from every stored result.

     Parameters
     ----------
     name : str
         Attribute read from each value in ``self._results``.

     Returns
     -------
     The collected values joined column-wise, with one column per key of
     ``self._results`` in iteration order.
     """
     out = [(k, getattr(v, name)) for k, v in self._results.items()]
     cols = [v[0] for v in out]
     # ``axis`` must be a keyword: pandas >= 2.0 rejects it positionally
     values = concat([v[1] for v in out], axis=1)
     # Relabel so that columns carry the result keys
     values.columns = cols
     return values
Пример #15
0
    if np.any(locs):
        dep.flat[locs] = np.nan
    exog = missing_data[key]['exog']
    locs = np.where(np.random.random_sample(np.prod(exog.shape)) < 0.02)[0]
    if np.any(locs):
        exog.flat[locs] = np.nan

# Convert every equation's arrays to labelled DataFrames in place and
# collect the pieces that are exported below.
out = []
for i, dataset in enumerate((basic_data, common_data, missing_data)):
    base = 'mod_{0}'.format(i)
    for j, key in enumerate(dataset):
        dep = dataset[key]['dependent']
        dep = pd.DataFrame(dep, columns=[base + '_y_{0}'.format(j)])
        dataset[key]['dependent'] = dep
        # Drop the first column and append an explicit 'cons' column of
        # ones at the end (presumably the first column is the constant --
        # verify against the data generator).
        exog = dataset[key]['exog'][:, 1:]
        exog_cols = [
            base + '_x_{0}{1}'.format(j, k) for k in range(exog.shape[1])
        ]
        exog = pd.DataFrame(exog, columns=exog_cols)
        exog = exog.copy()
        exog['cons'] = 1.0
        dataset[key]['exog'] = exog
        # For the second dataset only the first equation's regressors are
        # exported; the other equations contribute their dependent only.
        if i != 1 or j == 0:
            out.extend([dep, exog])
        else:
            out.append(dep)

if __name__ == '__main__':
    # ``axis`` must be a keyword: pandas >= 2.0 rejects it positionally
    df = concat(out, axis=1)
    df.to_stata('simulated-sur.dta')
Пример #16
0
def generate_panel_data(
    nentity: int = 971,
    ntime: int = 7,
    nexog: int = 5,
    const: bool = False,
    missing: float = 0,
    other_effects: int = 2,
    ncats: Union[int, List[int]] = 4,
    rng: Optional[np.random.RandomState] = None,
) -> PanelModelData:
    """
    Simulate a panel data set with weights, other effects and clusters.

    Parameters
    ----------
    nentity : int, default 971
        The number of entities in the panel.
    ntime : int, default 7
        The number of time periods in the panel.
    nexog : int, default 5
        The number of explanatory variables in the dataset.
    const : bool, default False
        Flag indicating that the model should include a constant.
    missing : float, default 0
        Controls how many values are set to NaN. It multiplies the number
        of observations to give the number of locations drawn
        (``int(n * t * missing)`` for y), so it acts as a fraction rather
        than a 0-100 percentage. Nothing is set to NaN unless it is
        positive.
    other_effects : int, default 2
        The number of other effects generated.
    ncats : Union[int, List[int]], default 4
        The number of categories to use in other_effects. If a list, it is
        indexed once per effect and so needs at least other_effects
        elements. Any other value is repeated for every effect.
    rng : RandomState, default None
        A NumPy RandomState instance. If not provided, one is initialized
        using a fixed seed.

    Returns
    -------
    PanelModelData
        A namedtuple derived class containing 4 DataFrames:

        * `data` - A simulated data with variables y and x# for # in
          0,...,nexog-1. If const is True, then also contains a column
          named const.
        * `weights` - Simulated non-negative weights.
        * `other_effects` - Simulated effects.
        * `clusters` - Simulated data to use in clustered covariance estimation.
    """
    if rng is None:
        rng = np.random.RandomState(
            [
                0xA14E2429,
                0x448D2E51,
                0x91B558E7,
                0x6A3F5CD2,
                0x22B43ABB,
                0xE746C92D,
                0xCE691A7D,
                0x66746EE7,
            ]
        )

    n, t, k = nentity, ntime, nexog
    # The constant, when requested, takes one extra slot in x
    k += int(const)
    x = rng.standard_normal((k, t, n))
    beta = np.arange(1, k + 1)[:, None, None] / k
    # The (1, n) draw broadcasts over the t axis, giving every entity a
    # shock that is common to all of its time periods
    y = (
        (x * beta).sum(0)
        + rng.standard_normal((t, n))
        + 2 * rng.standard_normal((1, n))
    )
    w = rng.chisquare(5, (t, n)) / 5
    c = None
    cats = [f"cat.{i}" for i in range(other_effects)]
    if other_effects:
        if not isinstance(ncats, list):
            ncats = [ncats] * other_effects
        c = []
        for i in range(other_effects):
            nc = ncats[i]
            c.append(rng.randint(0, nc, (1, t, n)))
        c = np.concatenate(c, 0)

    vcats = [f"varcat.{i}" for i in range(2)]
    # Multiplying by a (t, 1) block of ones repeats each entity's draw
    # across the t axis
    vc2 = np.ones((2, t, 1)) @ rng.randint(0, n // 2, (2, 1, n))
    vc1 = vc2[[0]]

    if const:
        # The first variable is overwritten with ones
        x[0] = 1.0

    if missing > 0:
        locs = rng.choice(n * t, int(n * t * missing))
        y.flat[locs] = np.nan
        locs = rng.choice(n * t * k, int(n * t * k * missing))
        x.flat[locs] = np.nan

    entities = [f"firm{i}" for i in range(n)]
    time = date_range("1-1-1900", periods=t, freq="A-DEC")
    var_names = [f"x{i}" for i in range(k)]
    if const:
        # Shift the names so that "const" is first, followed by x0, x1, ...
        var_names[1:] = var_names[:-1]
        var_names[0] = "const"
    # y = DataFrame(y, index=time, columns=entities)
    y_df = panel_to_frame(
        y[None], items=["y"], major_axis=time, minor_axis=entities, swap=True
    )
    # Every other frame is reindexed to the index of y_df
    index = y_df.index
    w_df = panel_to_frame(
        w[None], items=["w"], major_axis=time, minor_axis=entities, swap=True
    )
    w_df = w_df.reindex(index)
    x_df = panel_to_frame(
        x, items=var_names, major_axis=time, minor_axis=entities, swap=True
    )
    x_df = x_df.reindex(index)
    # NOTE(review): c is None when other_effects is 0 -- confirm that
    # panel_to_frame accepts None
    c_df = panel_to_frame(
        c, items=cats, major_axis=time, minor_axis=entities, swap=True
    )
    other_eff = c_df.reindex(index)
    vc1_df = panel_to_frame(
        vc1, items=vcats[:1], major_axis=time, minor_axis=entities, swap=True
    )
    vc1_df = vc1_df.reindex(index)
    vc2_df = panel_to_frame(
        vc2, items=vcats, major_axis=time, minor_axis=entities, swap=True
    )
    vc2_df = vc2_df.reindex(index)
    # NOTE(review): no axis is given here, unlike the call below, so the
    # default axis of concat is used -- confirm that this is intended
    clusters = concat([vc1_df, vc2_df])
    data = concat([y_df, x_df], axis=1)
    return PanelModelData(data, w_df, other_eff, clusters)
Пример #17
0
def expand_categoricals(x, drop_first):
    """
    Apply ``convert_columns`` to every column of ``x`` and rejoin them.

    Parameters
    ----------
    x
        Two-dimensional, column-labelled data (``shape`` and ``columns``
        are read).
    drop_first
        Passed through unchanged to ``convert_columns``.

    Returns
    -------
    ``x`` itself when it has no columns; otherwise the converted columns
    concatenated along axis 1.
    """
    if x.shape[1] > 0:
        converted = []
        for label in x.columns:
            converted.append(convert_columns(x[label], drop_first))
        return concat(converted, axis=1)
    # Nothing to convert: hand the input back untouched
    return x
Пример #18
0
from linearmodels.tests.system._utility import generate_simultaneous_data

# Collect each distinct exog/endog column once across all equations and
# export them as a single Stata data file.
data = generate_simultaneous_data()
all_cols = []
out = []
for key in data:
    eqn = data[key]
    # A separate name avoids rebinding the outer loop's ``key``
    for part in ('exog', 'endog'):
        vals = eqn[part]
        for col in vals:
            if col not in all_cols:
                out.append(vals[col])
                all_cols.append(col)
# ``axis`` must be a keyword: pandas >= 2.0 rejects it positionally
out = concat(out, axis=1)
if 'const' in out:
    out.pop('const')
out.to_stata('simulated-3sls.dta', write_index=False)
# Template with ``{outfile}`` and ``{method}`` placeholders. It looks like a
# Stata snippet that appends a banner line naming the method to the output
# file -- confirm against the code that formats it.
SEP = """

file open myfile using {outfile}, write append
file write myfile  "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! {method} !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" _n
file close myfile

"""

# add , 2sls to get non GLS estimator
CMD = """
reg3 (dependent_0 dependent_1 dependent_2 exog_1 exog_2 exog_3 exog_4 exog_5) ///
     (dependent_1 dependent_0 dependent_2 exog_1 exog_2 exog_3 exog_6 exog_7) ///
Пример #19
0
def data():
    """
    Build simulated data with an extra ``joined`` entry.

    Returns
    -------
    The object returned by ``generate_data`` with ``out["joined"]`` set to
    ``out.factors`` and ``out.portfolios`` concatenated column-wise.
    """
    premia = np.array([0.1, 0.1, 0.1])
    out = generate_data(nportfolio=10, output="pandas", alpha=True, premia=premia)
    # ``axis`` must be a keyword: pandas >= 2.0 rejects it positionally
    out["joined"] = concat([out.factors, out.portfolios], axis=1)
    return out
Пример #20
0
def expand_categoricals(x: AnyPandas, drop_first: bool) -> AnyPandas:
    """
    Apply ``convert_columns`` to every column of ``x`` and rejoin them.

    Parameters
    ----------
    x : AnyPandas
        Two-dimensional, column-labelled data (``shape`` and ``columns``
        are read).
    drop_first : bool
        Passed through unchanged to ``convert_columns``.

    Returns
    -------
    AnyPandas
        ``x`` itself when it has no columns; otherwise the converted
        columns concatenated along axis 1.
    """
    if x.shape[1] > 0:
        converted = []
        for label in x.columns:
            converted.append(convert_columns(x[label], drop_first))
        return concat(converted, axis=1)
    # Nothing to convert: hand the input back untouched
    return x
Пример #21
0
def expand_categoricals(x: DataFrame, drop_first: bool) -> DataFrame:
    """
    Apply ``convert_columns`` to every column of ``x`` and rejoin them.

    Parameters
    ----------
    x : DataFrame
        Frame whose columns are converted one at a time.
    drop_first : bool
        Passed through unchanged to ``convert_columns``.

    Returns
    -------
    DataFrame
        ``x`` itself when it has no columns; otherwise the converted
        columns concatenated along axis 1.
    """
    # ``concat`` raises ValueError on an empty list, so a frame with no
    # columns is returned unchanged.
    if x.shape[1] == 0:
        return x
    return concat([convert_columns(x[c], drop_first) for c in x.columns],
                  axis=1)