示例#1
0
def test_labels(panel):
    """Check that PanelData reports the same labels as the source panel.

    ``vars``, ``time`` and ``entities`` are compared with the panel's
    ``items``, ``major_axis`` and ``minor_axis`` respectively.
    """
    wrapped = PanelData(panel)
    assert wrapped.vars == list(panel.items)
    assert wrapped.time == list(panel.major_axis)
    assert wrapped.entities == list(panel.minor_axis)
示例#2
0
def test_labels(mi_df):
    """Check that PanelData reports the labels of a MultiIndex DataFrame.

    Variables are compared with the columns, time labels with the second
    index level, and entities with the unique values of the first level.
    """
    wrapped = PanelData(mi_df)
    frame_index = mi_df.index
    assert wrapped.vars == list(mi_df.columns)
    assert wrapped.time == list(frame_index.levels[1])
    assert wrapped.entities == list(frame_index.get_level_values(0).unique())
示例#3
0
# Exploratory scratch code. NOTE(review): `x`, `p` and `z` are not defined in
# the visible portion of the file; they must already exist when this runs.
a = x.T @ p @ x
b = (x.T @ z) @ (x.T @ z).T
# Bare expressions: they only display a value in an interactive session and
# have no effect when run as a script.
a
b
np.linalg.inv(a) @ b
np.trace(np.linalg.inv(a) @ b)
# NOTE(review): the two literals below look like pasted interactive output
# rather than intended statements - confirm and remove if so.
30
30

# Simulated data set: no missing values, pandas containers, one "other effect"
# column and no constant column (see the arguments passed below).
data = generate_data(0,
                     'pandas',
                     ntk=(101, 3, 5),
                     other_effects=1,
                     const=False)

y = PanelData(data.y)
x = PanelData(data.x)
w = PanelData(data.w)

# Overwrite the first column of x's underlying DataFrame with ones.
x.dataframe.iloc[:, 0] = 1
# Weighted model without effects, fit on the raw simulated inputs.
mod = PanelOLS(data.y, data.x, weights=data.w)
mod.fit()
# Same with entity effects; this call uses the PanelData wrappers (and
# therefore the modified first column of x).
mod = PanelOLS(y, x, weights=data.w, entity_effect=True)
mod.fit()
# Time effects only.
mod = PanelOLS(data.y, data.x, weights=data.w, time_effect=True)
mod.fit()
# Both effects. NOTE(review): this last model is constructed but not fit in
# the visible code.
mod = PanelOLS(data.y,
               data.x,
               weights=data.w,
               time_effect=True,
               entity_effect=True)
示例#4
0
def test_first_difference(data):
    """Smoke test: first differencing ``data.x`` runs without raising."""
    PanelData(data.x).first_difference()
示例#5
0
def test_numpy_1d():
    """A one-dimensional NumPy array must be rejected with ValueError."""
    length = 11
    flat = np.random.random(length)
    with pytest.raises(ValueError):
        PanelData(flat)
示例#6
0
def test_demean_invalid(panel):
    """Demeaning with an unrecognised group name must raise ValueError."""
    wrapped = PanelData(panel)
    with pytest.raises(ValueError):
        wrapped.demean('unknown')
示例#7
0
def test_series_multiindex(panel):
    """A MultiIndex Series and the matching one-column frame give equal data."""
    swapped = panel.swapaxes(1, 2)
    frame = swapped.to_frame(filter_observations=False)
    # Same first column, once as a DataFrame and once as a Series.
    via_frame = PanelData(frame.iloc[:, [0]])
    via_series = PanelData(frame.iloc[:, 0])
    assert_frame_equal(via_frame.dataframe, via_series.dataframe)
示例#8
0
def test_series_multiindex(mi_df):
    """A MultiIndex Series and the matching one-column frame give equal data."""
    # Same first column, once as a DataFrame and once as a Series.
    via_frame = PanelData(mi_df.iloc[:, [0]])
    via_series = PanelData(mi_df.iloc[:, 0])
    assert_frame_equal(via_frame.dataframe, via_series.dataframe)
示例#9
0
def test_invalid_seires(mi_df):
    """A Series taken after ``reset_index`` must be rejected with ValueError."""
    flattened = mi_df.reset_index()
    with pytest.raises(ValueError):
        PanelData(flattened.iloc[:, 0])
示例#10
0
def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects=0, rng=None):
    """
    Simulate a panel data set for use in tests.

    Parameters
    ----------
    missing : float
        Share of positions to set to NaN in ``y`` and ``x``. Positions are
        chosen with ``np.random.choice`` using its default arguments, so the
        same position may be drawn more than once. Nothing is set when the
        value is not positive.
    datatype : str
        ``'pandas'`` converts every output with ``panel_to_frame``;
        ``'xarray'`` additionally wraps the results in ``xr.DataArray``.
        Any other value returns the arrays as generated.
    const : bool
        When true, one extra regressor is added and ``x[0]`` is set to 1.0.
    ntk : tuple
        Unpacked as ``(n, t, k)``. Arrays are generated with shape
        ``(k, t, n)`` for ``x`` and ``(t, n)`` for ``y`` and ``w``.
    other_effects : int
        Number of integer-valued effect variables drawn from ``[0, 4)``.
        ``c`` stays ``None`` when this is 0.
    rng : object, optional
        Object exposing ``get_state``/``set_state``. When given, the global
        NumPy state is set from it before drawing and it is updated from the
        global state before returning. When ``None`` the global state is
        seeded with 12345.

    Returns
    -------
    AttrDict
        With keys ``y``, ``x``, ``w``, ``c``, ``vc1`` and ``vc2``.
    """
    # All draws below go through np.random's global state, so the order of the
    # statements determines the generated values.
    if rng is None:
        np.random.seed(12345)
    else:
        np.random.set_state(rng.get_state())

    n, t, k = ntk
    # A bool adds 0 or 1: reserve one extra regressor for the constant.
    k += const
    x = standard_normal((k, t, n))
    # Coefficients 1/k, 2/k, ..., 1, shaped to broadcast over (t, n).
    beta = np.arange(1, k + 1)[:, None, None] / k
    # The (1, n) term broadcasts across the t axis, i.e. it is constant along
    # that axis for each column.
    y = (x * beta).sum(0) + standard_normal((t, n)) + 2 * standard_normal((1, n))
    w = np.random.chisquare(5, (t, n)) / 5
    c = None
    if other_effects == 1:
        cats = ['Industries']
    else:
        cats = ['cat.' + str(i) for i in range(other_effects)]
    if other_effects:
        c = np.random.randint(0, 4, (other_effects, t, n))

    vcats = ['varcat.' + str(i) for i in range(2)]
    # Two integer variables of shape (2, t, n); multiplying by ones repeats
    # each (1, n) draw along the t axis.
    vc2 = np.ones((2, t, 1)) @ np.random.randint(0, n // 2, (2, 1, n))
    # First of the two variables, kept 3-d with a leading axis of length 1.
    vc1 = vc2[[0]]

    if const:
        x[0] = 1.0

    if missing > 0:
        locs = np.random.choice(n * t, int(n * t * missing))
        y.flat[locs] = np.nan
        locs = np.random.choice(n * t * k, int(n * t * k * missing))
        x.flat[locs] = np.nan

    if datatype in ('pandas', 'xarray'):
        entities = ['firm' + str(i) for i in range(n)]
        time = pd.date_range('1-1-1900', periods=t, freq='A-DEC')
        var_names = ['x' + str(i) for i in range(k)]
        # y = pd.DataFrame(y, index=time, columns=entities)
        # Every frame is reindexed to y's index so all outputs share one row
        # order.
        y = panel_to_frame(y[None], items=['y'], major_axis=time, minor_axis=entities, swap=True)
        w = panel_to_frame(w[None], items=['w'], major_axis=time, minor_axis=entities, swap=True)
        w = w.reindex(y.index)
        x = panel_to_frame(x, items=var_names, major_axis=time, minor_axis=entities, swap=True)
        x = x.reindex(y.index)
        # NOTE(review): c is None here when other_effects == 0; this relies on
        # panel_to_frame accepting None - confirm against its definition.
        c = panel_to_frame(c, items=cats, major_axis=time, minor_axis=entities, swap=True)
        c = c.reindex(y.index)
        vc1 = panel_to_frame(vc1, items=vcats[:1], major_axis=time, minor_axis=entities, swap=True)
        vc1 = vc1.reindex(y.index)
        vc2 = panel_to_frame(vc2, items=vcats, major_axis=time, minor_axis=entities, swap=True)
        vc2 = vc2.reindex(y.index)

    if datatype == 'xarray':
        # TODO: This is broken now, need to transform multiindex to xarray 3d
        import xarray as xr
        x = xr.DataArray(PanelData(x).values3d,
                         coords={'entities': entities, 'time': time,
                                 'vars': var_names},
                         dims=['vars', 'time', 'entities'])
        y = xr.DataArray(PanelData(y).values3d,
                         coords={'entities': entities, 'time': time,
                                 'vars': ['y']},
                         dims=['vars', 'time', 'entities'])
        w = xr.DataArray(PanelData(w).values3d,
                         coords={'entities': entities, 'time': time,
                                 'vars': ['w']},
                         dims=['vars', 'time', 'entities'])
        # c is the frame built above; it is only converted when it has columns.
        if c.shape[1] > 0:
            c = xr.DataArray(PanelData(c).values3d,
                             coords={'entities': entities, 'time': time,
                                     'vars': c.columns},
                             dims=['vars', 'time', 'entities'])
        vc1 = xr.DataArray(PanelData(vc1).values3d,
                           coords={'entities': entities, 'time': time,
                                   'vars': vc1.columns},
                           dims=['vars', 'time', 'entities'])
        vc2 = xr.DataArray(PanelData(vc2).values3d,
                           coords={'entities': entities, 'time': time,
                                   'vars': vc2.columns},
                           dims=['vars', 'time', 'entities'])

    # Hand the advanced global state back so the caller's generator continues
    # from where this function stopped.
    if rng is not None:
        rng.set_state(np.random.get_state())

    return AttrDict(y=y, x=x, w=w, c=c, vc1=vc1, vc2=vc2)
示例#11
0
def test_incorrect_types_xarray():
    """A one-dimensional xarray DataArray must be rejected with ValueError."""
    with pytest.raises(ValueError):
        one_dim = xr.DataArray(np.random.randn(10))
        PanelData(one_dim)
示例#12
0
        # Fragment of a larger routine: `key`, `n`, `options`, `rs` and
        # `NUM_REPS` are defined outside the visible code.
        # Empty containers; presumably filled later in the loop (not visible).
        std_errs = {}
        std_errs_no = {}
        std_errs_u = {}
        std_errs_u_no = {}
        std_errs_r = {}
        std_errs_r_no = {}
        vals = np.zeros((NUM_REPS, 5, 7))
        for b in range(NUM_REPS):
            # Progress output every 25 replications.
            if b % 25 == 0:
                print(key, n, b)
            data = generate_data(0.00, 'pandas', ntk=(n, 3, 5), other_effects=1, const=False, rng=rs)
            # presumably model options and fit options - verify against the
            # definition of `options`.
            mo, fo = options[key]

            # `key` has the form '<model type>:<cluster type>'.
            mod_type, cluster_type = key.split(':')

            y = PanelData(data.y)
            # Random integer labels with the same shape as y's DataFrame.
            random_effects = np.random.randint(0, n // 3, size=y.dataframe.shape)
            other_random = np.random.randint(0, n // 5, size=y.dataframe.shape)

            if mod_type == 'random':
                effects = y.copy()
                effects.dataframe.iloc[:, :] = random_effects
                # mo is the object stored in options[key], so this assignment
                # mutates that stored object.
                mo['other_effects'] = effects

            if cluster_type in ('random', 'other-random', 'entity-nested', 'random-nested'):
                clusters = y.copy()
                if cluster_type == 'random':
                    clusters.dataframe.iloc[:, :] = random_effects
                elif cluster_type == 'other-random':
                    clusters.dataframe.iloc[:, :] = other_random
                # NOTE(review): the membership test above uses 'entity-nested'
                # (hyphen) while this branch compares with 'entity_nested'
                # (underscore), so it cannot match that value - confirm which
                # spelling is intended.
                elif cluster_type == 'entity_nested':
import pytest

from linearmodels.panel.data import PanelData
from linearmodels.shared.typed_getters import (
    get_array_like,
    get_bool,
    get_float,
    get_panel_data_like,
    get_string,
)

# Types treated as array-like, and the same set extended with PanelData.
ARRAY_LIKE: Tuple[Type, ...] = (np.ndarray, pd.Series, pd.DataFrame)
PANEL_LIKE: Tuple[Type, ...] = ARRAY_LIKE + (PanelData, )
# One minimal sample value per type; ARRAYS is used to parametrize the `arr`
# fixture defined below.
ARRAYS: Tuple[Any,
              ...] = (np.array([1.0]), pd.Series([1.0]), pd.DataFrame([[1.0]]))
PANELS: Tuple[Any, ...] = ARRAYS + (PanelData(np.array([[[1.0]]])), )
# xarray is optional: when it imports, DataArray is appended to every tuple;
# otherwise the tuples are left unchanged.
try:
    import xarray as xr

    ARRAY_LIKE += (xr.DataArray, )
    PANEL_LIKE += (xr.DataArray, )
    # ARRAYS[0] is the one-element NumPy array in both lines below, so the
    # DataArray added to PANELS is one-dimensional as well.
    ARRAYS += (xr.DataArray(ARRAYS[0]), )
    PANELS += (xr.DataArray(ARRAYS[0]), )
except ImportError:
    pass


@pytest.fixture(params=ARRAYS)
def arr(request):
    """Provide each entry of ``ARRAYS`` in turn as the fixture value."""
    current = request.param
    return current
示例#14
0
複数年の場合。

df.reset_index().query('year in [2000,2002]')

上と同じ結果。

df.reset_index().query('year not in [2001]')

## `linearmodels`の`PanelData`

`linearmodels`では`MultiIndex`化された`DataFrame`をそのまま読み込み推定することができる。一方で,`linearmodels`の関数`PanelData`を使い`MultiIndex`化された`DataFrame`を`PanelData`オブジェクトに変換すると分析に必要な計算を簡単にできるようになる。必須ではないが,知っていて損はしない関数である。

まず`df`を`PanelData`オブジェクトに変換する。

dfp = PanelData(df)
dfp

---
属性`shape`は,`PanelData`に含まれるデータの次元をタプルとして返す。以下が返り値の内容である。

$$
\left(\text{変数の数},\text{期間数},\text{観察単位の数}\right)
$$

dfp.shape

* 変数の数:4(列にある変数)
* 期間数:3(年)
* 観察単位の数:3(国)
示例#15
0
def test_missing(panel):
    """``isnull`` must flag exactly the rows of ``values2d`` containing NaN."""
    # Introduce NaNs into the first item for every third minor-axis entry.
    panel.iloc[0, :, ::3] = np.nan
    wrapped = PanelData(panel)
    assert_equal(wrapped.isnull,
                 np.any(np.isnan(wrapped.values2d), axis=1))
示例#16
0
def test_repr_html(mi_df):
    """The HTML representation must contain a ``<br/>`` tag."""
    rendered = PanelData(mi_df)._repr_html_()
    assert '<br/>' in rendered
示例#17
0
def test_str_repr(panel):
    """``str`` names the class and ``__repr__`` includes the object's id in hex."""
    wrapped = PanelData(panel)
    as_text = str(wrapped)
    assert 'PanelData' in as_text
    address = str(hex(id(wrapped)))
    assert address in wrapped.__repr__()
示例#18
0
def generate_data(
    missing: float,
    datatype: Literal["pandas", "xarray", "numpy"],
    const: bool = False,
    ntk: tuple[int, int, int] = (971, 7, 5),
    other_effects: int = 0,
    rng: RandomState | None = None,
    num_cats: int | list[int] = 4,
):
    """
    Simulate a panel data set for use in tests.

    Parameters
    ----------
    missing : float
        Share of positions to set to NaN in ``y`` and ``x``. Positions are
        chosen with ``np.random.choice`` using its default arguments, so the
        same position may be drawn more than once. Nothing is set when the
        value is not positive.
    datatype : {"pandas", "xarray", "numpy"}
        Container type of the returned values. ``"numpy"`` returns the raw
        arrays, ``"pandas"`` the frames built by ``panel_to_frame`` and
        ``"xarray"`` ``DataArray`` objects built from those frames.
    const : bool
        When true, one extra regressor is added and ``x[0]`` is set to 1.0.
    ntk : tuple[int, int, int]
        Unpacked as ``(n, t, k)``. Arrays are generated with shape
        ``(k, t, n)`` for ``x`` and ``(t, n)`` for ``y`` and ``w``.
    other_effects : int
        Number of integer-valued effect variables to generate.
    rng : RandomState, optional
        When given, the global NumPy state is set from it before drawing and
        it is updated from the global state once all draws are done. When
        ``None`` the global state is seeded with 12345.
    num_cats : int or list[int]
        Exclusive upper bound of the integer draws for each effect variable.
        A single int is repeated for all ``other_effects`` variables.

    Returns
    -------
    AttrDict
        With keys ``y``, ``x``, ``w``, ``c``, ``vc1`` and ``vc2``.
    """
    # All draws below go through np.random's global state, so the order of the
    # statements determines the generated values.
    if rng is None:
        np.random.seed(12345)
    else:
        np.random.set_state(rng.get_state())

    n, t, k = ntk
    # A bool adds 0 or 1: reserve one extra regressor for the constant.
    k += const
    x = standard_normal((k, t, n))
    # Coefficients 1/k, 2/k, ..., 1, shaped to broadcast over (t, n).
    beta = np.arange(1, k + 1)[:, None, None] / k
    y = np.empty((t, n), dtype=np.float64)
    # The (1, n) term broadcasts across the t axis.
    y[:, :] = (x * beta).sum(0) + standard_normal(
        (t, n)) + 2 * standard_normal((1, n))
    w = np.random.chisquare(5, (t, n)) / 5
    # Placeholder with zero columns; replaced below when effects are requested.
    c = np.empty((y.size, 0), dtype=int)
    if other_effects == 1:
        cats = ["Industries"]
    else:
        cats = ["cat." + str(i) for i in range(other_effects)]
    if other_effects:
        if isinstance(num_cats, int):
            num_cats = [num_cats] * other_effects
        oe = []
        for i in range(other_effects):
            nc = num_cats[i]
            oe.append(np.random.randint(0, nc, (1, t, n)))
        # Stacked to shape (other_effects, t, n).
        c = np.concatenate(oe, 0)

    vcats = ["varcat." + str(i) for i in range(2)]
    # Two integer variables of shape (2, t, n); multiplying by ones repeats
    # each (1, n) draw along the t axis.
    vc2 = np.ones((2, t, 1)) @ np.random.randint(0, n // 2, (2, 1, n))
    # First of the two variables, kept 3-d with a leading axis of length 1.
    vc1 = vc2[[0]]

    if const:
        x[0] = 1.0

    if missing > 0:
        locs = np.random.choice(n * t, int(n * t * missing))
        y.flat[locs] = np.nan
        locs = np.random.choice(n * t * k, int(n * t * k * missing))
        x.flat[locs] = np.nan
    # No random draws happen after this point, so the state can be handed
    # back to the caller's generator before any early return.
    if rng is not None:
        rng.set_state(np.random.get_state())
    if datatype == "numpy":
        return AttrDict(y=y, x=x, w=w, c=c, vc1=vc1, vc2=vc2)

    entities = ["firm" + str(i) for i in range(n)]
    time = date_range("1-1-1900", periods=t, freq="A-DEC")
    var_names = ["x" + str(i) for i in range(k)]
    # y = DataFrame(y, index=time, columns=entities)
    # Every frame is reindexed to y_df's index so all outputs share one row
    # order.
    y_df = panel_to_frame(y[None],
                          items=["y"],
                          major_axis=time,
                          minor_axis=entities,
                          swap=True)
    w_df = panel_to_frame(w[None],
                          items=["w"],
                          major_axis=time,
                          minor_axis=entities,
                          swap=True)
    w_df = w_df.reindex(y_df.index)
    x_df = panel_to_frame(x,
                          items=var_names,
                          major_axis=time,
                          minor_axis=entities,
                          swap=True)
    x_df = x_df.reindex(y_df.index)
    # c.shape[1] is 0 for the placeholder and t once effects were generated.
    if c.shape[1]:
        c_df = panel_to_frame(c,
                              items=cats,
                              major_axis=time,
                              minor_axis=entities,
                              swap=True)
    else:
        c_df = DataFrame(index=y_df.index)
    c_df = c_df.reindex(y_df.index)
    vc1_df = panel_to_frame(vc1,
                            items=vcats[:1],
                            major_axis=time,
                            minor_axis=entities,
                            swap=True)
    vc1_df = vc1_df.reindex(y_df.index)
    vc2_df = panel_to_frame(vc2,
                            items=vcats,
                            major_axis=time,
                            minor_axis=entities,
                            swap=True)
    vc2_df = vc2_df.reindex(y_df.index)
    if datatype == "pandas":
        return AttrDict(y=y_df, x=x_df, w=w_df, c=c_df, vc1=vc1_df, vc2=vc2_df)

    assert datatype == "xarray"
    # Imported here so xarray is only required when this output is requested.
    import xarray as xr
    from xarray.core.dtypes import NA

    x_xr = xr.DataArray(
        PanelData(x_df).values3d,
        coords={
            "entities": entities,
            "time": time,
            "vars": var_names
        },
        dims=["vars", "time", "entities"],
    )
    y_xr = xr.DataArray(
        PanelData(y_df).values3d,
        coords={
            "entities": entities,
            "time": time,
            "vars": ["y"]
        },
        dims=["vars", "time", "entities"],
    )
    w_xr = xr.DataArray(
        PanelData(w_df).values3d,
        coords={
            "entities": entities,
            "time": time,
            "vars": ["w"]
        },
        dims=["vars", "time", "entities"],
    )
    # Without effect variables, xarray's NA is passed as the data value.
    c_vals = PanelData(c_df).values3d if c.shape[1] else NA
    c_xr = xr.DataArray(
        c_vals,
        coords={
            "entities": entities,
            "time": time,
            "vars": c_df.columns
        },
        dims=["vars", "time", "entities"],
    )
    vc1_xr = xr.DataArray(
        PanelData(vc1_df).values3d,
        coords={
            "entities": entities,
            "time": time,
            "vars": vc1_df.columns
        },
        dims=["vars", "time", "entities"],
    )
    vc2_xr = xr.DataArray(
        PanelData(vc2_df).values3d,
        coords={
            "entities": entities,
            "time": time,
            "vars": vc2_df.columns
        },
        dims=["vars", "time", "entities"],
    )
    return AttrDict(y=y_xr, x=x_xr, w=w_xr, c=c_xr, vc1=vc1_xr, vc2=vc2_xr)
示例#19
0
def test_roundtrip_3d(data):
    """``values3d`` must reproduce the values of the original 3-d input."""
    source = data.x
    wrapped = PanelData(source)
    # NumPy input is compared directly; other containers via ``.values``.
    if isinstance(source, np.ndarray):
        expected = source
    else:
        expected = source.values
    assert_equal(wrapped.values3d, expected)
# Script fragment: `beta`, `x`, `eps`, `n`, `t` and `k` are defined earlier,
# outside the visible code.
y = (beta * x).sum(0) + eps
# Add a (1, n) draw that broadcasts across the first axis of y.
y += np.random.randn(1, n)
# One weight per column, repeated for all t rows and scaled to mean one.
w = np.random.chisquare(10, size=(1, n)) / 10.0
w = np.ones((t, 1)) @ w
w = w / w.mean()

items = ["x" + str(i) for i in range(1, k + 1)]
items = ["intercept"] + items
major = pd.date_range("12-31-1999", periods=t, freq="A-DEC")
minor = ["firm." + str(i) for i in range(1, n + 1)]

x = panel_to_frame(x, items, major, minor, swap=True)
y = panel_to_frame(y[None, :], ["y"], major, minor, swap=True)
w = panel_to_frame(w[None, :], ["w"], major, minor, swap=True)

x = PanelData(x)
y = PanelData(y)
w = PanelData(w)

# Pass the axis by keyword: recent pandas versions no longer accept it as a
# positional argument of concat.
z = concat([x.dataframe, y.dataframe, w.dataframe], axis=1)
# Name the levels on the MultiIndex itself. Assigning `.name` on an element
# of `.levels` is rejected by current pandas, and the names are required so
# that reset_index() below produces the `firm` and `time` columns.
final_index = pd.MultiIndex.from_product([minor, major],
                                         names=["firm", "time"])
z = z.reindex(final_index)
z.index.names = ["firm", "time"]

z = z.reset_index()
# Integer code for each firm label.
z["firm_id"] = z.firm.astype("category")
z["firm_id"] = z.firm_id.cat.codes

vars = ["y", "x1", "x2", "x3", "x4", "x5"]
示例#21
0
def test_invalid_seires(panel):
    """A Series taken after ``reset_index`` must be rejected with ValueError."""
    flattened = panel.to_frame().reset_index()
    with pytest.raises(ValueError):
        PanelData(flattened.iloc[:, 0])
示例#22
0
def generate_data(
        missing,
        datatype,
        const=False,
        ntk=(971, 7, 5),
        other_effects=0,
        rng=None,
        num_cats=4,
):
    """
    Simulate a panel data set for use in tests.

    Parameters
    ----------
    missing : float
        Share of positions to set to NaN in ``y`` and ``x``. Positions are
        chosen with ``np.random.choice`` using its default arguments, so the
        same position may be drawn more than once. Nothing is set when the
        value is not positive.
    datatype : str
        ``"pandas"`` converts every output with ``panel_to_frame``;
        ``"xarray"`` additionally wraps the results in ``xr.DataArray``.
        Any other value returns the arrays as generated.
    const : bool
        When true, one extra regressor is added and ``x[0]`` is set to 1.0.
    ntk : tuple
        Unpacked as ``(n, t, k)``. Arrays are generated with shape
        ``(k, t, n)`` for ``x`` and ``(t, n)`` for ``y`` and ``w``.
    other_effects : int
        Number of integer-valued effect variables to generate. ``c`` stays
        ``None`` when this is 0.
    rng : object, optional
        Object exposing ``get_state``/``set_state``. When given, the global
        NumPy state is set from it before drawing and it is updated from the
        global state before returning. When ``None`` the global state is
        seeded with 12345.
    num_cats : int or list
        Exclusive upper bound of the integer draws for each effect variable.
        Anything that is not a list is repeated ``other_effects`` times.

    Returns
    -------
    AttrDict
        With keys ``y``, ``x``, ``w``, ``c``, ``vc1`` and ``vc2``.
    """
    # All draws below go through np.random's global state, so the order of the
    # statements determines the generated values.
    if rng is None:
        np.random.seed(12345)
    else:
        np.random.set_state(rng.get_state())

    n, t, k = ntk
    # A bool adds 0 or 1: reserve one extra regressor for the constant.
    k += const
    x = standard_normal((k, t, n))
    # Coefficients 1/k, 2/k, ..., 1, shaped to broadcast over (t, n).
    beta = np.arange(1, k + 1)[:, None, None] / k
    # The (1, n) term broadcasts across the t axis.
    y = (x * beta).sum(0) + standard_normal((t, n)) + 2 * standard_normal(
        (1, n))
    w = np.random.chisquare(5, (t, n)) / 5
    c = None
    if other_effects == 1:
        cats = ["Industries"]
    else:
        cats = ["cat." + str(i) for i in range(other_effects)]
    if other_effects:
        if not isinstance(num_cats, list):
            num_cats = [num_cats] * other_effects
        c = []
        for i in range(other_effects):
            nc = num_cats[i]
            c.append(np.random.randint(0, nc, (1, t, n)))
        # Stacked to shape (other_effects, t, n).
        c = np.concatenate(c, 0)

    vcats = ["varcat." + str(i) for i in range(2)]
    # Two integer variables of shape (2, t, n); multiplying by ones repeats
    # each (1, n) draw along the t axis.
    vc2 = np.ones((2, t, 1)) @ np.random.randint(0, n // 2, (2, 1, n))
    # First of the two variables, kept 3-d with a leading axis of length 1.
    vc1 = vc2[[0]]

    if const:
        x[0] = 1.0

    if missing > 0:
        locs = np.random.choice(n * t, int(n * t * missing))
        y.flat[locs] = np.nan
        locs = np.random.choice(n * t * k, int(n * t * k * missing))
        x.flat[locs] = np.nan

    if datatype in ("pandas", "xarray"):
        entities = ["firm" + str(i) for i in range(n)]
        time = date_range("1-1-1900", periods=t, freq="A-DEC")
        var_names = ["x" + str(i) for i in range(k)]
        # y = DataFrame(y, index=time, columns=entities)
        # Every frame is reindexed to y's index so all outputs share one row
        # order.
        y = panel_to_frame(y[None],
                           items=["y"],
                           major_axis=time,
                           minor_axis=entities,
                           swap=True)
        w = panel_to_frame(w[None],
                           items=["w"],
                           major_axis=time,
                           minor_axis=entities,
                           swap=True)
        w = w.reindex(y.index)
        x = panel_to_frame(x,
                           items=var_names,
                           major_axis=time,
                           minor_axis=entities,
                           swap=True)
        x = x.reindex(y.index)
        # NOTE(review): c is None here when other_effects == 0; this relies on
        # panel_to_frame accepting None - confirm against its definition.
        c = panel_to_frame(c,
                           items=cats,
                           major_axis=time,
                           minor_axis=entities,
                           swap=True)
        c = c.reindex(y.index)
        vc1 = panel_to_frame(vc1,
                             items=vcats[:1],
                             major_axis=time,
                             minor_axis=entities,
                             swap=True)
        vc1 = vc1.reindex(y.index)
        vc2 = panel_to_frame(vc2,
                             items=vcats,
                             major_axis=time,
                             minor_axis=entities,
                             swap=True)
        vc2 = vc2.reindex(y.index)

    if datatype == "xarray":
        # TODO: This is broken now, need to transform MultiIndex to xarray 3d
        import xarray as xr

        x = xr.DataArray(
            PanelData(x).values3d,
            coords={
                "entities": entities,
                "time": time,
                "vars": var_names
            },
            dims=["vars", "time", "entities"],
        )
        y = xr.DataArray(
            PanelData(y).values3d,
            coords={
                "entities": entities,
                "time": time,
                "vars": ["y"]
            },
            dims=["vars", "time", "entities"],
        )
        w = xr.DataArray(
            PanelData(w).values3d,
            coords={
                "entities": entities,
                "time": time,
                "vars": ["w"]
            },
            dims=["vars", "time", "entities"],
        )
        # c is the frame built above; it is only converted when it has columns.
        if c.shape[1] > 0:
            c = xr.DataArray(
                PanelData(c).values3d,
                coords={
                    "entities": entities,
                    "time": time,
                    "vars": c.columns
                },
                dims=["vars", "time", "entities"],
            )
        vc1 = xr.DataArray(
            PanelData(vc1).values3d,
            coords={
                "entities": entities,
                "time": time,
                "vars": vc1.columns
            },
            dims=["vars", "time", "entities"],
        )
        vc2 = xr.DataArray(
            PanelData(vc2).values3d,
            coords={
                "entities": entities,
                "time": time,
                "vars": vc2.columns
            },
            dims=["vars", "time", "entities"],
        )

    # Hand the advanced global state back so the caller's generator continues
    # from where this function stopped.
    if rng is not None:
        rng.set_state(np.random.get_state())

    return AttrDict(y=y, x=x, w=w, c=c, vc1=vc1, vc2=vc2)
示例#23
0
def test_repr_html(panel):
    """The HTML representation must contain a ``<br/>`` tag."""
    rendered = PanelData(panel)._repr_html_()
    assert '<br/>' in rendered
示例#24
0
def test_valid_weight_shape(data):
    # Same size
    n = np.prod(data.y.shape)
    weights = 1 + np.random.random_sample(n)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    w = mod.weights.values2d
    missing = PanelData(data.y).isnull | PanelData(data.x).isnull
    expected = weights[~missing.squeeze()][:, None]
    expected = expected / expected.mean()
    assert_equal(w, expected)

    # Per time
    if isinstance(data.x, pd.DataFrame):
        n = len(data.y.index.levels[1])
        k = len(data.y.index.levels[0])
    elif isinstance(data.x, np.ndarray):
        n = data.y.shape[0]
        k = data.y.shape[1]
    else:
        n = data.y.shape[1]
        k = data.y.shape[2]

    weights = 1 + np.random.random_sample(n)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    w = mod.weights.values2d
    expected = weights[:, None] @ np.ones((1, k))
    expected = expected.T.ravel()
    expected = expected[~missing.squeeze()][:, None]
    expected = expected / expected.mean()
    assert_equal(w, expected)

    # Per entity
    if isinstance(data.x, pd.DataFrame):
        n = len(data.y.index.levels[0])
        k = len(data.y.index.levels[1])
    elif isinstance(data.x, np.ndarray):
        n = data.y.shape[1]
        k = data.y.shape[0]
    else:
        n = data.y.shape[2]
        k = data.y.shape[1]
    weights = 1 + np.random.random_sample(n)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    w = mod.weights.values2d
    expected = np.ones((k, 1)) @ weights[None, :]
    expected = expected.T.ravel()
    expected = expected[~missing.squeeze()][:, None]
    expected = expected / expected.mean()
    assert_equal(w, expected)

    weights = 1 + np.random.random_sample(data.y.shape)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    w = mod.weights.values2d
    expected = weights.T.ravel()
    expected = expected[~missing.squeeze()][:, None]
    expected = expected / expected.mean()
    assert_equal(w, expected)
示例#25
0
def test_dimensions(mi_df):
    """Dimension attributes must match the sizes of the MultiIndex frame.

    ``nentity`` and ``nobs`` are compared with the lengths of the first and
    second index level, ``nvar`` with the number of columns.
    """
    wrapped = PanelData(mi_df)
    index_levels = mi_df.index.levels
    assert wrapped.nentity == len(index_levels[0])
    assert wrapped.nvar == mi_df.shape[1]
    assert wrapped.nobs == len(index_levels[1])
示例#26
0
def test_dimensions(panel):
    """Dimension attributes must match the shape of the source panel.

    ``nvar``, ``nobs`` and ``nentity`` are compared with axes 0, 1 and 2 of
    ``panel.shape`` respectively.
    """
    wrapped = PanelData(panel)
    assert wrapped.nentity == panel.shape[2]
    assert wrapped.nvar == panel.shape[0]
    assert wrapped.nobs == panel.shape[1]
示例#27
0
def test_incorrect_types():
    """A plain Python list must be rejected with TypeError."""
    with pytest.raises(TypeError):
        as_list = list(np.random.randn(10))
        PanelData(as_list)
示例#28
0
# Script fragment: `beta`, `x`, `eps`, `n`, `t` and `k` are defined earlier,
# outside the visible code.
y = (beta * x).sum(0) + eps
# Add a (1, n) draw that broadcasts across the first axis of y.
y += np.random.randn(1, n)
# One weight per column, repeated for all t rows and scaled to mean one.
w = np.random.chisquare(10, size=(1, n)) / 10.0
w = np.ones((t, 1)) @ w
w = w / float(w.mean())

items = ["x" + str(i) for i in range(1, k + 1)]
items = ["intercept"] + items
major = pd.date_range("12-31-1999", periods=t, freq="A-DEC")
minor = ["firm." + str(i) for i in range(1, n + 1)]

x = panel_to_frame(x, items, major, minor, swap=True)
y = panel_to_frame(y[None, :], ["y"], major, minor, swap=True)
w = panel_to_frame(w[None, :], ["w"], major, minor, swap=True)

x_panel_data = PanelData(x)
y_panel_data = PanelData(y)
w_panel_data = PanelData(w)

# Pass the axis by keyword: recent pandas versions no longer accept it as a
# positional argument of concat.
z = pd.concat(
    [x_panel_data.dataframe, y_panel_data.dataframe, w_panel_data.dataframe],
    axis=1,
    sort=False,
)
# Name the levels on the MultiIndex itself. Assigning `.name` on an element
# of `.levels` is rejected by current pandas, and the names determine the
# column labels produced by reset_index() below.
final_index = pd.MultiIndex.from_product([minor, major],
                                         names=["firm", "time"])
z = z.reindex(final_index)
z.index.names = ["firm", "time"]

z = z.reset_index()