Exemplo n.º 1
0
    def __init__(self, x, var_name='x', convert_dummies=True, drop_first=True):
        """
        Convert supported panel inputs to a DataFrame with a 2-level MultiIndex.

        Parameters
        ----------
        x : {PanelData, DataArray, Series, DataFrame, Panel, ndarray}
            Input data.  A PanelData instance contributes its ``dataframe``.
            DataArrays must be 2-d or 3-d.  A Series must carry a MultiIndex.
            A DataFrame either has a 2-level MultiIndex (used after copying)
            or is treated as a single variable whose columns become the first
            index level and whose index becomes the second.  ndarrays must be
            2-d or 3-d; 3-d arrays are unpacked as (variables, time,
            entities) and 2-d arrays as a single variable.
        var_name : str
            Name for the variable when the input has none; a ``.i`` suffix is
            appended when an ndarray holds more than one variable.
        convert_dummies : bool
            When True the frame is passed through ``expand_categoricals`` and
            then cast to float64.
        drop_first : bool
            Forwarded to ``expand_categoricals``.

        Raises
        ------
        ValueError
            If a DataArray or ndarray has an unsupported number of
            dimensions, a Series has no MultiIndex, a DataFrame MultiIndex
            does not have exactly 2 levels, or the second index level is
            neither numeric nor date-like.
        TypeError
            If ``x`` is none of the supported types.
        """
        self._var_name = var_name
        self._convert_dummies = convert_dummies
        self._drop_first = drop_first
        if isinstance(x, PanelData):
            x = x.dataframe
        # Keep a reference to the input as it was before any reshaping
        self._original = x

        if isinstance(x, DataArray):
            if x.ndim not in (2, 3):
                raise ValueError('Only 2-d or 3-d DataArrays are supported')
            # NOTE(review): the result is handled below as a DataFrame or
            # Panel -- confirm against the xarray/pandas versions in use
            x = x.to_pandas()

        if isinstance(x, Series) and isinstance(x.index, pd.MultiIndex):
            x = DataFrame(x)
        elif isinstance(x, Series):
            raise ValueError(
                'Series can only be used with a 2-level MultiIndex')

        if isinstance(x, (Panel, DataFrame)):
            if isinstance(x, DataFrame):
                if isinstance(x.index, pd.MultiIndex):
                    if len(x.index.levels) != 2:
                        raise ValueError('DataFrame input must have a '
                                         'MultiIndex with 2 levels')
                    self._frame = x.copy()
                else:
                    # Wide single-variable frame: transpose and stack so the
                    # columns form the outer level and the index the inner
                    self._frame = DataFrame(
                        {var_name: x.T.stack(dropna=False)})
            else:
                self._frame = x.swapaxes(1,
                                         2).to_frame(filter_observations=False)
        elif isinstance(x, ndarray):
            if not 2 <= x.ndim <= 3:
                raise ValueError('2 or 3-d array required for numpy input')
            if x.ndim == 2:
                # Promote to 3-d with a single leading variable axis
                x = x[None, :, :]

            k, t, n = x.shape
            variables = [var_name] if k == 1 else [
                var_name + '.{0}'.format(i) for i in range(k)
            ]
            entities = ['entity.{0}'.format(i) for i in range(n)]
            time = list(range(t))
            x = x.astype(np.float64)
            panel = Panel(x,
                          items=variables,
                          major_axis=time,
                          minor_axis=entities)
            self._frame = panel.swapaxes(1,
                                         2).to_frame(filter_observations=False)
        else:
            raise TypeError('Only ndarrays, DataFrames, Panels or DataArrays '
                            'supported.')
        if convert_dummies:
            self._frame = expand_categoricals(self._frame, drop_first)
            self._frame = self._frame.astype(np.float64)

        # The second index level is validated as the time dimension
        time_index = Series(self._frame.index.levels[1])
        if not (is_numeric_dtype(time_index.dtype)
                or is_datetime64_any_dtype(time_index.dtype)):
            raise ValueError('The index on the time dimension must be either '
                             'numeric or date-like')
        self._k, self._t, self._n = self.panel.shape
        # Rename through the MultiIndex itself.  Assigning ``.name`` on the
        # objects returned by ``index.levels`` is not a supported way to
        # rename a MultiIndex and raises on newer pandas releases.
        self._frame.index = self._frame.index.set_names(['entity', 'time'])
Exemplo n.º 2
0
    def __init__(self, x, var_name='x', nobs=None, convert_dummies=True, drop_first=True):
        """
        Wrap array-like input and expose it as both an ndarray and a DataFrame.

        Parameters
        ----------
        x : {IVData, ndarray, Series, DataFrame, DataArray, None}
            Data to wrap.  An IVData instance has its attributes deep-copied.
            None is replaced by an empty array with ``nobs`` rows and no
            columns.  Inputs with more than 2 dimensions are rejected.
        var_name : str
            Name used for unnamed data; a ``.i`` suffix is appended per column
            when there is more than one.
        nobs : int, optional
            Required number of rows.  Mandatory when ``x`` is None.
        convert_dummies : bool
            When True, pandas input is passed through ``expand_categoricals``
            and the stored array is cast to float64.
        drop_first : bool
            Forwarded to ``expand_categoricals``.

        Raises
        ------
        ValueError
            If ``x`` is None without ``nobs``, has more than 2 dimensions,
            has duplicate column names, contains a column that is neither
            numeric, string nor categorical, or does not have ``nobs`` rows.
        TypeError
            If ``x`` is not one of the supported types.
        """
        # Copy-construction: clone the other instance's state and stop here
        if isinstance(x, IVData):
            self.__dict__.update(copy.deepcopy(x.__dict__))
            return

        if x is None:
            if nobs is None:
                raise ValueError('nobs required when x is None')
            # Placeholder with the requested rows and zero columns
            x = np.empty((nobs, 0))

        self.original = x
        xndim = x.ndim
        if xndim > 2:
            raise ValueError(dim_err.format(var_name, xndim))

        if isinstance(x, np.ndarray):
            # astype returns a new array, so the in-place reshape below acts
            # on that array rather than on the one passed in
            values = x.astype(dtype=np.float64)
            if xndim == 1:
                values.shape = (values.shape[0], -1)

            self._ndarray = values.astype(np.float64)
            row_labels = list(range(values.shape[0]))
            n_cols = values.shape[1]
            col_labels = [var_name] if n_cols == 1 else [
                var_name + '.{0}'.format(j) for j in range(n_cols)
            ]
            self._pandas = pd.DataFrame(values,
                                        index=row_labels,
                                        columns=col_labels)
            self._labels = {0: row_labels, 1: col_labels}

        elif isinstance(x, (pd.Series, pd.DataFrame)):
            if isinstance(x, pd.Series):
                # Fall back to var_name when the Series name is falsy
                x = pd.DataFrame({(x.name or var_name): x})

            columns = list(x.columns)
            if len(columns) != len(set(columns)):
                raise ValueError('DataFrame contains duplicate column names. '
                                 'All column names must be distinct')

            is_copy = False
            all_numeric = True
            for label in columns:
                column = x[label]
                if is_string_dtype(column.dtype) and \
                        column.map(is_string_like).all():
                    # All-string columns are stored as categoricals; the
                    # frame is copied once, on the first such column, before
                    # it is written to
                    column = column.astype('category')
                    if not is_copy:
                        x = x.copy()
                        is_copy = True
                    x[label] = column

                dtype = column.dtype
                numeric = is_numeric_dtype(dtype)
                all_numeric = all_numeric and numeric
                if not numeric and not is_categorical_dtype(dtype):
                    raise ValueError('Only numeric, string  or categorical '
                                     'data permitted')

            if convert_dummies:
                x = expand_categoricals(x, drop_first)

            self._pandas = x
            self._ndarray = self._pandas.values
            if all_numeric or convert_dummies:
                self._ndarray = self._ndarray.astype(np.float64)
            self._labels = {
                axis: list(labels) for axis, labels in enumerate(x.axes)
            }

        else:
            # Anything else must be an xarray DataArray; xarray is imported
            # only when this branch is reached
            try:
                import xarray as xr
            except ImportError:
                raise TypeError(type_err)
            if not isinstance(x, xr.DataArray):
                raise TypeError(type_err)

            if x.ndim == 1:
                x = xr.concat([x], dim=var_name).transpose()

            row_labels = list(x.coords[x.dims[0]].values)
            col_labels = x.coords[x.dims[1]].values
            if is_numeric_dtype(col_labels.dtype):
                # Numeric column coordinates are replaced by generated names
                col_labels = [
                    var_name + '.{0}'.format(j) for j in range(x.shape[1])
                ]
            col_labels = list(col_labels)
            self._ndarray = x.values.astype(np.float64)
            self._pandas = pd.DataFrame(self._ndarray,
                                        columns=col_labels,
                                        index=row_labels)
            self._labels = {0: row_labels, 1: col_labels}

        if nobs is not None and self._ndarray.shape[0] != nobs:
            template = ('Array required to have {nobs} obs, has '
                        '{act}')
            raise ValueError(
                template.format(nobs=nobs, act=self._ndarray.shape[0]))
Exemplo n.º 3
0
    def __init__(self,
                 x,
                 var_name='x',
                 convert_dummies=True,
                 drop_first=True,
                 copy=True):
        """
        Convert supported panel inputs to a DataFrame with a 2-level MultiIndex.

        Parameters
        ----------
        x : {PanelData, DataArray, Series, DataFrame, Panel, ndarray}
            Input data.  A PanelData instance contributes its ``dataframe``.
            DataArrays (when xarray is importable) must be 2-d or 3-d.  A
            Series must carry a MultiIndex.  A DataFrame either has a 2-level
            MultiIndex or is treated as a single variable whose columns
            become the first index level and whose index becomes the second.
            ndarrays must be 2-d or 3-d; 3-d arrays are unpacked as
            (variables, time, entities) and 2-d arrays as a single variable.
        var_name : str
            Name for the variable when the input has none; a numeric suffix
            is appended when an ndarray holds more than one variable.
        convert_dummies : bool
            When True the frame is passed through ``expand_categoricals`` and
            then cast to float64.
        drop_first : bool
            Forwarded to ``expand_categoricals``.
        copy : bool
            When True a MultiIndex DataFrame input is copied before use;
            otherwise the passed frame itself is stored.

        Raises
        ------
        ValueError
            If a DataArray or ndarray has an unsupported number of
            dimensions, a Series has no MultiIndex, a DataFrame MultiIndex
            does not have exactly 2 levels, or the second index level is
            neither numeric nor date-like.
        TypeError
            If ``x`` is none of the supported types.
        """
        self._var_name = var_name
        self._convert_dummies = convert_dummies
        self._drop_first = drop_first
        self._panel = None
        self._shape = None
        # Default level names; replaced by the input's own names when present
        index_names = ['entity', 'time']
        if isinstance(x, PanelData):
            x = x.dataframe
        # Keep a reference to the input as it was before any reshaping
        self._original = x

        if not isinstance(x, (Series, DataFrame, Panel, np.ndarray)):
            # xarray is optional.  Only the import is guarded so that an
            # ImportError raised while converting is not silently dropped.
            try:
                from xarray import DataArray
            except ImportError:
                DataArray = None
            if DataArray is not None and isinstance(x, DataArray):
                if x.ndim not in (2, 3):
                    raise ValueError(
                        'Only 2-d or 3-d DataArrays are supported')
                if x.ndim == 2:
                    x = x.to_pandas()
                else:
                    items = x.coords[x.dims[0]].values.tolist()
                    major = x.coords[x.dims[1]].values.tolist()
                    minor = x.coords[x.dims[2]].values.tolist()
                    values = x.values
                    x = panel_to_frame(values, items, major, minor, True)

        if isinstance(x, Series) and isinstance(x.index, MultiIndex):
            x = DataFrame(x)
        elif isinstance(x, Series):
            raise ValueError(
                'Series can only be used with a 2-level MultiIndex')

        if isinstance(x, (Panel, DataFrame)):
            if isinstance(x, DataFrame):
                if isinstance(x.index, MultiIndex):
                    if len(x.index.levels) != 2:
                        raise ValueError('DataFrame input must have a '
                                         'MultiIndex with 2 levels')
                    if isinstance(self._original,
                                  (DataFrame, PanelData, Series)):
                        # Preserve level names supplied with pandas input
                        index_names = [
                            name or default for name, default in zip(
                                x.index.names, index_names)
                        ]
                    self._frame = x
                    if copy:
                        self._frame = self._frame.copy()
                else:
                    # Wide single-variable frame: transpose and stack so the
                    # columns form the outer level and the index the inner
                    self._frame = DataFrame(
                        {var_name: x.T.stack(dropna=False)})
            else:
                self._frame = x.swapaxes(1,
                                         2).to_frame(filter_observations=False)
        elif isinstance(x, np.ndarray):
            if x.ndim not in (2, 3):
                raise ValueError('2 or 3-d array required for numpy input')
            if x.ndim == 2:
                # Promote to 3-d with a single leading variable axis
                x = x[None, :, :]

            k, t, n = x.shape
            # NOTE(review): the pad width is int(log10(count) + .01), which
            # is narrower than the largest index for counts such as 11..99,
            # so labels are not uniformly zero-padded there.  Left unchanged
            # because the generated labels are externally visible -- confirm
            # whether this is intended.
            var_str = var_name + '.{0:0>' + str(int(np.log10(k) + .01)) + '}'
            variables = [var_name] if k == 1 else [
                var_str.format(i) for i in range(k)
            ]
            entity_str = 'entity.{0:0>' + str(int(np.log10(n) + .01)) + '}'
            entities = [entity_str.format(i) for i in range(n)]
            time = list(range(t))
            x = x.astype(np.float64, copy=False)
            panel = _Panel.from_array(x,
                                      items=variables,
                                      major_axis=time,
                                      minor_axis=entities)
            self._fake_panel = panel
            self._frame = panel.to_frame()
        else:
            raise TypeError('Only ndarrays, DataFrames, Panels or DataArrays '
                            'are supported')
        if convert_dummies:
            self._frame = expand_categoricals(self._frame, drop_first)
            self._frame = self._frame.astype(np.float64, copy=False)

        # The second index level is validated as the time dimension
        time_index = Series(self._frame.index.levels[1])
        if not (is_numeric_dtype(time_index.dtype)
                or is_datetime64_any_dtype(time_index.dtype)):
            raise ValueError('The index on the time dimension must be either '
                             'numeric or date-like')
        self._k, self._t, self._n = self.shape
        # Rename through the MultiIndex itself.  Assigning ``.name`` on the
        # objects returned by ``index.levels`` is not a supported way to
        # rename a MultiIndex and raises on newer pandas releases.
        self._frame.index = self._frame.index.set_names(index_names)