Exemplo n.º 1
0
    def _maybe_convert_data(self, data, target,
                            *args, **kwargs):
        """
        Internal function to instanciate data and target

        Parameters
        ----------
        data : instance converted to ``pandas.DataFrame``
        target : instance converted to ``pandas.Series``
        args : argument passed from ``__init__``
        kwargs : argument passed from ``__init__``
        """

        init_df = isinstance(data, pd.DataFrame)
        init_target = isinstance(target, (pd.Series, pd.DataFrame))

        def _maybe_convert_target(data, target, index=None):
            if data is not None:
                index = data.index

            target = np.array(target)
            if len(target.shape) == 1:
                target = pd.Series(target, index=index)
            else:
                target = pd.DataFrame(target, index=index)
            return target

        if not init_df and not init_target:
            if data is not None:
                data = pd.DataFrame(data, *args, **kwargs)

            if is_list_like(target):
                target = _maybe_convert_target(data, target)

        elif not init_df:
            if data is not None:
                index = kwargs.pop('index', target.index)
                data = pd.DataFrame(data, index=index, *args, **kwargs)

        elif not init_target:
            if is_list_like(target):
                target = _maybe_convert_target(data, target)

        else:
            # no conversion required
            pass

        if isinstance(target, pd.Series) and target.name is None:
            target = pd.Series(target, name=self._TARGET_NAME)

        return data, target
Exemplo n.º 2
0
    def _maybe_convert_data(self, data, target,
                            *args, **kwargs):
        """
        Internal function to instanciate data and target

        Parameters
        ----------
        data : instance converted to ``pandas.DataFrame``
        target : instance converted to ``pandas.Series``
        args : argument passed from ``__init__``
        kwargs : argument passed from ``__init__``
        """

        init_df = isinstance(data, pd.DataFrame)
        init_target = isinstance(target, (pd.Series, pd.DataFrame))

        def _maybe_convert_target(data, target, index=None):
            if data is not None:
                index = data.index

            target = np.array(target)
            if len(target.shape) == 1:
                target = pd.Series(target, index=index)
            else:
                target = pd.DataFrame(target, index=index)
            return target

        if not init_df and not init_target:
            if data is not None:
                data = pd.DataFrame(data, *args, **kwargs)

            if is_list_like(target):
                target = _maybe_convert_target(data, target)

        elif not init_df:
            if data is not None:
                index = kwargs.pop('index', target.index)
                data = pd.DataFrame(data, index=index, *args, **kwargs)

        elif not init_target:
            if is_list_like(target):
                target = _maybe_convert_target(data, target)

        else:
            # no conversion required
            pass

        if isinstance(target, pd.Series) and target.name is None:
            target = pd.Series(target, name=self._TARGET_NAME)

        return data, target
Exemplo n.º 3
0
    def __init__(self, data, target=None,
                 *args, **kwargs):
        """
        Build the frame from ``data`` and an optional ``target``.

        Parameters
        ----------
        data : object or None
            Feature data. It is passed through the sklearn / statsmodels
            accessor hooks and then through ``_maybe_convert_data``.
        target : object, optional
            Either a non-list-like label, which must be one of the columns of
            the converted ``data``, or list-like values that are combined
            with ``data`` by ``_concat_target``.
        args, kwargs
            Forwarded to ``_maybe_convert_data``.

        Raises
        ------
        ValueError
            If both ``data`` and ``target`` are None, if ``data`` is None and
            ``target`` is not list-like, or if a non-list-like ``target`` is
            not a column of ``data``.
        """
        # Validate the argument combination before any conversion happens.
        if data is None:
            if target is None:
                msg = '{0} must have either data or target'
                raise ValueError(msg.format(self.__class__.__name__))
            if not is_list_like(target):
                msg = 'target must be list-like when data is None'
                raise ValueError(msg)

        # Give the sklearn / statsmodels hooks a chance to unpack the inputs.
        data, target = skaccessors._maybe_sklearn_data(data, target)
        data, target = smaccessors._maybe_statsmodels_data(data, target)

        # NOTE(review): the name read here is overwritten below before it is
        # ever used, so it currently has no effect on the result.
        if isinstance(data, ModelFrame):
            target_name = data.target_name

        data, target = self._maybe_convert_data(data, target, *args, **kwargs)

        target_is_label = target is not None and not is_list_like(target)
        if target_is_label:
            # ``target`` names an existing column of ``data``.
            if target not in data.columns:
                msg = "Specified target '{0}' is not included in data"
                raise ValueError(msg.format(target))
            target_name = target
            frame = data
            self._target_name = target_name
        else:
            # ``target`` carries values (or is None): merge it with ``data``
            # and derive the name from the resulting pandas object.
            frame, target = self._concat_target(data, target)

            if isinstance(target, pd.Series):
                resolved_name = target.name
            elif isinstance(target, pd.DataFrame):
                target_columns = target.columns
                if len(target_columns) > 1:
                    resolved_name = target_columns
                else:
                    resolved_name = target_columns[0]
            else:
                # target may be None
                resolved_name = self._TARGET_NAME
            self._target_name = resolved_name

        pd.DataFrame.__init__(self, frame)
Exemplo n.º 4
0
    def __init__(self, data, target=None,
                 *args, **kwargs):
        """
        Build the frame from ``data`` and an optional ``target``.

        Parameters
        ----------
        data : object or None
            Feature data. It is passed through the sklearn / statsmodels
            accessor hooks and then through ``_maybe_convert_data``.
        target : object, optional
            Either a non-list-like label, which must be one of the columns of
            the converted ``data``, or list-like values that are combined
            with ``data`` by ``_concat_target``.
        args, kwargs
            Forwarded to ``_maybe_convert_data``.

        Raises
        ------
        ValueError
            If both ``data`` and ``target`` are None, if ``data`` is None and
            ``target`` is not list-like, or if a non-list-like ``target`` is
            not a column of ``data``.
        """
        # Validate the argument combination before any conversion happens.
        if data is None:
            if target is None:
                msg = '{0} must have either data or target'
                raise ValueError(msg.format(self.__class__.__name__))
            if not is_list_like(target):
                msg = 'target must be list-like when data is None'
                raise ValueError(msg)

        # Give the sklearn / statsmodels hooks a chance to unpack the inputs.
        data, target = skaccessors._maybe_sklearn_data(data, target)
        data, target = smaccessors._maybe_statsmodels_data(data, target)

        # NOTE(review): the name read here is overwritten below before it is
        # ever used, so it currently has no effect on the result.
        if isinstance(data, ModelFrame):
            target_name = data.target_name

        data, target = self._maybe_convert_data(data, target, *args, **kwargs)

        target_is_label = target is not None and not is_list_like(target)
        if target_is_label:
            # ``target`` names an existing column of ``data``.
            if target not in data.columns:
                msg = "Specified target '{0}' is not included in data"
                raise ValueError(msg.format(target))
            target_name = target
            frame = data
            self._target_name = target_name
        else:
            # ``target`` carries values (or is None): merge it with ``data``
            # and derive the name from the resulting pandas object.
            frame, target = self._concat_target(data, target)

            if isinstance(target, pd.Series):
                resolved_name = target.name
            elif isinstance(target, pd.DataFrame):
                target_columns = target.columns
                if len(target_columns) > 1:
                    resolved_name = target_columns
                else:
                    resolved_name = target_columns[0]
            else:
                # target may be None
                resolved_name = self._TARGET_NAME
            self._target_name = resolved_name

        pd.DataFrame.__init__(self, frame)
Exemplo n.º 5
0
    def target(self, target):
        """
        Assign a new target to this frame.

        Presumably the setter half of a ``target`` property (the decorator is
        not visible in this snippet).

        Parameters
        ----------
        target : None, label, ``pandas.Series``, ``pandas.DataFrame`` or list-like
            * None deletes the current target (``del self.target``).
            * A non-list-like value is treated as a column label: it must be
              in ``self.columns`` and only ``target_name`` is updated.
            * A Series / DataFrame is renamed (with a warning) when its
              name / columns differ from ``target_name``.
            * Any other list-like is converted by ``_maybe_convert_data``.

        Raises
        ------
        ValueError
            If a non-list-like ``target`` is not found in ``self.columns``.
        """
        if target is None:
            del self.target
            return

        if not self.has_target():
            # allow to update target_name only when target attribute doesn't exist
            if isinstance(target, pd.Series):
                # Series.name may be blank
                if target.name is not None:
                    self.target_name = target.name
            elif isinstance(target, pd.DataFrame):
                # DataFrame.columns should have values
                self.target_name = target.columns

        # A non-list-like target selects an existing column by label; no
        # values are written, only target_name changes.
        if not is_list_like(target):
            if target in self.columns:
                self.target_name = target
            else:
                msg = "Specified target '{0}' is not included in data"
                raise ValueError(msg.format(target))
            return

        if isinstance(target, pd.Series):
            # Force the incoming Series to carry the current target_name.
            if target.name != self.target_name:
                msg = "Passed data is being renamed to '{0}'".format(
                    self.target_name)
                warnings.warn(msg)
                target = pd.Series(target, name=self.target_name)
        elif isinstance(target, pd.DataFrame):
            if not target.columns.equals(self.target_name):
                # NOTE(review): len(self.target_name) counts characters if
                # target_name is a plain string -- confirm it is always a
                # sized collection of labels on this path.
                if len(target.columns) == len(self.target_name):
                    # Same number of columns: keep target_name and rename a
                    # copy so the caller's DataFrame is not modified.
                    msg = "Passed data is being renamed to '{0}'".format(
                        self.target_name)
                    warnings.warn(msg)
                    target = target.copy()
                    target.columns = self.target_name
                else:
                    msg = 'target and target_name are unmatched, target_name will be updated'
                    warnings.warn(msg)
                    # self.data is read before target_name changes and
                    # written back afterwards; presumably the ``data``
                    # property depends on target_name -- confirm.
                    data = self.data  # hack
                    self.target_name = target.columns
                    self.data = data
        else:
            # Other list-likes: only the converted target is kept.
            # self.target_name is passed as an extra positional argument.
            _, target = self._maybe_convert_data(self.data, target,
                                                 self.target_name)

        # Rebuild the combined frame and replace this object's contents.
        df, _ = self._concat_target(self.data, target)
        self._update_inplace(df)
Exemplo n.º 6
0
    def target(self, target):
        """
        Assign a new target to this frame.

        Presumably the setter half of a ``target`` property (the decorator is
        not visible in this snippet).

        Parameters
        ----------
        target : None, label, ``pandas.Series``, ``pandas.DataFrame`` or list-like
            * None deletes the current target (``del self.target``).
            * A non-list-like value is treated as a column label: it must be
              in ``self.columns`` and only ``target_name`` is updated.
            * A Series / DataFrame is renamed (with a warning) when its
              name / columns differ from ``target_name``.
            * Any other list-like is converted by ``_maybe_convert_data``.

        Raises
        ------
        ValueError
            If a non-list-like ``target`` is not found in ``self.columns``.
        """
        if target is None:
            del self.target
            return

        if not self.has_target():
            # allow to update target_name only when target attribute doesn't exist
            if isinstance(target, pd.Series):
                # Series.name may be blank
                if target.name is not None:
                    self.target_name = target.name
            elif isinstance(target, pd.DataFrame):
                # DataFrame.columns should have values
                self.target_name = target.columns

        # A non-list-like target selects an existing column by label; no
        # values are written, only target_name changes.
        if not is_list_like(target):
            if target in self.columns:
                self.target_name = target
            else:
                msg = "Specified target '{0}' is not included in data"
                raise ValueError(msg.format(target))
            return

        if isinstance(target, pd.Series):
            # Force the incoming Series to carry the current target_name.
            if target.name != self.target_name:
                msg = "Passed data is being renamed to '{0}'".format(self.target_name)
                warnings.warn(msg)
                target = pd.Series(target, name=self.target_name)
        elif isinstance(target, pd.DataFrame):
            if not target.columns.equals(self.target_name):
                # NOTE(review): len(self.target_name) counts characters if
                # target_name is a plain string -- confirm it is always a
                # sized collection of labels on this path.
                if len(target.columns) == len(self.target_name):
                    # Same number of columns: keep target_name and rename a
                    # copy so the caller's DataFrame is not modified.
                    msg = "Passed data is being renamed to '{0}'".format(self.target_name)
                    warnings.warn(msg)
                    target = target.copy()
                    target.columns = self.target_name
                else:
                    msg = 'target and target_name are unmatched, target_name will be updated'
                    warnings.warn(msg)
                    # self.data is read before target_name changes and
                    # written back afterwards; presumably the ``data``
                    # property depends on target_name -- confirm.
                    data = self.data        # hack
                    self.target_name = target.columns
                    self.data = data
        else:
            # Other list-likes: only the converted target is kept.
            # self.target_name is passed as an extra positional argument.
            _, target = self._maybe_convert_data(self.data, target, self.target_name)

        # Rebuild the combined frame and replace this object's contents.
        df, _ = self._concat_target(self.data, target)
        self._update_inplace(df)
Exemplo n.º 7
0
 def _add_meta_columns(df, meta_name):
     df = df.copy()
     if not is_list_like(meta_name):
         meta_name = [meta_name]
     df.columns = pd.MultiIndex.from_product([meta_name, df.columns])
     return df
Exemplo n.º 8
0
 def _add_meta_columns(df, meta_name):
     df = df.copy()
     if not is_list_like(meta_name):
         meta_name = [meta_name]
     df.columns = pd.MultiIndex.from_product([meta_name, df.columns])
     return df