Пример #1
0
    def convert_variable_type(self, variable_id, new_type,
                              convert_data=True,
                              **kwargs):
        """Swap one of this entity's variables for a variable of another type.

        The replacement variable takes the position of the old one in
        ``self.variables``. Nothing is returned; the entity is updated
        in place.

        Args:
            variable_id (str) : Id of variable to convert.
            new_type (subclass of `Variable`) : Type of variable to convert to.
            convert_data (bool) : If True, also convert the matching column of
                this entity's dataframe before the variable is replaced.
            **kwargs : Extra keyword arguments passed through unchanged to
                `convert_variable_data`. Only used when `convert_data` is True.

        Raises:
            RuntimeError : If the underlying data cannot be converted. This is
                expected to come from `convert_variable_data`; it is not
                raised by this method directly.

        Examples:
            >>> es["customer"].convert_variable_type("education_level", vtypes.Categorical)
        """
        if convert_data:
            # The data is converted before the variable list is touched, so a
            # failed conversion leaves self.variables as it was.
            converted_df = convert_variable_data(df=self.df,
                                                 column_id=variable_id,
                                                 new_type=new_type,
                                                 **kwargs)
            self.df = converted_df

        # Build the replacement from the current variable and store it in the
        # same slot, which keeps the ordering of self.variables intact.
        current = self._get_variable(variable_id)
        replacement = new_type.create_from(current)
        slot = self.variables.index(current)
        self.variables[slot] = replacement
def test_convert_variable_data():
    """Check that convert_variable_data changes a column's dtype as requested."""
    frame = pd.DataFrame({
        'id': [0, 1, 2],
        'category': ['a', 'b', 'a'],
        'ints': ['1', '2', '1'],
        'boolean': [True, False, True],
        'date': ['3/11/2000', '3/12/2000', '3/13/2000'],
        'integers': [1, 2, 1]
    })

    # 'ints' starts out as digit strings; converting to Numeric must change
    # the dtype to one of the recognised numeric dtypes.
    dtype_before = frame['ints'].dtype.name
    frame = convert_variable_data(
        df=frame,
        column_id='ints',
        new_type=vtypes.Numeric,
    )
    dtype_after = frame['ints'].dtype.name

    assert dtype_before != dtype_after
    assert dtype_after in vtypes.PandasTypes._pandas_numerics

    # The now-numeric 'ints' column is converted to Boolean, with 1 given as
    # the true value and 2 as the false value; only a dtype change is checked.
    dtype_before = frame['ints'].dtype.name
    frame = convert_variable_data(
        df=frame,
        column_id='ints',
        new_type=vtypes.Boolean,
        true_val=1,
        false_val=2,
    )

    assert dtype_before != frame['ints'].dtype.name

    # 'date' holds date strings; converting to Datetime must change the dtype
    # to one of the recognised datetime dtypes.
    dtype_before = frame['date'].dtype.name
    frame = convert_variable_data(
        df=frame,
        column_id='date',
        new_type=vtypes.Datetime,
    )
    dtype_after = frame['date'].dtype.name

    assert dtype_before != dtype_after
    assert dtype_after in vtypes.PandasTypes._pandas_datetimes