Python Ordinal 예제들, woodwork.logical_types.Ordinal Python 예제들

예제 #1

0

파일 보기

def test_schema_equality():
    """Check that ColumnSchema equality reflects attributes and logical type parameters."""
    base = ColumnSchema(logical_type=Categorical)

    # Each variant passes exactly one extra argument compared with ``base``.
    variants = [
        ColumnSchema(logical_type=Categorical, description='description'),
        ColumnSchema(logical_type=Categorical, metadata={'interesting_values': ['a', 'b']}),
        ColumnSchema(logical_type=Categorical, use_standard_tags=True),
        ColumnSchema(logical_type=Categorical, semantic_tags={'new_tag'}),
    ]
    for variant in variants:
        assert base != variant

    # Same logical type class, different ``order`` parameter.
    abc_schema = ColumnSchema(logical_type=Ordinal(order=['a', 'b', 'c']))
    bac_schema = ColumnSchema(logical_type=Ordinal(order=['b', 'a', 'c']))

    assert base != abc_schema
    assert abc_schema != bac_schema
    assert abc_schema == abc_schema

    # Datetime given as a class, a default instance, an explicit None format,
    # and an explicit format string.
    with_format = ColumnSchema(logical_type=Datetime(datetime_format='%Y-%m%d'))
    explicit_none = ColumnSchema(logical_type=Datetime(datetime_format=None))
    default_instance = ColumnSchema(logical_type=Datetime())
    uninstantiated = ColumnSchema(logical_type=Datetime)

    assert uninstantiated != default_instance
    assert default_instance != with_format
    assert default_instance == explicit_none

예제 #2

0

파일 보기

파일: test_logical_types.py 프로젝트: alteryx/woodwork

def test_ordinal_transform_dask(ordinal_transform_series_dask) -> None:
    """Check that Ordinal.transform on a Dask series yields ordered categories.

    The transformed (and computed) series must have the ``category`` dtype and
    its categories must appear in the same order as the ``order`` argument.
    """
    order = [2, 1, 3]
    typ = Ordinal(order=order)
    ser_ = typ.transform(ordinal_transform_series_dask).compute()

    assert ser_.dtype == "category"
    # ``pd.Int64Index`` was removed in pandas 2.0; an explicitly typed
    # ``pd.Index`` builds the same int64 index on old and new versions.
    expected_categories = pd.Index(order, dtype="int64")
    pd.testing.assert_index_equal(ser_.cat.categories, expected_categories)

예제 #3

0

파일 보기

def test_categorical_dtype_serialization(serialize_df, tmpdir):
    """Round-trip Categorical and Ordinal columns through each supported disk format.

    For every format the table is written to ``tmpdir``, read back, and both
    the data and the Woodwork schema are compared with the original. The
    directory is removed after each format.
    """
    ltypes = {
        "cat_int": Categorical,
        "ord_int": Ordinal(order=[1, 2]),
        "cat_float": Categorical,
        "ord_float": Ordinal(order=[1.0, 2.0]),
        "cat_bool": Categorical,
        "ord_bool": Ordinal(order=[True, False]),
    }
    # Only pandas DataFrames are exercised with pickle and parquet here.
    is_pandas = isinstance(serialize_df, pd.DataFrame)
    formats = ["csv", "pickle", "parquet"] if is_pandas else ["csv"]
    target_dir = str(tmpdir)

    for fmt in formats:
        df = serialize_df.copy()
        df.ww.init(index="id", logical_types=ltypes)
        df.ww.to_disk(target_dir, format=fmt)

        deserialized_df = read_woodwork_table(target_dir, filename=f"data.{fmt}")

        actual = to_pandas(deserialized_df, index=deserialized_df.ww.index, sort_index=True)
        expected = to_pandas(df, index=df.ww.index, sort_index=True)
        pd.testing.assert_frame_equal(actual, expected)
        assert deserialized_df.ww.schema == df.ww.schema

        shutil.rmtree(target_dir)

예제 #4

0

파일 보기

파일: test_logical_types.py 프로젝트: mikewcasale/woodwork

def test_ordinal_init_with_order():
    """Check that Ordinal exposes the order it was given, for both a list and a tuple."""
    list_order = ['bronze', 'silver', 'gold']
    tuple_order = ('bronze', 'silver', 'gold')

    for order in (list_order, tuple_order):
        assert Ordinal(order=order).order == order

예제 #5

0

파일 보기

파일: test_logical_types.py 프로젝트: mikewcasale/woodwork

def test_ordinal_order_errors():
    """Check that constructing an Ordinal with an invalid order raises.

    A string order must raise TypeError; an order containing duplicates must
    raise ValueError.
    """
    not_a_sequence_msg = 'Order values must be specified in a list or tuple'
    with pytest.raises(TypeError, match=not_a_sequence_msg):
        Ordinal(order='not_valid')

    duplicates_msg = 'Order values cannot contain duplicates'
    with pytest.raises(ValueError, match=duplicates_msg):
        Ordinal(order=['a', 'b', 'b'])

예제 #6

0

파일 보기

파일: test_logical_types.py 프로젝트: alteryx/woodwork

def test_ordinal_init_with_order():
    """Check the stored order and string form of an Ordinal built from a list or a tuple."""
    expected_str = "Ordinal: ['bronze', 'silver', 'gold']"

    list_order = ["bronze", "silver", "gold"]
    ordinal_from_list = Ordinal(order=list_order)
    assert ordinal_from_list.order == list_order
    assert str(ordinal_from_list) == expected_str

    tuple_order = ("bronze", "silver", "gold")
    ordinal_from_tuple = Ordinal(order=tuple_order)
    assert ordinal_from_tuple.order == tuple_order
    # NOTE(review): this re-checks the list-based instance, so the string form
    # of the tuple-based Ordinal is never verified. Probably a copy-paste slip;
    # confirm the intended expected string before switching the operand.
    assert str(ordinal_from_list) == expected_str

예제 #7

0

파일 보기

파일: test_logical_types.py 프로젝트: alteryx/woodwork

def test_ordinal_order_errors():
    """Check that transforming with an invalid Ordinal order raises.

    A string order must raise TypeError and an order with duplicates must
    raise ValueError when the logical type is constructed and applied to a
    categorical series.
    """
    series = pd.Series([1, 2, 3]).astype("category")

    not_a_sequence_msg = "Order values must be specified in a list or tuple"
    with pytest.raises(TypeError, match=not_a_sequence_msg):
        Ordinal(order="not_valid").transform(series)

    duplicates_msg = "Order values cannot contain duplicates"
    with pytest.raises(ValueError, match=duplicates_msg):
        Ordinal(order=["a", "b", "b"]).transform(series)

예제 #8

0

파일 보기

파일: test_datatable_dtypes.py 프로젝트: kaidisn/woodwork

def test_sets_category_dtype_on_init():
    """Check that DataTable stores category-like logical types with their pandas dtype.

    Every combination of input series (with and without various null markers)
    and logical type is initialised from an object-dtype series.
    """
    column_name = 'test_series'
    # The middle element is either a real value or one of several null markers.
    middle_values = ['b', None, np.nan, pd.NA, pd.NaT]
    series_list = [
        pd.Series(['a', middle, 'c'], name=column_name)
        for middle in middle_values
    ]

    logical_types = [
        Categorical,
        CountryCode,
        Ordinal(order=['a', 'b', 'c']),
        SubRegionCode,
        ZIPCode,
    ]

    for raw_series in series_list:
        object_series = raw_series.astype('object')
        for logical_type in logical_types:
            dt = DataTable(pd.DataFrame(object_series),
                           logical_types={column_name: logical_type})

            assert dt.columns[column_name].logical_type == logical_type
            assert dt.columns[column_name].dtype == logical_type.pandas_dtype

            round_tripped = dt.to_dataframe()
            assert round_tripped[column_name].dtype == logical_type.pandas_dtype

예제 #9

0

파일 보기

파일: test_serialization.py 프로젝트: mikewcasale/woodwork

def test_to_csv(sample_df, tmpdir):
    """Write a typed table to CSV, read it back, and compare data and schema."""
    descriptions = {
        'signup_date': 'original signup date',
        'age': 'age of the user',
    }
    metadata = {
        'id': {'is_sorted': True},
        'age': {'interesting_values': [33, 57]},
    }
    sample_df.ww.init(
        name='test_data',
        index='id',
        semantic_tags={'id': 'tag1'},
        logical_types={'age': Ordinal(order=[25, 33, 57])},
        column_descriptions=descriptions,
        column_metadata=metadata,
    )

    target_dir = str(tmpdir)
    sample_df.ww.to_csv(target_dir, encoding='utf-8', engine='python')
    deserialized_df = deserialize.read_woodwork_table(target_dir)

    actual = to_pandas(deserialized_df, index=deserialized_df.ww.index, sort_index=True)
    expected = to_pandas(sample_df, index=sample_df.ww.index, sort_index=True)
    pd.testing.assert_frame_equal(actual, expected)
    assert deserialized_df.ww.schema == sample_df.ww.schema

예제 #10

0

파일 보기

class Week(TransformPrimitive):
    """Determines the week of the year from a datetime.

    Description:
        Returns the week of the year for each datetime value. The number is
        read from ``dt.isocalendar().week`` when the datetime accessor
        provides ``isocalendar`` and from ``dt.week`` otherwise. The
        declared return type orders the week numbers 1 through 53.

        NOTE(review): on the ``isocalendar`` path the numbering presumably
        follows ISO 8601 rather than starting a new week 1 on January 1 --
        confirm against the pandas documentation.

    Examples:
        >>> from datetime import datetime
        >>> dates = [datetime(2019, 1, 3),
        ...          datetime(2019, 6, 17, 11, 10, 50),
        ...          datetime(2019, 11, 30, 19, 45, 15)]
        >>> week = Week()
        >>> week(dates).tolist()
        [1, 25, 48]
    """

    name = "week"
    input_types = [ColumnSchema(logical_type=Datetime)]
    return_type = ColumnSchema(
        logical_type=Ordinal(order=list(range(1, 54))),
        semantic_tags={"category"},
    )
    compatibility = [Library.PANDAS, Library.DASK, Library.SPARK]
    description_template = "the week of the year of {}"

    def get_function(self):
        """Return the callable that extracts the week number from a datetime series."""
        def week(vals):
            accessor = vals.dt
            # Fall back to the older ``.week`` attribute when the accessor
            # has no ``isocalendar``.
            if not hasattr(accessor, "isocalendar"):
                return accessor.week
            return accessor.isocalendar().week

        return week

예제 #11

0

파일 보기

class Week(TransformPrimitive):
    """Determines the week of the year from a datetime.

    Description:
        Returns the week of the year for each datetime value. The number is
        read from ``dt.isocalendar().week`` when the datetime accessor
        provides ``isocalendar`` and from the older ``dt.week`` attribute
        otherwise. The declared return type orders the week numbers 1
        through 53.

    Examples:
        >>> from datetime import datetime
        >>> dates = [datetime(2019, 1, 3),
        ...          datetime(2019, 6, 17, 11, 10, 50),
        ...          datetime(2019, 11, 30, 19, 45, 15)]
        >>> week = Week()
        >>> week(dates).tolist()
        [1, 25, 48]
    """

    name = "week"
    input_types = [ColumnSchema(logical_type=Datetime)]
    return_type = ColumnSchema(logical_type=Ordinal(order=list(range(1, 54))),
                               semantic_tags={'category'})
    compatibility = [Library.PANDAS, Library.DASK, Library.KOALAS]
    description_template = "the week of the year of {}"

    def get_function(self):
        """Return the callable that extracts the week number from a datetime series."""
        def week(vals):
            # ``Series.dt.week`` was deprecated and later removed from pandas
            # (2.0); prefer ``isocalendar().week`` whenever it is available so
            # the primitive keeps working on current pandas.
            if hasattr(vals.dt, "isocalendar"):
                return vals.dt.isocalendar().week

            # Legacy path for accessors without ``isocalendar``: silence the
            # deprecation warning raised by ``.week`` on older pandas.
            warnings.filterwarnings(
                "ignore",
                message=("Series.dt.weekofyear and Series.dt.week "
                         "have been deprecated."),
                module="featuretools")
            return vals.dt.week

        return week

예제 #12

0

파일 보기

파일: datetime_transform_primitives.py 프로젝트: RomaKoks/featuretools

class Year(TransformPrimitive):
    """Determines the year value of a datetime.

    Description:
        Reads the ``year`` attribute from the datetime accessor of the
        input. The declared return type is an Ordinal whose order covers
        the years 1 through 2999.

    Examples:
        >>> from datetime import datetime
        >>> dates = [datetime(2019, 3, 1),
        ...          datetime(2048, 6, 17, 11, 10, 50),
        ...          datetime(1950, 11, 30, 19, 45, 15)]
        >>> year = Year()
        >>> year(dates).tolist()
        [2019, 2048, 1950]
    """

    name = "year"
    input_types = [ColumnSchema(logical_type=Datetime)]
    return_type = ColumnSchema(
        logical_type=Ordinal(order=list(range(1, 3000))),
        semantic_tags={"category"},
    )
    compatibility = [Library.PANDAS, Library.DASK, Library.SPARK]
    description_template = "the year of {}"

    def get_function(self):
        """Return the callable that extracts the year from a datetime series."""
        def year(vals):
            accessor = vals.dt
            return accessor.year

        return year

예제 #13

0

파일 보기

class DayOfYear(TransformPrimitive):
    """Determines the ordinal day of the year from the given datetime

    Description:
        For a list of dates, return the ordinal day of the year, read from
        the ``dayofyear`` attribute of the datetime accessor. The declared
        return type orders the values 1 through 366.

    Examples:
        >>> from datetime import datetime
        >>> dates = [datetime(2019, 1, 1),
        ...          datetime(2020, 12, 31),
        ...          datetime(2020, 2, 28)]
        >>> dayOfYear = DayOfYear()
        >>> dayOfYear(dates).tolist()
        [1, 366, 59]
    """

    name = "day_of_year"
    input_types = [ColumnSchema(logical_type=Datetime)]
    return_type = ColumnSchema(
        logical_type=Ordinal(order=list(range(1, 367))),
        semantic_tags={"category"},
    )
    compatibility = [Library.PANDAS, Library.DASK, Library.SPARK]
    description_template = "the day of year from {}"

    def get_function(self):
        """Return the callable that extracts the day of the year from a datetime series."""
        def dayOfYear(vals):
            accessor = vals.dt
            return accessor.dayofyear

        return dayOfYear

예제 #14

0

파일 보기

파일: datetime_transform_primitives.py 프로젝트: RomaKoks/featuretools

class Second(TransformPrimitive):
    """Determines the seconds value of a datetime.

    Description:
        Reads the ``second`` attribute from the datetime accessor of the
        input. The declared return type orders the values 0 through 59.

    Examples:
        >>> from datetime import datetime
        >>> dates = [datetime(2019, 3, 1),
        ...          datetime(2019, 3, 3, 11, 10, 50),
        ...          datetime(2019, 3, 31, 19, 45, 15)]
        >>> second = Second()
        >>> second(dates).tolist()
        [0, 50, 15]
    """

    name = "second"
    input_types = [ColumnSchema(logical_type=Datetime)]
    return_type = ColumnSchema(
        logical_type=Ordinal(order=list(range(60))),
        semantic_tags={"category"},
    )
    compatibility = [Library.PANDAS, Library.DASK, Library.SPARK]
    description_template = "the seconds value of {}"

    def get_function(self):
        """Return the callable that extracts the seconds from a datetime series."""
        def second(vals):
            accessor = vals.dt
            return accessor.second

        return second

예제 #15

0

파일 보기

파일: datetime_transform_primitives.py 프로젝트: RomaKoks/featuretools

class Weekday(TransformPrimitive):
    """Determines the day of the week from a datetime.

    Description:
        Returns the day of the week from a datetime value, read from the
        ``weekday`` attribute of the datetime accessor. Weeks start on
        Monday (day 0) and run through Sunday (day 6); the declared return
        type orders the values 0 through 6.

    Examples:
        >>> from datetime import datetime
        >>> dates = [datetime(2019, 3, 1),
        ...          datetime(2019, 6, 17, 11, 10, 50),
        ...          datetime(2019, 11, 30, 19, 45, 15)]
        >>> weekday = Weekday()
        >>> weekday(dates).tolist()
        [4, 0, 5]
    """

    name = "weekday"
    input_types = [ColumnSchema(logical_type=Datetime)]
    return_type = ColumnSchema(
        logical_type=Ordinal(order=list(range(7))),
        semantic_tags={"category"},
    )
    compatibility = [Library.PANDAS, Library.DASK, Library.SPARK]
    description_template = "the day of the week of {}"

    def get_function(self):
        """Return the callable that extracts the weekday from a datetime series."""
        def weekday(vals):
            accessor = vals.dt
            return accessor.weekday

        return weekday

예제 #16

0

파일 보기

파일: datetime_transform_primitives.py 프로젝트: RomaKoks/featuretools

class Day(TransformPrimitive):
    """Determines the day of the month from a datetime.

    Description:
        Reads the ``day`` attribute from the datetime accessor of the
        input. The declared return type orders the values 1 through 31.

    Examples:
        >>> from datetime import datetime
        >>> dates = [datetime(2019, 3, 1),
        ...          datetime(2019, 3, 3),
        ...          datetime(2019, 3, 31)]
        >>> day = Day()
        >>> day(dates).tolist()
        [1, 3, 31]
    """

    name = "day"
    input_types = [ColumnSchema(logical_type=Datetime)]
    return_type = ColumnSchema(
        logical_type=Ordinal(order=list(range(1, 32))),
        semantic_tags={"category"},
    )
    compatibility = [Library.PANDAS, Library.DASK, Library.SPARK]
    description_template = "the day of the month of {}"

    def get_function(self):
        """Return the callable that extracts the day of the month from a datetime series."""
        def day(vals):
            accessor = vals.dt
            return accessor.day

        return day

예제 #17

0

파일 보기

파일: datetime_transform_primitives.py 프로젝트: RomaKoks/featuretools

class Month(TransformPrimitive):
    """Determines the month value of a datetime.

    Description:
        Reads the ``month`` attribute from the datetime accessor of the
        input. The declared return type orders the values 1 through 12.

    Examples:
        >>> from datetime import datetime
        >>> dates = [datetime(2019, 3, 1),
        ...          datetime(2019, 6, 17, 11, 10, 50),
        ...          datetime(2019, 11, 30, 19, 45, 15)]
        >>> month = Month()
        >>> month(dates).tolist()
        [3, 6, 11]
    """

    name = "month"
    input_types = [ColumnSchema(logical_type=Datetime)]
    return_type = ColumnSchema(
        logical_type=Ordinal(order=list(range(1, 13))),
        semantic_tags={"category"},
    )
    compatibility = [Library.PANDAS, Library.DASK, Library.SPARK]
    description_template = "the month of {}"

    def get_function(self):
        """Return the callable that extracts the month from a datetime series."""
        def month(vals):
            accessor = vals.dt
            return accessor.month

        return month

예제 #18

0

파일 보기

def test_to_csv_S3(sample_df, s3_client, s3_bucket, profile_name):
    """Write a typed table to the test S3 URL as CSV, read it back, and compare.

    Both the data (as pandas, sorted by index) and the Woodwork schema must
    match after the round trip.
    """
    xfail_tmp_disappears(sample_df)

    init_kwargs = {
        "name": "test_data",
        "index": "id",
        "semantic_tags": {"id": "tag1"},
        "logical_types": {"age": Ordinal(order=[25, 33, 57])},
    }
    sample_df.ww.init(**init_kwargs)

    sample_df.ww.to_disk(
        TEST_S3_URL,
        format="csv",
        encoding="utf-8",
        engine="python",
        profile_name=profile_name,
    )
    make_public(s3_client, s3_bucket)

    deserialized_df = read_woodwork_table(TEST_S3_URL, profile_name=profile_name)

    original = to_pandas(sample_df, index=sample_df.ww.index, sort_index=True)
    restored = to_pandas(deserialized_df, index=deserialized_df.ww.index, sort_index=True)
    pd.testing.assert_frame_equal(original, restored)
    assert sample_df.ww.schema == deserialized_df.ww.schema

예제 #19

0

파일 보기

class Quarter(TransformPrimitive):
    """Determines the quarter a datetime column falls into (1, 2, 3, 4)

    Description:
        Reads the ``quarter`` attribute from the datetime accessor of the
        input. The declared return type orders the values 1 through 4.

    Examples:
        >>> from datetime import datetime
        >>> dates = [datetime(2019,12,1),
        ...          datetime(2019,1,3),
        ...          datetime(2020,2,1)]
        >>> q = Quarter()
        >>> q(dates).tolist()
        [4, 1, 1]
    """

    name = "quarter"
    input_types = [ColumnSchema(logical_type=Datetime)]
    return_type = ColumnSchema(
        logical_type=Ordinal(order=list(range(1, 5))),
        semantic_tags={"category"},
    )
    compatibility = [Library.PANDAS, Library.DASK, Library.SPARK]
    description_template = "the quarter that describes {}"

    def get_function(self):
        """Return the callable that extracts the quarter from a datetime series."""
        def quarter(vals):
            accessor = vals.dt
            return accessor.quarter

        return quarter

예제 #20

0

파일 보기

def test_datacolumn_equality(sample_series, sample_datetime_series):
    """Check DataColumn equality across constructor arguments, type parameters and data."""
    # Check different parameters to DataColumn
    str_col = DataColumn(sample_series, logical_type='Categorical')
    equivalent_col = DataColumn(sample_series, logical_type=Categorical)
    # Each of these differs from ``str_col`` in one respect: tags, underlying
    # series, logical type, description, or metadata.
    differing_cols = [
        DataColumn(sample_series, logical_type=Categorical, semantic_tags={'test'}),
        DataColumn(sample_datetime_series, logical_type=Categorical),
        DataColumn(sample_series, logical_type=NaturalLanguage),
        DataColumn(sample_series, logical_type='Categorical', description='description'),
        DataColumn(sample_series, logical_type='Categorical',
                   metadata={'interesting_values': ['a', 'b']}),
    ]

    assert str_col == equivalent_col
    for other_col in differing_cols:
        assert str_col != other_col

    # Check columns with same logical types but different parameters
    abc_col = DataColumn(sample_series, logical_type=Ordinal(order=['a', 'b', 'c']))
    bac_col = DataColumn(sample_series, logical_type=Ordinal(order=['b', 'a', 'c']))

    assert str_col != abc_col
    assert abc_col != bac_col
    assert abc_col == abc_col

    with_format = DataColumn(sample_datetime_series,
                             logical_type=Datetime(datetime_format='%Y-%m%d'))
    explicit_none = DataColumn(sample_datetime_series,
                               logical_type=Datetime(datetime_format=None))
    default_instance = DataColumn(sample_datetime_series, logical_type=Datetime())
    uninstantiated = DataColumn(sample_datetime_series, logical_type=Datetime)

    assert uninstantiated != default_instance
    assert default_instance != with_format
    assert default_instance == explicit_none

    # Check different underlying series
    nl_col = DataColumn(sample_series, logical_type='NaturalLanguage')
    changed_series = sample_series.copy().replace(to_replace='a', value='test')
    changed_col = DataColumn(changed_series, logical_type='NaturalLanguage')

    # We only check underlying data for equality with pandas dataframes
    data_is_compared = isinstance(nl_col.to_series(), pd.Series)
    if not data_is_compared:
        assert nl_col == changed_col
    else:
        assert nl_col != changed_col

예제 #21

0

파일 보기

파일: test_logical_types.py 프로젝트: alteryx/woodwork

def test_ordinal_transform(sample_series):
    """Check that transforming with an incomplete order raises ValueError.

    The test is marked xfail when the series type name mentions Dask or
    Spark.
    """
    series_type = str(type(sample_series))
    if any(backend in series_type for backend in ("dask", "spark")):
        pytest.xfail(
            "Fails with Dask and Spark - ordinal data validation not supported"
        )

    expected_error = re.escape(
        "Ordinal column sample_series contains values that are not "
        "present in the order values provided: ['c']")
    incomplete_ordinal = Ordinal(order=["a", "b"])

    with pytest.raises(ValueError, match=expected_error):
        incomplete_ordinal.transform(sample_series)

예제 #22

0

파일 보기

def test_adds_category_standard_tag():
    """Check that category-like logical types add the 'category' tag next to a custom tag."""
    series = pd.Series([1, 2, 3])
    expected_tags = {'custom_tag', 'category'}

    category_like_types = (
        Categorical,
        CountryCode,
        Ordinal(order=(1, 2, 3)),
        SubRegionCode,
        ZIPCode,
    )
    for logical_type in category_like_types:
        data_col = DataColumn(series, logical_type=logical_type, semantic_tags='custom_tag')
        assert data_col.semantic_tags == expected_tags

예제 #23

0

파일 보기

def test_ordinal_validation_methods_called_init(mock_validate, sample_series):
    """Check that ``validate`` controls whether the mocked validation is called on init.

    Initialising with ``validate=False`` must not call ``mock_validate``;
    initialising with ``validate=True`` must. Both results must be equal.
    """
    def make_ordinal():
        return Ordinal(order=["a", "b", "c"])

    assert not mock_validate.called

    unchecked = sample_series.copy()
    unchecked.ww.init(logical_type=make_ordinal(), validate=False)
    assert not mock_validate.called

    checked = sample_series.copy()
    checked.ww.init(logical_type=make_ordinal(), validate=True)
    assert mock_validate.called

    assert checked.ww == unchecked.ww
    pd.testing.assert_series_equal(to_pandas(checked), to_pandas(unchecked))

예제 #24

0

파일 보기

def test_adds_category_standard_tag():
    """Check that category-like logical types add the 'category' tag next to a custom tag."""
    expected_tags = {'custom_tag', 'category'}

    category_like_types = (
        Categorical,
        CountryCode,
        Ordinal(order=(1, 2, 3)),
        PostalCode,
        SubRegionCode,
    )
    for logical_type in category_like_types:
        # A fresh series per logical type, since init is applied to it directly.
        series = pd.Series([1, 2, 3], dtype='category')
        series.ww.init(logical_type=logical_type, semantic_tags='custom_tag')
        assert series.ww.semantic_tags == expected_tags

예제 #25

0

파일 보기

def test_ordinal_with_incomplete_ranking(sample_series):
    """Check that init raises ValueError when the order omits a value in the series.

    The test is marked xfail for Koalas and Dask series.
    """
    is_koalas = ks and isinstance(sample_series, ks.Series)
    if is_koalas or (dd and isinstance(sample_series, dd.Series)):
        pytest.xfail('Fails with Dask and Koalas - ordinal data validation not supported')

    expected_error = re.escape("Ordinal column sample_series contains values that are not "
                               "present in the order values provided: ['c']")
    incomplete_ordinal = Ordinal(order=['a', 'b'])

    with pytest.raises(ValueError, match=expected_error):
        sample_series.ww.init(logical_type=incomplete_ordinal)

예제 #26

0

파일 보기

def test_ordinal_with_incomplete_ranking(sample_series):
    """Check that init raises ValueError when the order omits a value in the series.

    The test is marked xfail for Spark and Dask series.
    """
    validation_unsupported = (_is_spark_series(sample_series)
                              or _is_dask_series(sample_series))
    if validation_unsupported:
        pytest.xfail(
            "Fails with Dask and Spark - ordinal data validation not supported"
        )

    expected_error = re.escape(
        "Ordinal column sample_series contains values that are not "
        "present in the order values provided: ['c']")
    incomplete_ordinal = Ordinal(order=["a", "b"])

    with pytest.raises(ValueError, match=expected_error):
        sample_series.ww.init(logical_type=incomplete_ordinal)

예제 #27

0

파일 보기

def test_is_col_boolean():
    """Check that _is_col_boolean accepts Boolean schemas only.

    Both the Boolean class and a Boolean instance must be accepted; Ordinal
    and NaturalLanguage schemas must be rejected.
    """
    for boolean_ltype in (Boolean, Boolean()):
        schema = ColumnSchema(logical_type=boolean_ltype)
        assert _is_col_boolean(schema)

    for other_ltype in (Ordinal(order=['a', 'b']), NaturalLanguage):
        schema = ColumnSchema(logical_type=other_ltype)
        assert not _is_col_boolean(schema)

예제 #28

0

파일 보기

def test_all_ww_logical_types():
    """Round-trip an EntitySet with one column per listed logical type through its description."""
    type_strings = list_logical_types()['type_string'].to_list()

    # One empty column per logical type, named after the type string.
    dataframe = pd.DataFrame(columns=type_strings)
    ltype_dict = dict(zip(type_strings, type_strings))
    # The 'ordinal' entry is given as an instance with an explicit (empty) order.
    ltype_dict['ordinal'] = Ordinal(order=[])

    es = EntitySet()
    es.add_dataframe(
        dataframe=dataframe,
        dataframe_name='all_types',
        index='integer',
        logical_types=ltype_dict,
    )

    description = serialize.entityset_to_description(es)
    _es = deserialize.description_to_entityset(description)
    assert es.__eq__(_es, deep=True)

예제 #29

0

파일 보기

def test_ordinal_with_order(sample_series):
    """Check that a DataColumn keeps an Ordinal's order on init and via set_logical_type.

    The test is marked xfail for Koalas and Dask series.
    """
    is_koalas = ks and isinstance(sample_series, ks.Series)
    if is_koalas or (dd and isinstance(sample_series, dd.Series)):
        pytest.xfail('Fails with Dask and Koalas - ordinal data validation not compatible')

    ordinal_with_order = Ordinal(order=['a', 'b', 'c'])

    # Ordinal supplied at construction time.
    initial_col = DataColumn(sample_series, logical_type=ordinal_with_order)
    assert isinstance(initial_col.logical_type, Ordinal)
    assert initial_col.logical_type.order == ['a', 'b', 'c']

    # Ordinal applied afterwards to a column that started as NaturalLanguage.
    nl_col = DataColumn(sample_series, logical_type="NaturalLanguage")
    converted_col = nl_col.set_logical_type(ordinal_with_order)
    assert isinstance(converted_col.logical_type, Ordinal)
    assert converted_col.logical_type.order == ['a', 'b', 'c']

예제 #30

0

파일 보기

파일: test_column_schema.py 프로젝트: alteryx/woodwork

def test_schema_equality():
    """Check that ColumnSchema equality reflects attributes and logical type parameters."""
    base = ColumnSchema(logical_type=Categorical)

    # Each variant passes exactly one extra argument compared with ``base``.
    variants = [
        ColumnSchema(logical_type=Categorical, description="description"),
        ColumnSchema(logical_type=Categorical, origin="base"),
        ColumnSchema(logical_type=Categorical,
                     metadata={"interesting_values": ["a", "b"]}),
        ColumnSchema(logical_type=Categorical, use_standard_tags=True),
        ColumnSchema(logical_type=Categorical, semantic_tags={"new_tag"}),
    ]
    for variant in variants:
        assert base != variant

    # Check columns with same logical types but different parameters
    abc_schema = ColumnSchema(logical_type=Ordinal(order=["a", "b", "c"]))
    bac_schema = ColumnSchema(logical_type=Ordinal(order=["b", "a", "c"]))

    assert base != abc_schema
    assert abc_schema != bac_schema
    assert abc_schema == abc_schema

    # Datetime with an explicit format, an explicit None format, and defaults.
    with_format = ColumnSchema(logical_type=Datetime(datetime_format="%Y-%m%d"))
    explicit_none = ColumnSchema(logical_type=Datetime(datetime_format=None))
    default_instance = ColumnSchema(logical_type=Datetime())

    assert default_instance != with_format
    assert default_instance == explicit_none