def test_feature_definition():
    """Check that ``FeatureDefinition.to_dict()`` yields the expected mapping.

    An Integral feature named "MyFeature" must serialize to a dict with the
    keys ``FeatureName`` and ``FeatureType``; both sides are passed through
    ``ordered`` before being compared.
    """
    expected = {
        "FeatureName": "MyFeature",
        "FeatureType": "Integral",
    }
    feature = FeatureDefinition(
        feature_name="MyFeature",
        feature_type=FeatureTypeEnum.INTEGRAL,
    )
    serialized = feature.to_dict()
    assert ordered(serialized) == ordered(expected)
Пример #2
0
    def load_feature_definitions(
        self,
        data_frame: DataFrame,
    ) -> Sequence[FeatureDefinition]:
        """Infer feature definitions from the columns of a Pandas DataFrame.

        Every column contributes one FeatureDefinition: the column name is the
        feature name, and the feature type is looked up by the string form of
        the column dtype in ``self._DTYPE_TO_FEATURE_DEFINITION_CLS_MAP``.
        According to the original documentation of this method, int_, int8,
        int16, int32, int64, uint8, uint16, uint32 and uint64 map to Integral;
        float_, float16, float32 and float64 map to Fractional; and the string
        dtype maps to String.

        The operation is all-or-nothing: if any column has a dtype without a
        (truthy) entry in the map, an error is raised and
        ``self.feature_definitions`` is left untouched.

        Args:
            data_frame (DataFrame): frame whose columns describe the features.

        Returns:
            list of FeatureDefinition, also stored on ``self.feature_definitions``.

        Raises:
            ValueError: if a feature type cannot be inferred for a column.
        """
        inferred = []
        for column in data_frame:
            dtype = data_frame[column].dtype
            feature_type = self._DTYPE_TO_FEATURE_DEFINITION_CLS_MAP.get(str(dtype))
            # Guard clause: abort on the first column we cannot map.
            if not feature_type:
                raise ValueError(
                    f"Failed to infer Feature type based on dtype {dtype} "
                    f"for column {column}.")
            inferred.append(
                FeatureDefinition(feature_name=column, feature_type=feature_type))
        # Only publish the result once every column has been mapped.
        self.feature_definitions = inferred
        return self.feature_definitions
def get_feature_definitions(df, feature_group):
    """
    Build Feature Store feature definitions from a pandas DataFrame.

    Each column name becomes a feature name. The feature type is looked up
    by the string form of the column dtype in
    ``feature_group._DTYPE_TO_FEATURE_DEFINITION_CLS_MAP``.

    :param df: pandas.DataFrame whose columns describe the schema
    :param feature_group: FeatureGroup providing the dtype-to-type map
    :return: list of FeatureDefinition, one per column, in column order
    :raises ValueError: if a column dtype has no entry in the map
    """
    # Per the original notes for this mapping:
    #   int_, int8, int16, int32, int64, uint8, uint16, uint32 and uint64
    #   are mapped to the Integral feature type;
    #   float_, float16, float32 and float64 are mapped to Fractional;
    #   the string dtype is mapped to String.
    # You can alternatively define your own schema instead of inferring it.
    dtype_map = feature_group._DTYPE_TO_FEATURE_DEFINITION_CLS_MAP

    feature_definitions = []
    for column in df.columns:
        dtype = df[column].dtype
        feature_type = dtype_map.get(str(dtype))
        if feature_type is None:
            # Fail early with a clear message instead of silently building a
            # FeatureDefinition whose feature_type is None.
            raise ValueError(
                f"Failed to infer Feature type based on dtype {dtype} "
                f"for column {column}.")
        feature_definitions.append(
            FeatureDefinition(feature_name=column, feature_type=feature_type))
    return feature_definitions
Пример #4
0
def create_or_load_feature_group(prefix, feature_group_name):
    """Build a FeatureGroup handle and make a best-effort attempt to create it.

    The function first waits on any in-progress creation of the group, then
    tries to create it with an online store enabled, using ``review_id`` as
    the record identifier and ``date`` as the event time feature. Any
    exception raised in either phase is printed and swallowed, so the handle
    is returned whether or not creation succeeded (presumably creation fails
    when the group already exists, which is the "load" case - TODO confirm).

    Relies on ``sagemaker_session``, ``role`` and ``bucket`` being defined
    outside this function.

    Args:
        prefix: key prefix under ``s3://{bucket}/`` passed as the ``s3_uri``.
        feature_group_name: name given to the FeatureGroup.

    Returns:
        The FeatureGroup object.
    """
    string_type = FeatureTypeEnum.STRING
    integral_type = FeatureTypeEnum.INTEGRAL

    # (feature name, feature type) pairs for our records, in registration
    # order. "review_body" is not registered as a feature.
    schema = (
        ("input_ids", string_type),
        ("input_mask", string_type),
        ("segment_ids", string_type),
        ("label_id", integral_type),
        ("review_id", string_type),
        ("date", string_type),
        ("label", integral_type),
        ("split_type", string_type),
    )
    feature_definitions = [
        FeatureDefinition(feature_name=name, feature_type=kind) for name, kind in schema
    ]

    feature_group = FeatureGroup(
        name=feature_group_name,
        feature_definitions=feature_definitions,
        sagemaker_session=sagemaker_session,
    )
    print(f"Feature Group: {feature_group}")

    # Phase 1: another instance may already be creating this group.
    try:
        print(
            "Waiting for existing Feature Group to become available if it is being created by another instance in our cluster..."
        )
        wait_for_feature_group_creation_complete(feature_group)
    except Exception as wait_error:
        print(f"Before CREATE FG wait exeption: {wait_error}")

    # Phase 2: attempt the creation ourselves; failures are only reported.
    try:
        print(f"Creating Feature Group with role {role}...")
        feature_group.create(
            s3_uri=f"s3://{bucket}/{prefix}",
            record_identifier_name="review_id",
            event_time_feature_name="date",
            role_arn=role,
            enable_online_store=True,
        )
        print("Creating Feature Group. Completed.")

        print("Waiting for new Feature Group to become available...")
        wait_for_feature_group_creation_complete(feature_group)
        print("Feature Group available.")
        # The describe() result is not used here.
        feature_group.describe()
    except Exception as create_error:
        print(f"Exception: {create_error}")

    return feature_group
def create_or_load_feature_group(prefix, feature_group_name):
    """Return a FeatureGroup handle after a best-effort creation attempt.

    Steps, in order:
      1. Build the feature definitions and the FeatureGroup object.
      2. Wait for a creation that may already be in progress; an exception
         here is printed and ignored.
      3. Try to create the group (online store enabled, ``review_id`` as the
         record identifier, ``date`` as the event time feature), wait for
         it, and call ``describe()``; an exception here is printed and
         ignored as well.

    Uses ``sagemaker_session``, ``role`` and ``bucket`` from outside this
    function.

    :param prefix: key prefix appended to ``s3://{bucket}/`` for ``s3_uri``
    :param feature_group_name: name of the Feature Group
    :return: the FeatureGroup object, whether or not creation succeeded
    """
    # Feature name -> Feature Store type, in registration order.
    # 'review_body' is not registered as a feature.
    feature_types = {
        'input_ids': FeatureTypeEnum.STRING,
        'input_mask': FeatureTypeEnum.STRING,
        'segment_ids': FeatureTypeEnum.STRING,
        'label_id': FeatureTypeEnum.INTEGRAL,
        'review_id': FeatureTypeEnum.STRING,
        'date': FeatureTypeEnum.STRING,
        'label': FeatureTypeEnum.INTEGRAL,
        'split_type': FeatureTypeEnum.STRING,
    }
    feature_definitions = [
        FeatureDefinition(feature_name=feature_name, feature_type=feature_type)
        for feature_name, feature_type in feature_types.items()
    ]

    feature_group = FeatureGroup(
        name=feature_group_name,
        feature_definitions=feature_definitions,
        sagemaker_session=sagemaker_session,
    )
    print(f'Feature Group: {feature_group}')

    # The group may already be under creation elsewhere; failures of this
    # wait are reported but never abort the function.
    try:
        print(
            'Waiting for existing Feature Group to become available if it is being created by another instance in our cluster...'
        )
        wait_for_feature_group_creation_complete(feature_group)
    except Exception as pending_error:
        print(f'Before CREATE FG wait exeption: {pending_error}')

    # Creation attempt; any failure is printed and the handle is still returned.
    try:
        print(f'Creating Feature Group with role {role}...')
        feature_group.create(
            s3_uri=f"s3://{bucket}/{prefix}",
            record_identifier_name="review_id",
            event_time_feature_name="date",
            role_arn=role,
            enable_online_store=True,
        )
        print('Creating Feature Group. Completed.')

        print('Waiting for new Feature Group to become available...')
        wait_for_feature_group_creation_complete(feature_group)
        print('Feature Group available.')
        # The describe() result is discarded.
        feature_group.describe()
    except Exception as creation_error:
        print(f'Exception: {creation_error}')

    return feature_group
Пример #6
0
def create_or_load_feature_group(prefix, feature_group_name):
    """Set up a FeatureGroup (offline store only) and try to create it.

    All features of this group are declared with the String type. The
    function waits on a possibly in-progress creation, then attempts to
    create the group with ``enable_online_store=False``, using ``review_id``
    as the record identifier and ``date`` as the event time feature.
    Exceptions from either step are printed and suppressed, so the
    FeatureGroup object is always returned.

    Depends on ``sagemaker_session``, ``role`` and ``bucket`` defined outside
    this function.

    :param prefix: key prefix appended to ``s3://{bucket}/`` for ``s3_uri``
    :param feature_group_name: name of the Feature Group
    :return: the FeatureGroup object
    """
    # Names of the record features, in registration order; every one is a
    # String feature.
    string_feature_names = (
        'review_id',
        'date',
        'sentiment',
        'label_id',
        'input_ids',
        'review_body',
        'split_type',
    )
    feature_definitions = [
        FeatureDefinition(feature_name=feature_name,
                          feature_type=FeatureTypeEnum.STRING)
        for feature_name in string_feature_names
    ]

    # Handle for the Feature Group.
    feature_group = FeatureGroup(
        name=feature_group_name,
        feature_definitions=feature_definitions,
        sagemaker_session=sagemaker_session,
    )
    print(f'Feature Group: {feature_group}')

    # Step 1: wait on a creation that may have been started elsewhere.
    try:
        print(
            'Waiting for existing Feature Group to become available if it is being created by another instance in our cluster...'
        )
        wait_for_feature_group_creation_complete(feature_group)
    except Exception as wait_failure:
        print(f'Before CREATE FG wait exeption: {wait_failure}')

    # Step 2: create the group; errors are reported, not raised.
    try:
        print(f'Creating Feature Group with role {role}...')
        feature_group.create(
            s3_uri=f"s3://{bucket}/{prefix}",
            record_identifier_name="review_id",
            event_time_feature_name="date",
            role_arn=role,
            enable_online_store=False,
        )
        print('Creating Feature Group. Completed.')

        print('Waiting for new Feature Group to become available...')
        wait_for_feature_group_creation_complete(feature_group)
        print('Feature Group available.')

        # Fetch the Feature Group information (the result is not used).
        feature_group.describe()
    except Exception as create_failure:
        print(f'Exception: {create_failure}')

    return feature_group