Exemplo n.º 1
0
def record():
    """Build a sample record of three ``FeatureValue`` entries.

    Returns:
        A list with ``feature1`` ("10.0"), ``feature2`` ("10") and
        ``feature3`` ("2020-10-30T03:43:21Z"), in that order, each value
        stored as a string.
    """
    name_and_text = (
        ("feature1", "10.0"),
        ("feature2", "10"),
        ("feature3", "2020-10-30T03:43:21Z"),
    )
    return [
        FeatureValue(feature_name=name, value_as_string=text)
        for name, text in name_and_text
    ]
Exemplo n.º 2
0
    def _ingest_single_batch(
        data_frame: DataFrame,
        feature_group_name: str,
        sagemaker_session: Session,
        start_index: int,
        end_index: int,
    ):
        """Put the rows ``data_frame[start_index:end_index]`` into a Feature Group.

        Each row becomes one ``put_record`` call. Every cell of the row is
        converted with ``str`` and sent as a ``FeatureValue`` named after the
        column at the same position.

        Args:
            data_frame (DataFrame): source DataFrame to be ingested.
            feature_group_name (str): name of the Feature Group.
            sagemaker_session (Session): session instance to perform boto calls.
            start_index (int): starting position to ingest in this batch.
            end_index (int): ending position to ingest in this batch.
        """
        logger.info("Started ingesting index %d to %d", start_index, end_index)
        batch = data_frame[start_index:end_index]
        # index=False: the tuples hold only column values, so positions line up
        # directly with ``data_frame.columns``.
        for cells in batch.itertuples(index=False):
            features = [
                FeatureValue(
                    feature_name=data_frame.columns[position],
                    value_as_string=str(cell),
                )
                for position, cell in enumerate(cells)
            ]
            payload = [feature.to_dict() for feature in features]
            sagemaker_session.put_record(
                feature_group_name=feature_group_name,
                record=payload,
            )
Exemplo n.º 3
0
    def _ingest_single_batch(
        data_frame: DataFrame,
        feature_group_name: str,
        client_config: Config,
        start_index: int,
        end_index: int,
        profile_name: str = None,
    ) -> List[int]:
        """Ingest a single batch of DataFrame rows into FeatureStore.

        A dedicated ``sagemaker-featurestore-runtime`` client is created for
        the batch. If ``client_config`` specifies neither ``max_attempts`` nor
        ``total_max_attempts`` in its retry settings (including the case where
        no retry settings were given at all), a copy of the config is used with
        retries set to ``{"max_attempts": 10, "mode": "standard"}``; the
        caller's config object is not modified.

        Each row is sent with one ``PutRecord`` call. Cells for which
        ``pd.notna`` is false are left out of the record. A row whose call
        raises is logged and reported in the return value instead of aborting
        the batch.

        Args:
            data_frame (DataFrame): source DataFrame to be ingested.
            feature_group_name (str): name of the Feature Group.
            client_config (Config): Configuration for the sagemaker feature store runtime
                client to perform boto calls.
            start_index (int): starting position to ingest in this batch.
            end_index (int): ending position to ingest in this batch.
            profile_name (str): the profile credential should be used for ``PutRecord``
                (default: None).

        Returns:
            List of row indices that failed to be ingested.
        """
        # ``Config.retries`` is None when the caller supplied no retry settings;
        # fall back to an empty mapping so the membership tests below do not
        # raise ``TypeError`` on None.
        retry_config = client_config.retries or {}
        if "max_attempts" not in retry_config and "total_max_attempts" not in retry_config:
            # Copy first so the caller's Config instance stays untouched.
            client_config = copy.deepcopy(client_config)
            client_config.retries = {"max_attempts": 10, "mode": "standard"}
        sagemaker_featurestore_runtime_client = boto3.Session(
            profile_name=profile_name).client(
                service_name="sagemaker-featurestore-runtime",
                config=client_config)

        logger.info("Started ingesting index %d to %d", start_index, end_index)
        failed_rows = []
        # itertuples() puts the row's index value first, followed by the column
        # values in column order.
        for row_index, *cells in data_frame[start_index:end_index].itertuples():
            record = [
                FeatureValue(
                    feature_name=data_frame.columns[position],
                    value_as_string=str(cell),
                ) for position, cell in enumerate(cells) if pd.notna(cell)
            ]
            try:
                sagemaker_featurestore_runtime_client.put_record(
                    FeatureGroupName=feature_group_name,
                    Record=[value.to_dict() for value in record],
                )
            except Exception as e:  # pylint: disable=broad-except
                # Best effort: keep going and report the failed row to the caller.
                logger.error("Failed to ingest row %d: %s", row_index, e)
                failed_rows.append(row_index)
        return failed_rows
Exemplo n.º 4
0
    def _ingest_single_batch(
        data_frame: DataFrame,
        feature_group_name: str,
        client_config: Config,
        start_index: int,
        end_index: int,
    ) -> List[int]:
        """Send the rows ``data_frame[start_index:end_index]`` to FeatureStore.

        A ``sagemaker-featurestore-runtime`` client is created from a default
        ``boto3.Session`` with ``client_config``. Every row is written with its
        own ``put_record`` call; cells for which ``pd.notna`` is false are
        omitted. A row whose call raises is logged and collected rather than
        stopping the batch.

        Args:
            data_frame (DataFrame): source DataFrame to be ingested.
            feature_group_name (str): name of the Feature Group.
            client_config (Config): Configuration for the sagemaker feature store runtime
                client to perform boto calls.
            start_index (int): starting position to ingest in this batch.
            end_index (int): ending position to ingest in this batch.

        Returns:
            List of row indices that failed to be ingested.
        """
        runtime_client = boto3.Session().client(
            service_name="sagemaker-featurestore-runtime",
            config=client_config,
        )

        logger.info("Started ingesting index %d to %d", start_index, end_index)
        failures = []
        batch = data_frame[start_index:end_index]
        # The first element of each tuple is the row's index value; the rest
        # are the column values in column order.
        for row_label, *cells in batch.itertuples():
            features = [
                FeatureValue(
                    feature_name=data_frame.columns[position],
                    value_as_string=str(cell),
                )
                for position, cell in enumerate(cells)
                if pd.notna(cell)
            ]
            try:
                runtime_client.put_record(
                    FeatureGroupName=feature_group_name,
                    Record=[feature.to_dict() for feature in features],
                )
            except Exception as error:  # pylint: disable=broad-except
                logger.error("Failed to ingest row %d: %s", row_label, error)
                failures.append(row_label)
        return failures
    def _ingest_single_batch(
        data_frame: DataFrame,
        feature_group_name: str,
        sagemaker_session: Session,
        start_index: int,
        end_index: int,
    ) -> List[int]:
        """Write the rows ``data_frame[start_index:end_index]`` to FeatureStore.

        Each row is sent through ``sagemaker_session.put_record``. Cells for
        which ``pd.notna`` is false are skipped. If sending a row raises, the
        error is logged and the row's index value is recorded; the remaining
        rows are still processed.

        Args:
            data_frame (DataFrame): source DataFrame to be ingested.
            feature_group_name (str): name of the Feature Group.
            sagemaker_session (Session): session instance to perform boto calls.
            start_index (int): starting position to ingest in this batch.
            end_index (int): ending position to ingest in this batch.

        Returns:
            List of row indices that failed to be ingested.
        """
        logger.info("Started ingesting index %d to %d", start_index, end_index)
        not_ingested = []
        for entry in data_frame[start_index:end_index].itertuples():
            # Position 0 of the tuple is the row's index value; column values
            # start at position 1, hence the ``offset - 1`` column lookup.
            row_label = entry[0]
            feature_values = []
            for offset in range(1, len(entry)):
                cell = entry[offset]
                if not pd.notna(cell):
                    continue
                feature_values.append(
                    FeatureValue(
                        feature_name=data_frame.columns[offset - 1],
                        value_as_string=str(cell),
                    )
                )
            try:
                sagemaker_session.put_record(
                    feature_group_name=feature_group_name,
                    record=[item.to_dict() for item in feature_values],
                )
            except Exception as error:  # pylint: disable=broad-except
                logger.error("Failed to ingest row %d: %s", row_label, error)
                not_ingested.append(row_label)
        return not_ingested