def record():
    """Build a sample record made of three ``FeatureValue`` entries.

    Returns:
        A list of ``FeatureValue`` objects for ``feature1``, ``feature2`` and
        ``feature3``; every value is supplied in its string form.
    """
    # (feature name, string value) pairs, in the order they appear in the record.
    sample_values = (
        ("feature1", "10.0"),
        ("feature2", "10"),
        ("feature3", "2020-10-30T03:43:21Z"),
    )
    return [
        FeatureValue(feature_name=name, value_as_string=text)
        for name, text in sample_values
    ]
def _ingest_single_batch(
    data_frame: DataFrame,
    feature_group_name: str,
    sagemaker_session: Session,
    start_index: int,
    end_index: int,
):
    """Put the rows of one DataFrame slice into a Feature Group, one by one.

    Each row in ``data_frame[start_index:end_index]`` is turned into a list of
    ``FeatureValue`` objects (column name paired with ``str(value)``) and sent
    through ``sagemaker_session.put_record``. Nothing is caught here, so an
    error raised by ``put_record`` propagates to the caller.

    Args:
        data_frame (DataFrame): source DataFrame to be ingested.
        feature_group_name (str): name of the Feature Group.
        sagemaker_session (Session): session instance to perform boto calls.
        start_index (int): starting position to ingest in this batch.
        end_index (int): ending position to ingest in this batch.
    """
    logger.info("Started ingesting index %d to %d", start_index, end_index)

    column_names = data_frame.columns
    batch = data_frame[start_index:end_index]
    # index=False: each tuple holds only the column values, aligned with
    # ``column_names`` position by position.
    for values in batch.itertuples(index=False):
        feature_values = [
            FeatureValue(feature_name=column, value_as_string=str(cell))
            for column, cell in zip(column_names, values)
        ]
        payload = [feature_value.to_dict() for feature_value in feature_values]
        sagemaker_session.put_record(
            feature_group_name=feature_group_name,
            record=payload,
        )
def _ingest_single_batch(
    data_frame: DataFrame,
    feature_group_name: str,
    client_config: Config,
    start_index: int,
    end_index: int,
    profile_name: str = None,
) -> List[int]:
    """Ingest a single batch of DataFrame rows into FeatureStore.

    A dedicated ``sagemaker-featurestore-runtime`` client is created for the
    batch. When ``client_config`` carries no retry limit (neither
    ``max_attempts`` nor ``total_max_attempts``), a copy of the config is used
    with ``{"max_attempts": 10, "mode": "standard"}``; the caller's config
    object is never mutated. Features whose value is missing according to
    ``pd.notna`` are left out of the record.

    Args:
        data_frame (DataFrame): source DataFrame to be ingested.
        feature_group_name (str): name of the Feature Group.
        client_config (Config): Configuration for the sagemaker feature store
            runtime client to perform boto calls.
        start_index (int): starting position to ingest in this batch.
        end_index (int): ending position to ingest in this batch.
        profile_name (str): the profile credential should be used for
            ``PutRecord`` (default: None).

    Returns:
        List of row indices that failed to be ingested.
    """
    # ``Config.retries`` is None when the caller did not configure retries;
    # treat that as "no retry settings" instead of failing on ``in None``.
    retry_config = client_config.retries or {}
    if "max_attempts" not in retry_config and "total_max_attempts" not in retry_config:
        client_config = copy.deepcopy(client_config)
        client_config.retries = {"max_attempts": 10, "mode": "standard"}

    sagemaker_featurestore_runtime_client = boto3.Session(profile_name=profile_name).client(
        service_name="sagemaker-featurestore-runtime", config=client_config
    )

    logger.info("Started ingesting index %d to %d", start_index, end_index)
    failed_rows = []
    column_names = data_frame.columns
    for row in data_frame[start_index:end_index].itertuples():
        # ``itertuples()`` puts the row's index label first, then the values
        # in column order.
        row_index = row[0]
        record = [
            FeatureValue(feature_name=column, value_as_string=str(value))
            for column, value in zip(column_names, row[1:])
            if pd.notna(value)
        ]
        try:
            sagemaker_featurestore_runtime_client.put_record(
                FeatureGroupName=feature_group_name,
                Record=[value.to_dict() for value in record],
            )
        except Exception as e:  # pylint: disable=broad-except
            # Best effort: remember the failing row and keep ingesting.
            logger.error("Failed to ingest row %d: %s", row_index, e)
            failed_rows.append(row_index)
    return failed_rows
def _ingest_single_batch(
    data_frame: DataFrame,
    feature_group_name: str,
    client_config: Config,
    start_index: int,
    end_index: int,
) -> List[int]:
    """Send one slice of a DataFrame to FeatureStore, row by row.

    A ``sagemaker-featurestore-runtime`` client is built from a fresh
    ``boto3.Session`` with ``client_config``. For every row, features whose
    value is missing according to ``pd.notna`` are left out, and the rest are
    sent as strings with ``put_record``. A row whose ``put_record`` call raises
    is logged and reported back instead of aborting the batch.

    Args:
        data_frame (DataFrame): source DataFrame to be ingested.
        feature_group_name (str): name of the Feature Group.
        client_config (Config): Configuration for the sagemaker feature store
            runtime client to perform boto calls.
        start_index (int): starting position to ingest in this batch.
        end_index (int): ending position to ingest in this batch.

    Returns:
        List of row indices that failed to be ingested.
    """
    session = boto3.Session()
    sagemaker_featurestore_runtime_client = session.client(
        service_name="sagemaker-featurestore-runtime",
        config=client_config,
    )

    logger.info("Started ingesting index %d to %d", start_index, end_index)

    failed_rows = []
    column_names = data_frame.columns
    batch = data_frame[start_index:end_index]
    for row in batch.itertuples():
        # First element is the row's index label; the rest follow column order.
        row_index, cells = row[0], row[1:]
        feature_values = [
            FeatureValue(feature_name=column, value_as_string=str(cell))
            for column, cell in zip(column_names, cells)
            if pd.notna(cell)
        ]
        try:
            sagemaker_featurestore_runtime_client.put_record(
                FeatureGroupName=feature_group_name,
                Record=[feature_value.to_dict() for feature_value in feature_values],
            )
        except Exception as e:  # pylint: disable=broad-except
            logger.error("Failed to ingest row %d: %s", row_index, e)
            failed_rows.append(row_index)

    return failed_rows
def _ingest_single_batch(
    data_frame: DataFrame,
    feature_group_name: str,
    sagemaker_session: Session,
    start_index: int,
    end_index: int,
) -> List[int]:
    """Send one slice of a DataFrame to FeatureStore through the session.

    For every row in ``data_frame[start_index:end_index]``, features whose
    value is missing according to ``pd.notna`` are left out, and the rest are
    sent as strings with ``sagemaker_session.put_record``. A row whose
    ``put_record`` call raises is logged and reported back instead of aborting
    the batch.

    Args:
        data_frame (DataFrame): source DataFrame to be ingested.
        feature_group_name (str): name of the Feature Group.
        sagemaker_session (Session): session instance to perform boto calls.
        start_index (int): starting position to ingest in this batch.
        end_index (int): ending position to ingest in this batch.

    Returns:
        List of row indices that failed to be ingested.
    """
    logger.info("Started ingesting index %d to %d", start_index, end_index)

    failed_rows = []
    column_names = data_frame.columns
    batch = data_frame[start_index:end_index]
    for row in batch.itertuples():
        # First element is the row's index label; the rest follow column order.
        row_index, cells = row[0], row[1:]
        feature_values = [
            FeatureValue(feature_name=column, value_as_string=str(cell))
            for column, cell in zip(column_names, cells)
            if pd.notna(cell)
        ]
        try:
            sagemaker_session.put_record(
                feature_group_name=feature_group_name,
                record=[feature_value.to_dict() for feature_value in feature_values],
            )
        except Exception as e:  # pylint: disable=broad-except
            logger.error("Failed to ingest row %d: %s", row_index, e)
            failed_rows.append(row_index)

    return failed_rows