def online_read(
    self,
    config: RepoConfig,
    table: FeatureView,
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Read feature values for ``entity_keys`` from DynamoDB, one get_item per key.

    Args:
        config: The RepoConfig for the current FeatureStore.
        table: Feast FeatureView being read.
        entity_keys: entity keys to look up.
        requested_features: unused here; all stored features are returned.

    Returns:
        One ``(event_ts, feature_dict)`` pair per entity key, in request
        order; ``(None, None)`` for keys with no stored item.
    """
    online_config = config.online_store
    assert isinstance(online_config, DynamoDBOnlineStoreConfig)
    dynamodb_resource = self._get_dynamodb_resource(online_config.region)
    # The table name does not depend on the entity key; create the Table
    # resource once instead of re-creating it on every loop iteration.
    table_instance = dynamodb_resource.Table(_get_table_name(config, table))

    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    for entity_key in entity_keys:
        entity_id = compute_entity_id(entity_key)
        with tracing_span(name="remote_call"):
            response = table_instance.get_item(Key={"entity_id": entity_id})
        value = response.get("Item")
        if value is not None:
            res = {}
            for feature_name, value_bin in value["values"].items():
                val = ValueProto()
                # value_bin is a boto3 Binary wrapper; .value is the raw bytes.
                val.ParseFromString(value_bin.value)
                res[feature_name] = val
            result.append((datetime.fromisoformat(value["event_ts"]), res))
        else:
            result.append((None, None))
    return result
def online_read(
    self,
    config: RepoConfig,
    table: Union[FeatureTable, FeatureView],
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Read feature values for ``entity_keys`` from DynamoDB, one get_item per key.

    Args:
        config: The RepoConfig for the current FeatureStore.
        table: Feast FeatureTable or FeatureView being read.
        entity_keys: entity keys to look up.
        requested_features: unused here; all stored features are returned.

    Returns:
        One ``(event_ts, feature_dict)`` pair per entity key, in request
        order; ``(None, None)`` for keys with no stored item.
    """
    online_config = config.online_store
    assert isinstance(online_config, DynamoDBOnlineStoreConfig)
    _, dynamodb_resource = self._initialize_dynamodb(online_config)
    # The table name does not depend on the entity key; create the Table
    # resource once instead of re-creating it on every loop iteration.
    table_instance = dynamodb_resource.Table(f"{config.project}.{table.name}")

    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    for entity_key in entity_keys:
        entity_id = compute_entity_id(entity_key)
        response = table_instance.get_item(Key={"entity_id": entity_id})
        value = response.get("Item")
        if value is not None:
            res = {}
            for feature_name, value_bin in value["values"].items():
                val = ValueProto()
                # value_bin is a boto3 Binary wrapper; .value is the raw bytes.
                val.ParseFromString(value_bin.value)
                res[feature_name] = val
            # NOTE(review): event_ts appears to be stored as a string at write
            # time but is returned here unparsed despite the datetime return
            # annotation — the newer DynamoDB reader parses it with
            # datetime.fromisoformat; confirm before changing.
            result.append((value["event_ts"], res))
        else:
            result.append((None, None))
    return result
def online_write_batch(
    self,
    config: RepoConfig,
    table: FeatureView,
    data: List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]],
    progress: Optional[Callable[[int], Any]],
) -> None:
    """Write a batch of feature rows for ``table`` to DynamoDB.

    Each row is stored under its computed entity id (the partition key), with
    the event timestamp as a string and each feature value serialized to
    bytes. ``progress`` is invoked once per row written, when provided.
    """
    online_config = config.online_store
    assert isinstance(online_config, DynamoDBOnlineStoreConfig)
    dynamodb_resource = self._get_dynamodb_resource(online_config.region)
    table_instance = dynamodb_resource.Table(_get_table_name(config, table))

    with table_instance.batch_writer() as writer:
        for entity_key, feature_values, event_ts, created_ts in data:
            serialized_values = {
                name: proto.SerializeToString()  # Serialized Features
                for name, proto in feature_values.items()
            }
            writer.put_item(
                Item={
                    "entity_id": compute_entity_id(entity_key),  # PartitionKey
                    "event_ts": str(utils.make_tzaware(event_ts)),
                    "values": serialized_values,
                }
            )
            if progress:
                progress(1)
def online_read(
    self,
    config: RepoConfig,
    table: Union[FeatureTable, FeatureView],
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Fetch feature rows for ``entity_keys`` from Datastore, one get per key.

    Returns one ``(event_ts, feature_dict)`` pair per entity key, in request
    order; ``(None, None)`` marks keys with no stored row.
    """
    online_config = config.online_store
    assert isinstance(online_config, DatastoreOnlineStoreConfig)
    client = self._get_client(online_config)
    feast_project = config.project

    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    for entity_key in entity_keys:
        document_id = compute_entity_id(entity_key)
        row_key = client.key(
            "Project", feast_project, "Table", table.name, "Row", document_id
        )
        row = client.get(row_key)
        if row is None:
            result.append((None, None))
            continue
        features = {}
        for feature_name, serialized in row["values"].items():
            proto = ValueProto()
            proto.ParseFromString(serialized)
            features[feature_name] = proto
        result.append((row["event_ts"], features))
    return result
def _insert_data_test_table(data, project, tbl_name, region):
    """Insert test rows into the DynamoDB table ``{project}.{tbl_name}``.

    Args:
        data: iterable of (entity_key, features, timestamp, created_ts) tuples.
        project: Feast project name, used as the table-name prefix.
        tbl_name: table name suffix.
        region: AWS region of the DynamoDB table.
    """
    dynamodb_resource = boto3.resource("dynamodb", region_name=region)
    table_instance = dynamodb_resource.Table(f"{project}.{tbl_name}")
    # Open the batch writer once for the whole dataset. The original opened a
    # new batch_writer per row, which flushed a single-item batch on every
    # iteration and defeated batching entirely.
    with table_instance.batch_writer() as batch:
        for entity_key, features, timestamp, created_ts in data:
            entity_id = compute_entity_id(entity_key)
            batch.put_item(
                Item={
                    "entity_id": entity_id,
                    "event_ts": str(utils.make_tzaware(timestamp)),
                    "values": {k: v.SerializeToString() for k, v in features.items()},
                }
            )
def _write_minibatch(
    client,
    project: str,
    table: Union[FeatureTable, FeatureView],
    data: Sequence[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]],
    progress: Optional[Callable[[int], Any]],
):
    """Write one minibatch of feature rows to Datastore in a single transaction.

    Each row becomes a Datastore entity keyed by
    ("Project", project, "Table", table.name, "Row", <computed entity id>),
    carrying the serialized entity key, serialized feature values, and the
    (timezone-aware) event/created timestamps. ``progress`` is invoked once
    with the number of rows written, when provided.
    """
    entities = []
    for entity_key, features, timestamp, created_ts in data:
        row_key = client.key(
            "Project",
            project,
            "Table",
            table.name,
            "Row",
            compute_entity_id(entity_key),
        )
        row = datastore.Entity(
            key=row_key,
            exclude_from_indexes=("created_ts", "event_ts", "values"),
        )
        serialized_values = {
            name: proto.SerializeToString() for name, proto in features.items()
        }
        row.update(
            dict(
                key=entity_key.SerializeToString(),
                values=serialized_values,
                event_ts=utils.make_tzaware(timestamp),
                created_ts=(
                    utils.make_tzaware(created_ts) if created_ts is not None else None
                ),
            )
        )
        entities.append(row)

    with client.transaction():
        client.put_multi(entities)

    if progress:
        progress(len(entities))
def online_read(
    self,
    config: RepoConfig,
    table: Union[FeatureTable, FeatureView],
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Fetch feature rows for ``entity_keys`` from Datastore via one get_multi.

    Returns one ``(event_ts, feature_dict)`` pair per entity key, in request
    order; ``(None, None)`` marks keys with no stored row.
    """
    online_config = config.online_store
    assert isinstance(online_config, DatastoreOnlineStoreConfig)
    client = self._get_client(online_config)
    feast_project = config.project

    keys: List[Key] = []
    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    for entity_key in entity_keys:
        document_id = compute_entity_id(entity_key)
        keys.append(
            client.key(
                "Project", feast_project, "Table", table.name, "Row", document_id
            )
        )

    # get_multi neither preserves request order nor returns entries for
    # missing keys, so index the response by key before reassembling.
    with tracing_span(name="remote_call"):
        values = client.get_multi(keys)
    values_dict = {v.key: v for v in values} if values is not None else {}

    for key in keys:
        row = values_dict.get(key)
        if row is None:
            result.append((None, None))
            continue
        features = {}
        for feature_name, serialized in row["values"].items():
            proto = ValueProto()
            proto.ParseFromString(serialized)
            features[feature_name] = proto
        result.append((row["event_ts"], features))
    return result
def online_read(
    self,
    config: RepoConfig,
    table: Union[FeatureTable, FeatureView],
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Fetch feature rows for ``entity_keys`` from Datastore via one get_multi.

    Returns one ``(event_ts, feature_dict)`` pair per entity key, in request
    order; ``(None, None)`` marks keys with no stored row.
    """
    online_config = config.online_store
    assert isinstance(online_config, DatastoreOnlineStoreConfig)
    client = self._get_client(online_config)
    feast_project = config.project

    keys: List[Key] = []
    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    for entity_key in entity_keys:
        document_id = compute_entity_id(entity_key)
        keys.append(
            client.key(
                "Project", feast_project, "Table", table.name, "Row", document_id
            )
        )

    values = client.get_multi(keys)
    if values is not None:
        # get_multi returns found rows in arbitrary order and omits missing
        # keys entirely; rebuild the response in request order via a key map.
        found = {row.key: row for row in values}
        for key in keys:
            row = found.get(key)
            if row is None:
                result.append((None, None))
                continue
            features = {}
            for feature_name, serialized in row["values"].items():
                proto = ValueProto()
                proto.ParseFromString(serialized)
                features[feature_name] = proto
            result.append((row["event_ts"], features))
    return result
def _write_batch_non_duplicates(
    self,
    table_instance,
    data: List[
        Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
    ],
    progress: Optional[Callable[[int], Any]],
):
    """Deduplicate write batch request items on ``entity_id`` primary key."""
    # overwrite_by_pkeys makes the boto3 batch writer keep only the last
    # queued item per entity_id, so duplicate keys never reach DynamoDB.
    with table_instance.batch_writer(overwrite_by_pkeys=["entity_id"]) as writer:
        for entity_key, features, event_ts, created_ts in data:
            serialized_values = {
                name: proto.SerializeToString()  # Serialized Features
                for name, proto in features.items()
            }
            writer.put_item(
                Item={
                    "entity_id": compute_entity_id(entity_key),  # PartitionKey
                    "event_ts": str(utils.make_tzaware(event_ts)),
                    "values": serialized_values,
                }
            )
            if progress:
                progress(1)
def online_read(
    self,
    config: RepoConfig,
    table: FeatureView,
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """
    Retrieve feature values from the online DynamoDB store.

    Args:
        config: The RepoConfig for the current FeatureStore.
        table: Feast FeatureView.
        entity_keys: a list of entity keys that should be read from the FeatureStore.
        requested_features: unused here; all stored features are returned.

    Returns:
        One ``(event_ts, feature_dict)`` pair per entity key, in the same
        order as ``entity_keys``; ``(None, None)`` for keys not found.
    """
    online_config = config.online_store
    assert isinstance(online_config, DynamoDBOnlineStoreConfig)
    dynamodb_resource = self._get_dynamodb_resource(
        online_config.region, online_config.endpoint_url
    )
    table_instance = dynamodb_resource.Table(
        _get_table_name(online_config, config, table)
    )

    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    entity_ids = [compute_entity_id(entity_key) for entity_key in entity_keys]
    batch_size = online_config.batch_size
    entity_ids_iter = iter(entity_ids)
    while True:
        batch = list(itertools.islice(entity_ids_iter, batch_size))
        # No more items to read
        if len(batch) == 0:
            break
        # Accumulate this batch's rows separately: padding below must be
        # computed against the CURRENT batch size. The original compared
        # len(batch) against the cumulative len(result), which under-padded
        # (often skipped padding entirely for) every batch after the first,
        # misaligning results with entity_keys.
        batch_result: List[
            Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]
        ] = []
        batch_entity_ids = {
            table_instance.name: {
                "Keys": [{"entity_id": entity_id} for entity_id in batch]
            }
        }
        with tracing_span(name="remote_call"):
            response = dynamodb_resource.batch_get_item(
                RequestItems=batch_entity_ids
            )
        response = response.get("Responses")
        table_responses = response.get(table_instance.name)
        if table_responses:
            # batch_get_item does not preserve request order; restore it.
            table_responses = self._sort_dynamodb_response(
                table_responses, entity_ids
            )
            entity_idx = 0
            for tbl_res in table_responses:
                entity_id = tbl_res["entity_id"]
                # Entities missing from the response leave gaps; fill them
                # so positions keep matching the requested batch order.
                while entity_id != batch[entity_idx]:
                    batch_result.append((None, None))
                    entity_idx += 1
                res = {}
                for feature_name, value_bin in tbl_res["values"].items():
                    val = ValueProto()
                    # value_bin is a boto3 Binary wrapper; .value is raw bytes.
                    val.ParseFromString(value_bin.value)
                    res[feature_name] = val
                batch_result.append(
                    (datetime.fromisoformat(tbl_res["event_ts"]), res)
                )
                entity_idx += 1
        # Not all entities in a batch may have responses.
        # Pad trailing entities of this batch that were not found.
        batch_result.extend(((None, None),) * (len(batch) - len(batch_result)))
        result.extend(batch_result)
    return result