示例#1
0
文件: dynamodb.py 项目: Shopify/feast
    def online_read(
        self,
        config: RepoConfig,
        table: FeatureView,
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """
        Read feature values for the given entity keys from DynamoDB.

        One ``get_item`` request is issued per entity key.

        Args:
            config: The RepoConfig; its ``online_store`` must be a
                DynamoDBOnlineStoreConfig.
            table: The feature view whose DynamoDB table is read.
            entity_keys: Entity keys to look up.
            requested_features: Not used by this implementation; every
                feature stored on the item is returned.

        Returns:
            One ``(event_ts, features)`` tuple per entity key, in input order.
            Keys with no stored item yield ``(None, None)``.
        """
        online_config = config.online_store
        assert isinstance(online_config, DynamoDBOnlineStoreConfig)
        dynamodb_resource = self._get_dynamodb_resource(online_config.region)
        # The table handle does not depend on the entity key, so build it once
        # instead of on every loop iteration.
        table_instance = dynamodb_resource.Table(_get_table_name(config, table))

        result: List[Tuple[Optional[datetime],
                           Optional[Dict[str, ValueProto]]]] = []
        for entity_key in entity_keys:
            entity_id = compute_entity_id(entity_key)
            # Only the network round-trip is measured by the span.
            with tracing_span(name="remote_call"):
                response = table_instance.get_item(
                    Key={"entity_id": entity_id})
            item = response.get("Item")

            if item is None:
                result.append((None, None))
                continue

            features = {}
            for feature_name, value_bin in item["values"].items():
                # Serialized proto bytes are read from the stored value's
                # ``.value`` attribute.
                val = ValueProto()
                val.ParseFromString(value_bin.value)
                features[feature_name] = val
            result.append((datetime.fromisoformat(item["event_ts"]), features))
        return result
示例#2
0
    def online_read(
        self,
        config: RepoConfig,
        table: FeatureView,
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """
        Read feature values for the given entity keys with a single SQL query.

        Args:
            config: The RepoConfig; used to obtain the connection and the
                project name for the table identifier.
            table: The feature view whose table is queried.
            entity_keys: Entity keys to look up.
            requested_features: Not used by this implementation; every
                feature row found for a key is returned.

        Returns:
            One ``(event_ts, features)`` tuple per entity key, in input order.
            Keys with no rows yield ``(None, None)``. The timestamp is taken
            from the last row seen for the key.
        """
        conn = self._get_conn(config)
        cur = conn.cursor()

        # Serialize every key exactly once; the same bytes serve as query
        # parameters and as lookup keys into the grouped rows below.
        serialized_keys = [
            serialize_entity_key(entity_key) for entity_key in entity_keys
        ]

        with tracing_span(name="remote_call"):
            # Fetch all entities in one go
            cur.execute(
                f"SELECT entity_key, feature_name, value, event_ts "
                f"FROM {_table_id(config.project, table)} "
                f"WHERE entity_key IN ({','.join('?' * len(serialized_keys))}) "
                f"ORDER BY entity_key",
                serialized_keys,
            )
            rows = cur.fetchall()

        # groupby only merges adjacent rows; ORDER BY entity_key above
        # guarantees all rows of one key are contiguous.
        rows_by_key = {
            k: list(group)
            for k, group in itertools.groupby(rows, key=lambda r: r[0])
        }

        result: List[Tuple[Optional[datetime],
                           Optional[Dict[str, ValueProto]]]] = []
        for entity_key_bin in serialized_keys:
            res = {}
            res_ts = None
            for _, feature_name, val_bin, ts in rows_by_key.get(entity_key_bin, []):
                val = ValueProto()
                val.ParseFromString(val_bin)
                res[feature_name] = val
                res_ts = ts

            if not res:
                result.append((None, None))
            else:
                result.append((res_ts, res))
        return result
示例#3
0
文件: datastore.py 项目: pyalex/feast
    def online_read(
        self,
        config: RepoConfig,
        table: Union[FeatureTable, FeatureView],
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """
        Read feature values for the given entity keys via one ``get_multi`` call.

        Args:
            config: The RepoConfig; its ``online_store`` must be a
                DatastoreOnlineStoreConfig.
            table: The feature table / feature view whose rows are read.
            entity_keys: Entity keys to look up.
            requested_features: Not used by this implementation; every
                feature stored on the row is returned.

        Returns:
            One ``(event_ts, features)`` tuple per entity key, in input order.
            Keys missing from the response yield ``(None, None)``.
        """
        online_config = config.online_store
        assert isinstance(online_config, DatastoreOnlineStoreConfig)
        client = self._get_client(online_config)

        feast_project = config.project

        # One key per requested entity, in request order.
        keys: List[Key] = [
            client.key(
                "Project", feast_project,
                "Table", table.name,
                "Row", compute_entity_id(entity_key),
            )
            for entity_key in entity_keys
        ]

        # NOTE: get_multi doesn't return values in the same order as the keys
        # in the request, and it may return fewer values than keys when some
        # are missing -- hence the lookup by key below.
        with tracing_span(name="remote_call"):
            fetched = client.get_multi(keys)
        found_by_key = {} if fetched is None else {row.key: row for row in fetched}

        result: List[Tuple[Optional[datetime],
                           Optional[Dict[str, ValueProto]]]] = []
        for key in keys:
            if key not in found_by_key:
                result.append((None, None))
                continue

            row = found_by_key[key]
            features = {}
            for feature_name, raw_value in row["values"].items():
                proto = ValueProto()
                proto.ParseFromString(raw_value)
                features[feature_name] = proto
            result.append((row["event_ts"], features))

        return result
示例#4
0
文件: redis.py 项目: Shopify/feast
    def online_read(
        self,
        config: RepoConfig,
        table: FeatureView,
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """
        Read feature values for the given entity keys from Redis.

        One HMGET per entity key is queued on a pipeline and all of them are
        sent with a single ``execute()``.

        Args:
            config: The RepoConfig; its ``online_store`` must be a
                RedisOnlineStoreConfig.
            table: The feature view being read.
            entity_keys: Entity keys to look up.
            requested_features: Feature names to fetch. When empty or None,
                all features of ``table`` are fetched. The list passed in is
                never modified.

        Returns:
            One entry per entity key, in input order, as produced by
            ``_get_features_for_entity``.
        """
        online_store_config = config.online_store
        assert isinstance(online_store_config, RedisOnlineStoreConfig)

        client = self._get_client(online_store_config)
        feature_view = table.name
        project = config.project

        if not requested_features:
            requested_features = [f.name for f in table.features]

        hset_keys = [_mmh3(f"{feature_view}:{k}") for k in requested_features]

        # The timestamp is fetched as one extra hash field alongside the
        # features.
        ts_key = f"_ts:{feature_view}"
        hset_keys.append(ts_key)
        # Build a new list rather than appending in place: appending would
        # mutate the caller's list and grow it on every call.
        requested_fields = [*requested_features, ts_key]

        keys = [_redis_key(project, entity_key) for entity_key in entity_keys]

        with client.pipeline() as pipe:
            for redis_key_bin in keys:
                pipe.hmget(redis_key_bin, hset_keys)
            # Only the actual round-trip is measured by the span.
            with tracing_span(name="remote_call"):
                redis_values = pipe.execute()

        result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = [
            self._get_features_for_entity(values, feature_view, requested_fields)
            for values in redis_values
        ]
        return result
示例#5
0
    def online_read(
        self,
        config: RepoConfig,
        table: FeatureView,
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """
        Retrieve feature values from the online DynamoDB store.

        Entity ids are read in batches of ``online_config.batch_size`` using
        ``batch_get_item``.

        Args:
            config: The RepoConfig for the current FeatureStore.
            table: Feast FeatureView.
            entity_keys: a list of entity keys that should be read from the FeatureStore.
            requested_features: Not used by this implementation; every
                feature stored on the item is returned.

        Returns:
            One ``(event_ts, features)`` tuple per entity key, in input order.
            Keys with no stored item yield ``(None, None)``.
        """
        online_config = config.online_store
        assert isinstance(online_config, DynamoDBOnlineStoreConfig)
        dynamodb_resource = self._get_dynamodb_resource(
            online_config.region, online_config.endpoint_url
        )
        table_instance = dynamodb_resource.Table(
            _get_table_name(online_config, config, table)
        )

        result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
        entity_ids = [compute_entity_id(entity_key) for entity_key in entity_keys]
        batch_size = online_config.batch_size
        entity_ids_iter = iter(entity_ids)
        while True:
            batch = list(itertools.islice(entity_ids_iter, batch_size))
            # No more entity ids to read
            if not batch:
                break
            # Results are collected per batch so the padding below counts
            # only this batch's entries, not everything read so far.
            batch_result: List[
                Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]
            ] = []
            batch_entity_ids = {
                table_instance.name: {
                    "Keys": [{"entity_id": entity_id} for entity_id in batch]
                }
            }
            with tracing_span(name="remote_call"):
                response = dynamodb_resource.batch_get_item(
                    RequestItems=batch_entity_ids
                )
            # NOTE(review): "UnprocessedKeys" in the response is not inspected
            # here; confirm whether retries are needed for throttled batches.
            response = response.get("Responses")
            table_responses = response.get(table_instance.name)
            if table_responses:
                # Presumably orders the items to follow entity_ids, which the
                # positional walk below relies on -- verify against the helper.
                table_responses = self._sort_dynamodb_response(
                    table_responses, entity_ids
                )
                entity_idx = 0
                for tbl_res in table_responses:
                    entity_id = tbl_res["entity_id"]
                    # Every batch id skipped before reaching this item had no
                    # stored value.
                    while entity_id != batch[entity_idx]:
                        batch_result.append((None, None))
                        entity_idx += 1
                    res = {}
                    for feature_name, value_bin in tbl_res["values"].items():
                        val = ValueProto()
                        val.ParseFromString(value_bin.value)
                        res[feature_name] = val
                    batch_result.append(
                        (datetime.fromisoformat(tbl_res["event_ts"]), res)
                    )
                    entity_idx += 1

            # Not all entities in a batch may have responses
            # Pad with remaining values in batch that were not found
            batch_size_nones = ((None, None),) * (len(batch) - len(batch_result))
            batch_result.extend(batch_size_nones)
            result.extend(batch_result)
        return result
示例#6
0
 def entrypoint():
     """Sleep for 0.1 seconds inside a tracing span named "custom_span"."""
     pause_seconds = 0.1
     with tracing_span("custom_span"):
         time.sleep(pause_seconds)