示例#1
0
    def __init__(self, base_features, parent_entity, primitive, use_previous=None,
                 where=None):
        """Set up an aggregation feature over ``base_features``.

        Args:
            base_features: One feature or an iterable of features. Each is
                normalized with ``_check_feature``; when an iterable is
                given, all of them must share a single entity. The entity
                of the first one is stored as ``self.child_entity``.
            parent_entity: Entity passed on to the superclass constructor.
                ``self.parent_entity`` is looked up by ``parent_entity.id``
                in ``parent_entity.entityset.metadata``.
            primitive: Forwarded to the superclass as ``primitive``.
            use_previous: When truthy, converted with ``_check_timedelta``
                and checked against the child entity's time-index column.
            where: When not None, normalized with ``_check_feature``; its
                entity must be the child entity.

        Raises:
            AssertionError: If any of the checks described above fails.
        """
        if not hasattr(base_features, '__iter__'):
            # A lone feature is wrapped so the code below always sees a list.
            base_features = [_check_feature(base_features)]
        else:
            base_features = [_check_feature(feat) for feat in base_features]
            same_entity_msg = "all base features must share the same entity"
            distinct_entities = {feat.entity for feat in base_features}
            assert len(distinct_entities) == 1, same_entity_msg

        first_feature = base_features[0]
        self.child_entity = first_feature.entity
        self.parent_entity = parent_entity.entityset.metadata[parent_entity.id]

        if where is not None:
            self.where = _check_feature(where)
            where_msg = "Where feature must be defined on child entity {}"
            where_msg = where_msg.format(self.child_entity.id)
            assert self.where.entity.id == self.child_entity.id, where_msg

        if use_previous:
            no_time_index_msg = ("Applying function that requires time index "
                                 "to entity that doesn't have one")
            assert self.child_entity.time_index is not None, no_time_index_msg

            self.use_previous = _check_timedelta(use_previous)
            assert len(base_features) > 0
            index_key = first_feature.entity.time_index
            index_column = first_feature.entity[index_key]
            needs_index_msg = ("Use previous can only be defined on entities "
                               "with a time index")
            assert index_key is not None, needs_index_msg
            assert _check_time_against_column(self.use_previous, index_column)

        super(AggregationFeature, self).__init__(
            parent_entity, base_features, primitive=primitive)
示例#2
0
    def __init__(self, entity, base_features, **kwargs):
        """Initialize the state shared by all features.

        Args:
            entity: Object exposing ``id`` and ``entityset``. The feature
                stores ``entity.id`` as ``self.entity_id`` and
                ``entity.entityset.metadata`` as ``self.entityset``.
            base_features: Sized collection of features this feature is
                built from. Each must be a ``PrimitiveBase`` instance and
                no two may return the same ``hash()``.
            **kwargs: Stored as ``self.additional_attributes`` and forwarded
                to the next ``__init__`` after ``PrimitiveBase`` in the MRO.

        Raises:
            AssertionError: If a base feature is not a ``PrimitiveBase``; if
                ``self.use_previous`` is truthy and there are no base
                features, the first base feature's entity has no time
                index, or ``_check_time_against_column`` returns a falsy
                value; or if ``self._check_input_types()`` is falsy.
            ValueError: If two base features have the same hash.
        """
        assert all(isinstance(f, PrimitiveBase) for f in base_features), \
            "All base features must be features"
        if len({bf.hash() for bf in base_features}) != len(base_features):
            raise ValueError(u"Duplicate base features ({}): {}".format(
                self.__class__, base_features))

        self.entity_id = entity.id
        self.entityset = entity.entityset.metadata

        # Assign before the use_previous validation below: that validation
        # needs the base features, and reading self.base_features before
        # this assignment would fail (or see a stale value) unless something
        # else had already set it.
        self.base_features = base_features

        # P TODO: where should this logic go?
        # not all primitives support use previous so doesn't make sense to have
        # in base
        # NOTE: use_previous is read from the instance; it is not a parameter
        # of this method.
        if self.use_previous:
            self.use_previous = _check_timedelta(self.use_previous)
            assert len(base_features) > 0
            time_index = base_features[0].entity.time_index
            # Check for a missing time index *before* using it as a lookup
            # key, so the failure carries the explanatory message.
            assert time_index is not None, ("Use previous can only be defined "
                                            "on entities with a time index")
            time_col = base_features[0].entity[time_index]
            assert _check_time_against_column(self.use_previous, time_col)

        # variable type can be declared or inferred from first base feature
        self.additional_attributes = kwargs

        assert self._check_input_types(), ("Provided inputs don't match input "
                                           "type requirements")
        super(PrimitiveBase, self).__init__(**kwargs)
    def __init__(self, entity, base_features, **kwargs):
        """Initialize the state shared by all features.

        Args:
            entity: Object exposing ``id`` and ``entityset``. The feature
                stores ``entity.id`` as ``self.entity_id`` and
                ``entity.entityset`` as ``self.entityset``.
            base_features: Sized collection of features this feature is
                built from. Each must be a ``PrimitiveBase`` instance and
                no two may return the same ``hash()``.
            **kwargs: Stored as ``self.additional_attributes`` and forwarded
                to the next ``__init__`` after ``PrimitiveBase`` in the MRO.

        Raises:
            AssertionError: If a base feature is not a ``PrimitiveBase``; if
                ``self.use_previous`` is truthy and there are no base
                features, the first base feature's entity has no time
                index, or ``_check_time_against_column`` returns a falsy
                value; or if ``self._check_input_types()`` is falsy.
            ValueError: If two base features have the same hash.
        """
        assert all(isinstance(f, PrimitiveBase) for f in base_features), \
            "All base features must be features"
        if len({bf.hash() for bf in base_features}) != len(base_features):
            raise ValueError(u"Duplicate base features ({}): {}".format(
                self.__class__, base_features))

        self.entity_id = entity.id
        self.entityset = entity.entityset

        # Assign before the use_previous validation below: that validation
        # needs the base features, and reading self.base_features before
        # this assignment would fail (or see a stale value) unless something
        # else had already set it.
        self.base_features = base_features

        # P TODO: where should this logic go?
        # not all primitives support use previous so doesn't make sense to have
        # in base
        # NOTE: use_previous is read from the instance; it is not a parameter
        # of this method.
        if self.use_previous:
            self.use_previous = _check_timedelta(self.use_previous)
            assert len(base_features) > 0
            time_index = base_features[0].entity.time_index
            # Check for a missing time index *before* using it as a lookup
            # key, so the failure carries the explanatory message.
            assert time_index is not None, ("Use previous can only be defined "
                                            "on entities with a time index")
            time_col = base_features[0].entity[time_index]
            assert _check_time_against_column(self.use_previous, time_col)

        # variable type can be declared or inferred from first base feature
        self.additional_attributes = kwargs

        assert self._check_input_types(), ("Provided inputs don't match input "
                                           "type requirements")
        super(PrimitiveBase, self).__init__(**kwargs)
示例#4
0
    def __init__(
        self,
        base_features,
        parent_dataframe_name,
        primitive,
        relationship_path=None,
        use_previous=None,
        where=None,
        name=None,
    ):
        """Set up an aggregation feature over ``base_features``.

        Args:
            base_features: Input normalized with ``_validate_base_features``.
                The first resulting feature supplies the child dataframe
                name and the entityset used for all lookups here.
            parent_dataframe_name: Stored on the instance and used to look
                up the dataframe handed to the superclass constructor.
            primitive: Forwarded to the superclass as ``primitive``.
            relationship_path: Passed, with the entityset and parent
                dataframe name, to ``self._handle_relationship_path``. The
                first returned item is forwarded to the superclass and the
                second is stored as ``self._path_is_unique``.
            use_previous: When truthy, converted with ``_check_timedelta``
                and checked against the child dataframe's time-index column.
            where: When not None, the first feature returned by
                ``_validate_base_features(where)``; it must be defined on
                the child dataframe.
            name: Forwarded to the superclass as ``name``.

        Raises:
            ValueError: If any base feature reports more than one output
                feature.
            AssertionError: If a ``where`` or ``use_previous`` check fails.
        """
        base_features = _validate_base_features(base_features)

        if any(feat.number_output_features > 1 for feat in base_features):
            raise ValueError("Cannot stack on whole multi-output feature.")

        first_feature = base_features[0]
        self.child_dataframe_name = first_feature.dataframe_name
        entityset = first_feature.entityset

        path_info = self._handle_relationship_path(
            entityset, parent_dataframe_name, relationship_path
        )
        relationship_path, self._path_is_unique = path_info

        self.parent_dataframe_name = parent_dataframe_name

        if where is not None:
            self.where = _validate_base_features(where)[0]
            where_msg = "Where feature must be defined on child dataframe {}"
            where_msg = where_msg.format(self.child_dataframe_name)
            assert self.where.dataframe_name == self.child_dataframe_name, where_msg

        if use_previous:
            child_dataframe = entityset[self.child_dataframe_name]
            assert child_dataframe.ww.time_index is not None, (
                "Applying function that requires time index to dataframe that "
                "doesn't have one"
            )
            self.use_previous = _check_timedelta(use_previous)
            assert len(base_features) > 0
            index_name = first_feature.dataframe.ww.time_index
            index_column = first_feature.dataframe.ww[index_name]
            assert (
                index_name is not None
            ), "Use previous can only be defined on dataframes with a time index"
            assert _check_time_against_column(self.use_previous, index_column)

        super(AggregationFeature, self).__init__(
            dataframe=entityset[parent_dataframe_name],
            base_features=base_features,
            relationship_path=relationship_path,
            primitive=primitive,
            name=name,
        )
示例#5
0
    def __init__(self,
                 base_features,
                 parent_entity,
                 primitive,
                 relationship_path=None,
                 use_previous=None,
                 where=None,
                 name=None):
        """Set up an aggregation feature over ``base_features``.

        Args:
            base_features: One feature or an iterable of features. Each is
                normalized with ``_check_feature``; when an iterable is
                given, all of them must share a single entity. The entity
                of the first one is stored as ``self.child_entity``.
            parent_entity: Entity passed on to the superclass constructor.
                ``self.parent_entity`` is looked up by ``parent_entity.id``
                in ``parent_entity.entityset.metadata``.
            primitive: Forwarded to the superclass as ``primitive``.
            relationship_path: Passed with ``parent_entity`` to
                ``self._handle_relationship_path``. The first returned item
                is forwarded to the superclass and the second is stored as
                ``self._path_is_unique``.
            use_previous: When truthy, converted with ``_check_timedelta``
                and checked against the child entity's time-index column.
            where: When not None, normalized with ``_check_feature``; its
                entity must be the child entity.
            name: Forwarded to the superclass as ``name``.

        Raises:
            ValueError: If any base feature reports more than one output
                feature.
            AssertionError: If any of the other checks above fails.
        """
        if not hasattr(base_features, '__iter__'):
            # A lone feature is wrapped so the code below always sees a list.
            base_features = [_check_feature(base_features)]
        else:
            base_features = [_check_feature(feat) for feat in base_features]
            same_entity_msg = "all base features must share the same entity"
            distinct_entities = {feat.entity for feat in base_features}
            assert len(distinct_entities) == 1, same_entity_msg

        if any(feat.number_output_features > 1 for feat in base_features):
            raise ValueError("Cannot stack on whole multi-output feature.")

        first_feature = base_features[0]
        self.child_entity = first_feature.entity

        path_info = self._handle_relationship_path(parent_entity,
                                                   relationship_path)
        relationship_path, self._path_is_unique = path_info

        self.parent_entity = parent_entity.entityset.metadata[parent_entity.id]

        if where is not None:
            self.where = _check_feature(where)
            where_msg = "Where feature must be defined on child entity {}"
            where_msg = where_msg.format(self.child_entity.id)
            assert self.where.entity.id == self.child_entity.id, where_msg

        if use_previous:
            no_time_index_msg = ("Applying function that requires time index "
                                 "to entity that doesn't have one")
            assert self.child_entity.time_index is not None, no_time_index_msg

            self.use_previous = _check_timedelta(use_previous)
            assert len(base_features) > 0
            index_key = first_feature.entity.time_index
            index_column = first_feature.entity[index_key]
            needs_index_msg = ("Use previous can only be defined on entities "
                               "with a time index")
            assert index_key is not None, needs_index_msg
            assert _check_time_against_column(self.use_previous, index_column)

        super(AggregationFeature, self).__init__(
            entity=parent_entity,
            base_features=base_features,
            relationship_path=relationship_path,
            primitive=primitive,
            name=name)