def __init__(self, base_features, parent_entity, primitive,
             use_previous=None, where=None):
    """Set up an aggregation of child-entity features onto a parent entity.

    Args:
        base_features: A single feature, or an iterable of features; each
            one is passed through ``_check_feature``. All of them must
            report the same ``entity``.
        parent_entity: Entity the aggregation is attached to. Its
            counterpart in ``parent_entity.entityset.metadata`` is stored
            as ``self.parent_entity``.
        primitive: Forwarded unchanged to the parent ``__init__``.
        use_previous: Optional time window. When truthy it is normalised
            with ``_check_timedelta`` and checked against the child
            entity's time index column.
        where: Optional filter feature; it must be defined on the child
            entity.

    Raises:
        AssertionError: If the base features span several entities, if
            ``where`` lives on another entity, or if the ``use_previous``
            checks fail.
    """
    if not hasattr(base_features, '__iter__'):
        # A lone feature is wrapped so the rest of the method sees a list.
        base_features = [_check_feature(base_features)]
    else:
        base_features = [_check_feature(feature) for feature in base_features]
        entity_msg = "all base features must share the same entity"
        distinct_entities = {feature.entity for feature in base_features}
        assert len(distinct_entities) == 1, entity_msg

    first_feature = base_features[0]
    self.child_entity = first_feature.entity
    self.parent_entity = parent_entity.entityset.metadata[parent_entity.id]

    if where is not None:
        self.where = _check_feature(where)
        where_msg = "Where feature must be defined on child entity {}".format(
            self.child_entity.id)
        assert self.where.entity.id == self.child_entity.id, where_msg

    if use_previous:
        assert self.child_entity.time_index is not None, (
            "Applying function that requires time index to entity that "
            "doesn't have one")

        self.use_previous = _check_timedelta(use_previous)
        assert len(base_features) > 0
        source_entity = first_feature.entity
        time_index = source_entity.time_index
        time_col = source_entity[time_index]
        assert time_index is not None, ("Use previous can only be defined "
                                        "on entities with a time index")
        assert _check_time_against_column(self.use_previous, time_col)

    super(AggregationFeature, self).__init__(
        parent_entity, base_features, primitive=primitive)
def __init__(self, entity, base_features, **kwargs):
    """Validate the base features and record them on this feature.

    Args:
        entity: Object exposing ``id`` and ``entityset``. The id is stored
            as ``self.entity_id`` and ``entity.entityset.metadata`` as
            ``self.entityset``.
        base_features: Sized, indexable collection of ``PrimitiveBase``
            instances this feature is built from.
        **kwargs: Stored as ``self.additional_attributes`` and forwarded
            to the next ``__init__`` in the MRO.

    Raises:
        AssertionError: If a base feature is not a ``PrimitiveBase``, if
            the ``use_previous`` checks fail, or if
            ``self._check_input_types()`` is falsy.
        ValueError: If two base features report the same ``hash()``.
    """
    assert all(isinstance(f, PrimitiveBase) for f in base_features), \
        "All base features must be features"
    if len({bf.hash() for bf in base_features}) != len(base_features):
        raise ValueError(u"Duplicate base features ({}): {}".format(
            self.__class__, base_features))

    self.entity_id = entity.id
    self.entityset = entity.entityset.metadata

    # P TODO: where should this logic go?
    # not all primitives support use previous so doesn't make sense to have
    # in base
    # NOTE(review): ``self.use_previous`` is read before this method sets it,
    # so it is presumably supplied by a class attribute or a subclass --
    # confirm against the class definition.
    if self.use_previous:
        self.use_previous = _check_timedelta(self.use_previous)
        # Validate the ``base_features`` argument directly:
        # ``self.base_features`` is only assigned further down, so reading
        # it here depended on state set up outside this method.
        assert len(base_features) > 0
        first_entity = base_features[0].entity
        time_index = first_entity.time_index
        # Check the index before using it as a lookup key so a missing
        # time index is reported through this message.
        assert time_index is not None, ("Use previous can only be defined "
                                        "on entities with a time index")
        time_col = first_entity[time_index]
        assert _check_time_against_column(self.use_previous, time_col)

    self.base_features = base_features
    # variable type can be declared or inferred from first base feature
    self.additional_attributes = kwargs

    assert self._check_input_types(), ("Provided inputs don't match input "
                                       "type requirements")
    super(PrimitiveBase, self).__init__(**kwargs)
def __init__(self, entity, base_features, **kwargs):
    """Validate the base features and record them on this feature.

    Args:
        entity: Object exposing ``id`` and ``entityset``. The id is stored
            as ``self.entity_id`` and ``entity.entityset`` as
            ``self.entityset``.
        base_features: Sized, indexable collection of ``PrimitiveBase``
            instances this feature is built from.
        **kwargs: Stored as ``self.additional_attributes`` and forwarded
            to the next ``__init__`` in the MRO.

    Raises:
        AssertionError: If a base feature is not a ``PrimitiveBase``, if
            the ``use_previous`` checks fail, or if
            ``self._check_input_types()`` is falsy.
        ValueError: If two base features report the same ``hash()``.
    """
    assert all(isinstance(f, PrimitiveBase) for f in base_features), \
        "All base features must be features"
    if len({bf.hash() for bf in base_features}) != len(base_features):
        raise ValueError(u"Duplicate base features ({}): {}".format(
            self.__class__, base_features))

    self.entity_id = entity.id
    self.entityset = entity.entityset

    # P TODO: where should this logic go?
    # not all primitives support use previous so doesn't make sense to have
    # in base
    # NOTE(review): ``self.use_previous`` is read before this method sets it,
    # so it is presumably supplied by a class attribute or a subclass --
    # confirm against the class definition.
    if self.use_previous:
        self.use_previous = _check_timedelta(self.use_previous)
        # Validate the ``base_features`` argument directly:
        # ``self.base_features`` is only assigned further down, so reading
        # it here depended on state set up outside this method.
        assert len(base_features) > 0
        first_entity = base_features[0].entity
        time_index = first_entity.time_index
        # Check the index before using it as a lookup key so a missing
        # time index is reported through this message.
        assert time_index is not None, ("Use previous can only be defined "
                                        "on entities with a time index")
        time_col = first_entity[time_index]
        assert _check_time_against_column(self.use_previous, time_col)

    self.base_features = base_features
    # variable type can be declared or inferred from first base feature
    self.additional_attributes = kwargs

    assert self._check_input_types(), ("Provided inputs don't match input "
                                       "type requirements")
    super(PrimitiveBase, self).__init__(**kwargs)
def __init__(
    self,
    base_features,
    parent_dataframe_name,
    primitive,
    relationship_path=None,
    use_previous=None,
    where=None,
    name=None,
):
    """Set up an aggregation of child-dataframe features onto a parent.

    Args:
        base_features: Feature(s) to aggregate; normalised with
            ``_validate_base_features``. None may have more than one
            output.
        parent_dataframe_name: Name of the dataframe the aggregation is
            attached to; used to look the dataframe up in the entityset of
            the first base feature.
        primitive: Forwarded unchanged to the parent ``__init__``.
        relationship_path: Optional path, resolved through
            ``self._handle_relationship_path``.
        use_previous: Optional time window. When truthy it is normalised
            with ``_check_timedelta`` and checked against the child
            dataframe's time index column.
        where: Optional filter feature; it must be defined on the child
            dataframe.
        name: Forwarded unchanged to the parent ``__init__``.

    Raises:
        ValueError: If any base feature has more than one output.
        AssertionError: If ``where`` is defined on another dataframe or
            the ``use_previous`` checks fail.
    """
    base_features = _validate_base_features(base_features)
    if any(feature.number_output_features > 1 for feature in base_features):
        raise ValueError("Cannot stack on whole multi-output feature.")

    first_feature = base_features[0]
    self.child_dataframe_name = first_feature.dataframe_name
    entityset = first_feature.entityset

    path_info = self._handle_relationship_path(
        entityset, parent_dataframe_name, relationship_path
    )
    relationship_path, self._path_is_unique = path_info

    self.parent_dataframe_name = parent_dataframe_name

    if where is not None:
        self.where = _validate_base_features(where)[0]
        where_msg = "Where feature must be defined on child dataframe {}".format(
            self.child_dataframe_name
        )
        assert self.where.dataframe_name == self.child_dataframe_name, where_msg

    if use_previous:
        child_time_index = entityset[self.child_dataframe_name].ww.time_index
        assert child_time_index is not None, (
            "Applying function that requires time index to dataframe that "
            "doesn't have one"
        )

        self.use_previous = _check_timedelta(use_previous)
        assert len(base_features) > 0
        time_index = first_feature.dataframe.ww.time_index
        time_col = first_feature.dataframe.ww[time_index]
        assert time_index is not None, (
            "Use previous can only be defined " "on dataframes with a time index"
        )
        assert _check_time_against_column(self.use_previous, time_col)

    parent_dataframe = entityset[parent_dataframe_name]
    super(AggregationFeature, self).__init__(
        dataframe=parent_dataframe,
        base_features=base_features,
        relationship_path=relationship_path,
        primitive=primitive,
        name=name,
    )
def __init__(self, base_features, parent_entity, primitive,
             relationship_path=None, use_previous=None, where=None,
             name=None):
    """Set up an aggregation of child-entity features onto a parent entity.

    Args:
        base_features: A single feature, or an iterable of features; each
            one is passed through ``_check_feature``. All of them must
            report the same ``entity`` and none may have more than one
            output.
        parent_entity: Entity the aggregation is attached to. Its
            counterpart in ``parent_entity.entityset.metadata`` is stored
            as ``self.parent_entity``.
        primitive: Forwarded unchanged to the parent ``__init__``.
        relationship_path: Optional path, resolved through
            ``self._handle_relationship_path``.
        use_previous: Optional time window. When truthy it is normalised
            with ``_check_timedelta`` and checked against the child
            entity's time index column.
        where: Optional filter feature; it must be defined on the child
            entity.
        name: Forwarded unchanged to the parent ``__init__``.

    Raises:
        ValueError: If any base feature has more than one output.
        AssertionError: If the base features span several entities, if
            ``where`` lives on another entity, or if the ``use_previous``
            checks fail.
    """
    if not hasattr(base_features, '__iter__'):
        # A lone feature is wrapped so the rest of the method sees a list.
        base_features = [_check_feature(base_features)]
    else:
        base_features = [_check_feature(feature) for feature in base_features]
        entity_msg = "all base features must share the same entity"
        distinct_entities = {feature.entity for feature in base_features}
        assert len(distinct_entities) == 1, entity_msg

    if any(feature.number_output_features > 1 for feature in base_features):
        raise ValueError("Cannot stack on whole multi-output feature.")

    first_feature = base_features[0]
    self.child_entity = first_feature.entity

    path_info = self._handle_relationship_path(parent_entity, relationship_path)
    relationship_path, self._path_is_unique = path_info

    self.parent_entity = parent_entity.entityset.metadata[parent_entity.id]

    if where is not None:
        self.where = _check_feature(where)
        where_msg = "Where feature must be defined on child entity {}".format(
            self.child_entity.id)
        assert self.where.entity.id == self.child_entity.id, where_msg

    if use_previous:
        assert self.child_entity.time_index is not None, (
            "Applying function that requires time index to entity that "
            "doesn't have one")

        self.use_previous = _check_timedelta(use_previous)
        assert len(base_features) > 0
        source_entity = first_feature.entity
        time_index = source_entity.time_index
        time_col = source_entity[time_index]
        assert time_index is not None, ("Use previous can only be defined "
                                        "on entities with a time index")
        assert _check_time_against_column(self.use_previous, time_col)

    super(AggregationFeature, self).__init__(
        entity=parent_entity,
        base_features=base_features,
        relationship_path=relationship_path,
        primitive=primitive,
        name=name)