def test_passing_strings_to_variable_types_dfs():
    """Check that a type string passed to ``ft.dfs`` resolves to its class.

    The ``name`` column of ``teams`` is declared with the string ``'text'``;
    after running DFS the resulting variable's class must be the one that
    ``find_variable_types()`` registers under ``'text'``.
    """
    known_types = find_variable_types()

    team_frame = pd.DataFrame({
        'id': range(3),
        'name': ['Breakers', 'Spirit', 'Thorns'],
    })
    game_frame = pd.DataFrame({
        'id': range(5),
        'home_team_id': [2, 2, 1, 0, 1],
        'away_team_id': [1, 0, 2, 1, 0],
        'home_team_score': [3, 0, 1, 0, 4],
        'away_team_score': [2, 1, 2, 0, 0],
    })

    # Entity tuples are (dataframe, index[, time_index, variable_types]).
    entities = {
        'teams': (team_frame, 'id', None, {'name': 'text'}),
        'games': (game_frame, 'id'),
    }
    relationships = [('teams', 'id', 'games', 'home_team_id')]

    feature_defs = ft.dfs(entities,
                          relationships,
                          target_entity="teams",
                          features_only=True)

    teams_entity = feature_defs[0].entity
    assert teams_entity['name'].__class__ == known_types['text']
def test_passing_strings_to_variable_types_from_dataframe():
    """Check type-string resolution in ``EntitySet.entity_from_dataframe``.

    Every column is named after the ``str()`` of a variable class and is
    declared with that class's type string. One extra column uses an
    unrecognized type string, which must trigger a ``UserWarning`` and
    end up with the ``'unknown'`` type string.
    """
    type_string_by_column = {
        str(variable_class): type_string
        for type_string, variable_class in find_variable_types().items()
    }
    type_string_by_column['unknown variable'] = 'some unknown type string'

    es = EntitySet()
    empty_frame = pd.DataFrame(columns=list(type_string_by_column))

    expected_warning = (
        'Variable type {} was unrecognized, Unknown variable type was used instead'
        .format('some unknown type string'))
    with pytest.warns(UserWarning, match=expected_warning):
        es.entity_from_dataframe(
            entity_id="reversed_variable_types",
            dataframe=empty_frame,
            index="<class 'featuretools.variable_types.variable.Index'>",
            time_index="<class 'featuretools.variable_types.variable.NumericTimeIndex'>",
            variable_types=type_string_by_column)

    # After the fallback, the unrecognized column is expected to report
    # the 'unknown' type string.
    type_string_by_column["unknown variable"] = "unknown"

    for variable in es["reversed_variable_types"].variables:
        expected_type_string = type_string_by_column[variable.id]
        assert variable.__class__.type_string == expected_type_string
def _create_variables(self, variable_types, index, time_index, secondary_time_index):
    """Extracts the variables from a dataframe

    Resolves type strings to variable classes, infers types for the
    remaining columns, instantiates one variable per column, converts the
    dataframe's data accordingly and stores the variables on
    ``self.variables`` with the index variable first.

    Args:
        variable_types (dict[str -> types/str/dict[str -> type]] or None) :
            An entity's variable_types dict maps string variable ids to
            types (:class:`.Variable`) or type_strings (str) or
            (type, kwargs) to pass keyword arguments to the Variable.
            ``None`` or an empty mapping means no types are specified.
            The caller's mapping is never modified.
        index (str): Name of index column
        time_index (str or None): Name of time_index column
        secondary_time_index (dict[str: [str]]): Dictionary of secondary
            time columns that each map to a list of columns that depend on
            that secondary time
    """
    # Check for a missing/empty mapping *before* copying: calling .copy()
    # first would raise AttributeError on None and make the fallback dead.
    variable_types = variable_types.copy() if variable_types else {}

    # Replace type strings with their variable classes. Only values are
    # reassigned, so iterating over a snapshot of the items is sufficient.
    string_to_class_map = find_variable_types()
    for vid, vtype in list(variable_types.items()):
        if not isinstance(vtype, str):
            continue
        if vtype in string_to_class_map:
            variable_types[vid] = string_to_class_map[vtype]
        else:
            variable_types[vid] = string_to_class_map['unknown']
            warnings.warn(
                "Variable type {} was unrecognized, Unknown variable type was used instead"
                .format(vtype))

    if index not in variable_types:
        variable_types[index] = vtypes.Index

    link_vars = get_linked_vars(self)
    inferred_variable_types = infer_variable_types(self.df,
                                                   link_vars,
                                                   variable_types,
                                                   time_index,
                                                   secondary_time_index)
    # Explicitly specified types take precedence over inferred ones.
    inferred_variable_types.update(variable_types)

    variables = []
    for vid, vtype in inferred_variable_types.items():
        if isinstance(vtype, tuple):
            # vtype is (ft.Variable, dict_of_kwargs)
            variable_class, kwargs = vtype[0], vtype[1]
            variables.append(variable_class(vid, self, **kwargs))
        else:
            variables.append(vtype(vid, self))

    # convert data once we've inferred
    self.df = convert_all_variable_data(df=self.df,
                                        variable_types=inferred_variable_types)

    # make sure index is at the beginning
    index_variable = [v for v in variables if v.id == index][0]
    self.variables = [index_variable] + [v for v in variables if v.id != index]
def test_all_variable_descriptions():
    """Round-trip every variable of an entity through its description.

    Builds an entity with one column per type string returned by
    ``find_variable_types()``, serializes each variable with
    ``to_data_description`` and checks that deserializing the description
    yields a variable equal to the original.
    """
    variable_types = find_variable_types()
    es = EntitySet()
    dataframe = pd.DataFrame(columns=list(variable_types))
    es.entity_from_dataframe(
        'variable_types',
        dataframe,
        index='index',
        time_index='datetime_time_index',
        variable_types=variable_types,
    )
    entity = es['variable_types']
    for variable in entity.variables:
        description = variable.to_data_description()
        _variable = deserialize.description_to_variable(description, entity=entity)
        # Compare with == instead of calling __eq__ directly: a direct call
        # can return NotImplemented, which must not be treated as "equal".
        assert variable == _variable
def description_to_variable(description, entity=None):
    '''Deserialize variable from variable description.

    The description is not modified, so the same description can be
    deserialized more than once.

    Args:
        description (dict) : Description of :class:`.Variable`. Its
            ``'type'`` entry is either a type string or a dict holding the
            type string under ``'value'`` plus keyword arguments for the
            variable.
        entity (Entity) : Instance of :class:`.Entity` to add :class:`.Variable`.
            If entity is None, :class:`.Variable` will not be instantiated.

    Returns:
        variable (Variable) : Returns the instantiated :class:`.Variable`
            when an entity is given, otherwise the looked-up variable class.
    '''
    variable_types = find_variable_types()

    type_description = description['type']
    if isinstance(type_description, str):
        type_string, kwargs = type_description, {}
    else:
        # Pop 'value' from a copy so the caller's description stays intact.
        kwargs = dict(type_description)
        type_string = kwargs.pop('value')

    # Unrecognized type strings fall back to the 'None' entry, which the
    # original note says is the Unknown variable type.
    variable = variable_types.get(type_string, variable_types.get('None'))

    if entity is not None:
        variable = variable(description['id'], entity, **kwargs)
        variable.interesting_values = description['properties']['interesting_values']
    return variable
def description_to_variable(description, entity=None):
    '''Deserialize variable from variable description.

    The description is not modified, so the same description can be
    deserialized more than once.

    Args:
        description (dict) : Description of :class:`.Variable`. Its
            ``'type'`` entry is either a type string or a dict holding the
            type string under ``'value'`` plus keyword arguments for the
            variable.
        entity (Entity) : Instance of :class:`.Entity` to add :class:`.Variable`.
            If entity is None, :class:`.Variable` will not be instantiated.

    Returns:
        variable (Variable or str) : Returns the instantiated
            :class:`.Variable` when an entity is given. If entity is None,
            the type string from the description is returned instead.
    '''
    type_description = description['type']
    if isinstance(type_description, str):
        variable, kwargs = type_description, {}
    else:
        # Pop 'value' from a copy so the caller's description stays intact.
        kwargs = dict(type_description)
        variable = kwargs.pop('value')

    if entity is not None:
        variable_types = find_variable_types()
        # Unrecognized type strings fall back to the 'unknown' entry.
        variable_class = variable_types.get(variable, variable_types.get('unknown'))
        variable = variable_class(description['id'], entity, **kwargs)
        # interesting_values is read back from JSON as a pandas Series.
        interesting_values = pd.read_json(description['properties']['interesting_values'], typ='series')
        variable.interesting_values = interesting_values
    return variable