示例#1
0
def description_to_entityset(description, **kwargs):
    '''Rebuild an :class:`.EntitySet` from a serialized data description.

    Args:
        description (dict) : Description of an :class:`.EntitySet`. Likely generated using :meth:`.serialize.entityset_to_description`.
            Each entity's ``loading_info['params']`` is updated in place with ``kwargs``.
        kwargs (keywords): Extra keyword arguments merged into the loading params of every entity before it is deserialized.

    Returns:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
    '''
    check_schema_version(description, 'entityset')

    from featuretools.entityset import EntitySet
    # A description that was not read from disk carries no 'path', so this is None.
    path = description.get('path')
    entityset = EntitySet(description['id'])

    # Ids of the entities whose description flags a last time index.
    flagged_entity_ids = []
    for entity_description in description['entities'].values():
        loading_params = entity_description['loading_info']['params']
        loading_params.update(kwargs)
        # With path=None an empty dataframe is created for the entity.
        description_to_entity(entity_description, entityset, path=path)
        if entity_description['properties']['last_time_index']:
            flagged_entity_ids.append(entity_description['id'])

    for relationship_description in description['relationships']:
        entityset.add_relationship(
            Relationship.from_dictionary(relationship_description, entityset))

    # Only recompute last time indexes when at least one entity asked for it.
    if flagged_entity_ids:
        entityset.add_last_time_indexes(updated_entities=flagged_entity_ids)

    return entityset
示例#2
0
def description_to_entityset(description, **kwargs):
    '''Rebuild an :class:`.EntitySet` from a serialized data description.

    Args:
        description (dict) : Description of an :class:`.EntitySet`. Likely generated using :meth:`.serialize.entityset_to_description`
        kwargs (keywords): Extra keyword arguments forwarded to ``read_woodwork_table`` for every dataframe read from disk.

    Returns:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
    '''
    check_schema_version(description, 'entityset')

    from featuretools.entityset import EntitySet

    # A description that was not read from disk carries no 'path', so this is None.
    path = description.get('path')
    entityset = EntitySet(description['id'])

    for df_description in description['dataframes'].values():
        if path is None:
            # Nothing on disk to load: build an empty dataframe from the description.
            dataframe = empty_dataframe(df_description)
        else:
            table_location = os.path.join(path, 'data', df_description['name'])
            dataframe = read_woodwork_table(table_location, validate=False, **kwargs)

        entityset.add_dataframe(dataframe)

    for relationship_description in description['relationships']:
        entityset.add_relationship(
            relationship=Relationship.from_dictionary(relationship_description,
                                                      entityset))

    return entityset
示例#3
0
    def from_dictionary(cls, arguments, entityset, dependencies, primitive):
        """Build an instance of ``cls`` from its serialized ``arguments``.

        Args:
            arguments (dict): Serialized feature. Reads the keys "base_features",
                "relationship_path", "use_previous", "where" and "name", plus the
                optional key "feature_names".
            entityset: Passed to ``Relationship.from_dictionary`` for every entry of
                the serialized relationship path.
            dependencies (dict): Maps feature names to already-built features; used
                to look up the base features and the optional "where" feature.
            primitive: Primitive handed to ``cls`` unchanged.

        Returns:
            The new ``cls`` instance, with ``_names`` set from "feature_names".
        """
        inputs = [dependencies[name] for name in arguments["base_features"]]

        relationships = [
            Relationship.from_dictionary(serialized, entityset)
            for serialized in arguments["relationship_path"]
        ]
        # The parent dataframe is taken from the first relationship of the path.
        parent_name = relationships[0].parent_dataframe.ww.name
        path = RelationshipPath([(False, rel) for rel in relationships])

        # A falsy serialized value is passed through untouched rather than converted.
        serialized_window = arguments["use_previous"]
        window = (
            Timedelta.from_dictionary(serialized_window)
            if serialized_window
            else serialized_window
        )

        # Same pass-through rule for the optional "where" feature name.
        where_key = arguments["where"]
        where_feature = dependencies[where_key] if where_key else where_key

        feature = cls(
            base_features=inputs,
            parent_dataframe_name=parent_name,
            primitive=primitive,
            relationship_path=path,
            use_previous=window,
            where=where_feature,
            name=arguments["name"],
        )
        feature._names = arguments.get("feature_names")
        return feature
示例#4
0
    def from_dictionary(cls, arguments, entityset, dependencies,
                        primitives_deserializer):
        '''Build an instance of ``cls`` from its serialized ``arguments``.

        Args:
            arguments (dict): Serialized feature. Reads the keys 'base_features',
                'relationship_path', 'primitive', 'use_previous', 'where' and 'name'.
            entityset: Passed to ``Relationship.from_dictionary`` for every entry of
                the serialized relationship path.
            dependencies (dict): Maps feature names to already-built features; used
                to look up the base features and the optional 'where' feature.
            primitives_deserializer: Object whose ``deserialize_primitive`` turns the
                serialized primitive into the primitive given to ``cls``.

        Returns:
            The new ``cls`` instance.
        '''
        inputs = [dependencies[name] for name in arguments['base_features']]

        relationships = [Relationship.from_dictionary(serialized, entityset)
                         for serialized in arguments['relationship_path']]
        # The parent dataframe is taken from the first relationship of the path.
        parent_name = relationships[0].parent_dataframe.ww.name
        path = RelationshipPath([(False, rel) for rel in relationships])

        primitive = primitives_deserializer.deserialize_primitive(
            arguments['primitive'])

        # A falsy serialized value is passed through untouched rather than converted.
        serialized_window = arguments['use_previous']
        if serialized_window:
            window = Timedelta.from_dictionary(serialized_window)
        else:
            window = serialized_window

        # Same pass-through rule for the optional 'where' feature name.
        where_key = arguments['where']
        where_feature = dependencies[where_key] if where_key else where_key

        return cls(base_features=inputs,
                   parent_dataframe_name=parent_name,
                   primitive=primitive,
                   relationship_path=path,
                   use_previous=window,
                   where=where_feature,
                   name=arguments['name'])
示例#5
0
 def from_dictionary(cls, arguments, entityset, dependencies,
                     primitives_deserializer):
     '''Build an instance of ``cls`` from its serialized ``arguments``.

     Args:
         arguments (dict): Serialized feature. Reads the keys 'base_feature',
             'relationship' and 'name'.
         entityset: Passed to ``Relationship.from_dictionary``.
         dependencies (dict): Maps feature names to already-built features.
         primitives_deserializer: Not used by this method.

     Returns:
         The new ``cls`` instance.
     '''
     source_feature = dependencies[arguments['base_feature']]
     rel = Relationship.from_dictionary(arguments['relationship'], entityset)
     return cls(
         base_feature=source_feature,
         # The child dataframe name comes from the deserialized relationship.
         child_dataframe_name=rel.child_dataframe.ww.name,
         relationship=rel,
         name=arguments['name'],
     )
示例#6
0
def test_relationship_serialization(es):
    '''A relationship converts to the expected dictionary and back again.'''
    sessions_to_log = Relationship(es, 'sessions', 'id', 'log', 'session_id')

    expected = {
        'parent_dataframe_name': 'sessions',
        'parent_column_name': 'id',
        'child_dataframe_name': 'log',
        'child_column_name': 'session_id',
    }

    serialized = sessions_to_log.to_dictionary()
    assert serialized == expected

    restored = Relationship.from_dictionary(expected, es)
    assert restored == sessions_to_log
示例#7
0
def test_relationship_serialization(es):
    '''A relationship converts to the expected dictionary and back again.'''
    parent_variable = es['sessions']['id']
    child_variable = es['log']['session_id']
    sessions_to_log = Relationship(parent_variable, child_variable)

    expected = {
        'parent_entity_id': 'sessions',
        'parent_variable_id': 'id',
        'child_entity_id': 'log',
        'child_variable_id': 'session_id',
    }

    serialized = sessions_to_log.to_dictionary()
    assert serialized == expected

    restored = Relationship.from_dictionary(expected, es)
    assert restored == sessions_to_log
def test_relationship_serialization(es):
    """A relationship converts to the expected dictionary and back again."""
    sessions_to_log = Relationship(es, "sessions", "id", "log", "session_id")

    expected = {
        "parent_dataframe_name": "sessions",
        "parent_column_name": "id",
        "child_dataframe_name": "log",
        "child_column_name": "session_id",
    }

    serialized = sessions_to_log.to_dictionary()
    assert serialized == expected

    restored = Relationship.from_dictionary(expected, es)
    assert restored == sessions_to_log
示例#9
0
 def from_dictionary(cls, arguments, entityset, dependencies, primitive):
     """Build an instance of ``cls`` from its serialized ``arguments``.

     Args:
         arguments (dict): Serialized feature. Reads the keys "base_feature",
             "relationship" and "name".
         entityset: Passed to ``Relationship.from_dictionary``.
         dependencies (dict): Maps feature names to already-built features.
         primitive: Not used by this method.

     Returns:
         The new ``cls`` instance.
     """
     source_feature = dependencies[arguments["base_feature"]]
     rel = Relationship.from_dictionary(arguments["relationship"], entityset)
     # The child dataframe name comes from the deserialized relationship.
     child_name = rel.child_dataframe.ww.name
     return cls(
         base_feature=source_feature,
         child_dataframe_name=child_name,
         relationship=rel,
         name=arguments["name"],
     )
示例#10
0
def description_to_entityset(description, **kwargs):
    """Deserialize entityset from data description.

    Args:
        description (dict) : Description of an :class:`.EntitySet`. Likely generated using :meth:`.serialize.entityset_to_description`
        kwargs (keywords): Additional keyword arguments to pass as keywords arguments to the underlying deserialization method.
            When the description has a "format" entry it overrides any ``format`` given here, and for
            pandas tables stored as parquet a per-table ``filename`` is supplied as well.

    Returns:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
    """
    check_schema_version(description, "entityset")

    from featuretools.entityset import EntitySet

    # If data description was not read from disk, path is None.
    path = description.get("path")
    # The storage format belongs to the whole description, so look it up once.
    # Named data_format to avoid shadowing the builtin ``format``.
    data_format = description.get("format")
    entityset = EntitySet(description["id"])

    for df in description["dataframes"].values():
        if path is not None:
            data_path = os.path.join(path, "data", df["name"])
            # Build the read options per dataframe: writing into the shared
            # kwargs would let one table's "filename" leak into the next read.
            read_kwargs = dict(kwargs)
            if data_format is not None:
                read_kwargs["format"] = data_format
                is_pandas_table = df["loading_info"]["table_type"] == "pandas"
                if data_format == "parquet" and is_pandas_table:
                    read_kwargs["filename"] = df["name"] + ".parquet"
            dataframe = read_woodwork_table(data_path,
                                            validate=False,
                                            **read_kwargs)
        else:
            dataframe = empty_dataframe(df)

        entityset.add_dataframe(dataframe)

    for relationship in description["relationships"]:
        rel = Relationship.from_dictionary(relationship, entityset)
        entityset.add_relationship(relationship=rel)

    return entityset