示例#1
0
def ks_es(make_es):
    """Return a deep copy of ``make_es`` backed by Koalas dataframes.

    The requesting test is skipped when ``databricks.koalas`` cannot be
    imported. Each entity's dataframe in the copy is passed through
    ``pd_to_ks_clean``, has its index reset (old index dropped), and is
    converted with ``ks.from_pandas``. ``make_es`` itself is not modified.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible here.
    """
    koalas = pytest.importorskip('databricks.koalas',
                                 reason="Koalas not installed, skipping")
    es_copy = copy.deepcopy(make_es)
    for ent in es_copy.entities:
        pandas_df = pd_to_ks_clean(ent.df)
        pandas_df = pandas_df.reset_index(drop=True)
        ent.df = koalas.from_pandas(pandas_df)
    return es_copy
示例#2
0
def ks_es(make_es):
    """Return a deep copy of ``make_es`` backed by Koalas dataframes.

    Skips the requesting test when ``databricks.koalas`` cannot be imported,
    and also when ``sys.platform`` starts with ``'win'``. Otherwise every
    entity dataframe in the copy is cleaned with ``pd_to_ks_clean``, has its
    index reset (old index dropped), and is converted via ``ks.from_pandas``.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible here.
    """
    koalas = pytest.importorskip('databricks.koalas',
                                 reason="Koalas not installed, skipping")
    # The import check comes first so its skip reason wins when both apply.
    on_windows = sys.platform.startswith('win')
    if on_windows:
        pytest.skip('skipping Koalas tests for Windows')

    es_copy = copy.deepcopy(make_es)
    for ent in es_copy.entities:
        pandas_df = pd_to_ks_clean(ent.df)
        pandas_df = pandas_df.reset_index(drop=True)
        ent.df = koalas.from_pandas(pandas_df)
    return es_copy
def test_create_entity_from_ks_df(pd_es):
    """Check that an entity built from a Koalas dataframe keeps its data.

    The cleaned pandas ``log`` dataframe is converted to Koalas, added to a
    fresh EntitySet as ``log_ks``, and the stored dataframe (converted back
    to pandas) is compared to the cleaned original, ignoring row/column order.
    """
    expected = pd_to_ks_clean(pd_es["log"].df)
    koalas_log = ks.from_pandas(expected)

    entity_set = EntitySet(id="ks_es")
    entity_kwargs = dict(
        entity_id="log_ks",
        dataframe=koalas_log,
        index="id",
        time_index="datetime",
        variable_types=pd_es["log"].variable_types,
    )
    entity_set = entity_set.entity_from_dataframe(**entity_kwargs)

    actual = entity_set["log_ks"].df.to_pandas()
    pd.testing.assert_frame_equal(expected, actual, check_like=True)
示例#4
0
def test_add_dataframe_from_ks_df(pd_es):
    """Check that a Koalas dataframe added to an EntitySet keeps its data.

    The cleaned pandas ``log`` dataframe is converted to Koalas and added to
    a fresh EntitySet as ``log_ks`` using the logical types and semantic tags
    of the pandas original. The stored dataframe, converted back to pandas,
    must equal the cleaned original (row/column order ignored).
    """
    expected = pd_to_ks_clean(pd_es["log"])
    koalas_log = ks.from_pandas(expected)

    entity_set = EntitySet(id="ks_es")
    add_kwargs = dict(
        dataframe_name="log_ks",
        dataframe=koalas_log,
        index="id",
        time_index="datetime",
        logical_types=pd_es["log"].ww.logical_types,
        semantic_tags=get_df_tags(pd_es["log"]),
    )
    entity_set = entity_set.add_dataframe(**add_kwargs)

    actual = entity_set["log_ks"].to_pandas()
    pd.testing.assert_frame_equal(expected, actual, check_like=True)
示例#5
0
def ks_home_games_es(pd_home_games_es):
    """Build a Koalas-backed EntitySet mirroring ``pd_home_games_es``.

    Skips the requesting test when ``databricks.koalas`` cannot be imported.
    Each entity is described by a tuple of (Koalas dataframe, index, ``None``,
    variable types); the third slot is always ``None`` here. Relationships
    are copied as (parent entity id, parent variable name, child entity id,
    child variable name) tuples.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible here.
    """
    koalas = pytest.importorskip('databricks.koalas',
                                 reason="Koalas not installed, skipping")

    entities = {
        ent.id: (koalas.from_pandas(pd_to_ks_clean(ent.df)),
                 ent.index,
                 None,
                 ent.variable_types)
        for ent in pd_home_games_es.entities
    }

    relationships = []
    for rel in pd_home_games_es.relationships:
        parent_side = (rel.parent_entity.id, rel.parent_variable.name)
        child_side = (rel.child_entity.id, rel.child_variable.name)
        relationships.append(parent_side + child_side)

    return ft.EntitySet(id=pd_home_games_es.id,
                        entities=entities,
                        relationships=relationships)
示例#6
0
def ks_int_es(pd_int_es):
    """Build a Koalas-backed EntitySet mirroring ``pd_int_es``.

    Skips the requesting test when ``databricks.koalas`` cannot be imported.
    Every dataframe is cleaned with ``pd_to_ks_clean``, has its index reset
    (old index dropped), is converted to Koalas, and is initialised with the
    Woodwork schema of the pandas original before being added. Relationships
    are then re-created from the source EntitySet's relationship names.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible here.
    """
    koalas = pytest.importorskip('databricks.koalas',
                                 reason="Koalas not installed, skipping")
    koalas_es = ft.EntitySet(id=pd_int_es.id)

    for pandas_df in pd_int_es.dataframes:
        prepared = pd_to_ks_clean(pandas_df).reset_index(drop=True)
        converted = koalas.from_pandas(prepared)
        # Reuse the original schema so typing information carries over.
        converted.ww.init(schema=pandas_df.ww.schema)
        koalas_es.add_dataframe(converted)

    for rel in pd_int_es.relationships:
        rel_spec = (rel._parent_dataframe_name,
                    rel._parent_column_name,
                    rel._child_dataframe_name,
                    rel._child_column_name)
        koalas_es.add_relationship(*rel_spec)

    return koalas_es
示例#7
0
def ks_mock_customer(pd_mock_customer):
    """Build a Koalas-backed EntitySet mirroring ``pd_mock_customer``.

    Skips the requesting test when ``databricks.koalas`` cannot be imported.
    Each entity is described by a tuple of (Koalas dataframe, index, time
    index, variable types), where the dataframe is cleaned with
    ``pd_to_ks_clean`` and has its index reset before conversion.
    Relationships are copied as (parent entity id, parent variable name,
    child entity id, child variable name) tuples.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible here.
    """
    koalas = pytest.importorskip('databricks.koalas',
                                 reason="Koalas not installed, skipping")

    entities = {
        ent.id: (
            koalas.from_pandas(pd_to_ks_clean(ent.df).reset_index(drop=True)),
            ent.index,
            ent.time_index,
            ent.variable_types,
        )
        for ent in pd_mock_customer.entities
    }

    relationships = []
    for rel in pd_mock_customer.relationships:
        parent_side = (rel.parent_entity.id, rel.parent_variable.name)
        child_side = (rel.child_entity.id, rel.child_variable.name)
        relationships.append(parent_side + child_side)

    return ft.EntitySet(id=pd_mock_customer.id,
                        entities=entities,
                        relationships=relationships)
示例#8
0
def ks_mock_customer(pd_mock_customer):
    """Build a Koalas-backed EntitySet mirroring ``pd_mock_customer``.

    Skips the requesting test when ``databricks.koalas`` cannot be imported.
    Each dataframe is described by a tuple of (Koalas dataframe, index, time
    index, logical types) taken from the pandas original's ``ww`` accessor;
    the dataframe is cleaned with ``pd_to_ks_clean`` and has its index reset
    before conversion. Relationships are copied as (parent dataframe name,
    parent column name, child dataframe name, child column name) tuples.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible here.
    """
    koalas = pytest.importorskip('databricks.koalas',
                                 reason="Koalas not installed, skipping")

    dataframes = {
        pandas_df.ww.name: (
            koalas.from_pandas(
                pd_to_ks_clean(pandas_df).reset_index(drop=True)),
            pandas_df.ww.index,
            pandas_df.ww.time_index,
            pandas_df.ww.logical_types,
        )
        for pandas_df in pd_mock_customer.dataframes
    }

    relationships = []
    for rel in pd_mock_customer.relationships:
        parent_side = (rel._parent_dataframe_name, rel._parent_column_name)
        child_side = (rel._child_dataframe_name, rel._child_column_name)
        relationships.append(parent_side + child_side)

    return ft.EntitySet(id=pd_mock_customer.id,
                        dataframes=dataframes,
                        relationships=relationships)
示例#9
0
def ks_home_games_es(pd_home_games_es):
    """Build a Koalas-backed EntitySet mirroring ``pd_home_games_es``.

    Skips the requesting test when ``databricks.koalas`` cannot be imported.
    Every dataframe is cleaned with ``pd_to_ks_clean``, converted to Koalas,
    and initialised with the Woodwork schema of the pandas original; it is
    then registered under the original's Woodwork name as a one-element
    tuple. Relationships are copied as (parent dataframe name, parent column
    name, child dataframe name, child column name) tuples.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible here.
    """
    koalas = pytest.importorskip('databricks.koalas',
                                 reason="Koalas not installed, skipping")

    dataframes = {}
    for pandas_df in pd_home_games_es.dataframes:
        prepared = pd_to_ks_clean(pandas_df)
        converted = koalas.from_pandas(prepared)
        # Reuse the original schema so typing information carries over.
        converted.ww.init(schema=pandas_df.ww.schema)
        dataframes[pandas_df.ww.name] = (converted,)

    relationships = []
    for rel in pd_home_games_es.relationships:
        parent_side = (rel._parent_dataframe_name, rel._parent_column_name)
        child_side = (rel._child_dataframe_name, rel._child_column_name)
        relationships.append(parent_side + child_side)

    return ft.EntitySet(id=pd_home_games_es.id,
                        dataframes=dataframes,
                        relationships=relationships)
示例#10
0
def ks_diamond_es(pd_diamond_es):
    """Build a Koalas-backed EntitySet mirroring ``pd_diamond_es``.

    Skips the requesting test when ``databricks.koalas`` cannot be imported,
    and also when ``sys.platform`` starts with ``'win'``. Each entity is
    described by a tuple of (Koalas dataframe, index, ``None``, variable
    types); the third slot is always ``None`` here. Relationships are copied
    as (parent entity id, parent variable name, child entity id, child
    variable name) tuples.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible here.
    """
    koalas = pytest.importorskip('databricks.koalas',
                                 reason="Koalas not installed, skipping")
    # The import check comes first so its skip reason wins when both apply.
    on_windows = sys.platform.startswith('win')
    if on_windows:
        pytest.skip('skipping Koalas tests for Windows')

    entities = {
        ent.id: (koalas.from_pandas(pd_to_ks_clean(ent.df)),
                 ent.index,
                 None,
                 ent.variable_types)
        for ent in pd_diamond_es.entities
    }

    relationships = []
    for rel in pd_diamond_es.relationships:
        parent_side = (rel.parent_entity.id, rel.parent_variable.name)
        child_side = (rel.child_entity.id, rel.child_variable.name)
        relationships.append(parent_side + child_side)

    return ft.EntitySet(id=pd_diamond_es.id,
                        entities=entities,
                        relationships=relationships)
示例#11
0
def ks_es(make_es):
    """Build a new Koalas-backed EntitySet mirroring ``make_es``.

    Skips the requesting test when ``databricks.koalas`` cannot be imported.
    Each entity's dataframe is cleaned with ``pd_to_ks_clean``, has its index
    reset (old index dropped), is converted to Koalas, and is added to a
    fresh EntitySet with the source entity's index, time index, variable
    types and secondary time index. Relationships are then re-created between
    the matching variables of the new EntitySet.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible here.
    """
    koalas = pytest.importorskip('databricks.koalas',
                                 reason="Koalas not installed, skipping")
    koalas_es = ft.EntitySet(id=make_es.id)

    for ent in make_es.entities:
        prepared = pd_to_ks_clean(ent.df).reset_index(drop=True)
        entity_options = dict(
            index=ent.index,
            time_index=ent.time_index,
            variable_types=ent.variable_types,
            secondary_time_index=ent.secondary_time_index,
        )
        koalas_es.entity_from_dataframe(ent.id,
                                        koalas.from_pandas(prepared),
                                        **entity_options)

    for rel in make_es.relationships:
        # Look the variables up in the NEW EntitySet, not the source one.
        parent_var = koalas_es[rel.parent_entity.id][rel.parent_variable.id]
        child_var = koalas_es[rel.child_entity.id][rel.child_variable.id]
        koalas_es.add_relationship(ft.Relationship(parent_var, child_var))

    return koalas_es