Exemplo n.º 1
0
def test_infer_cardinals():
    """Check that the CARDINALS_HASH table group is classified as "cardinal".

    The group is fetched with the "lemmatize" vectorization, joined into a
    single table, and handed to ``infer_table_class`` with
    ``skip_header=True``.
    """
    table_group = get_table_group_by_hash(
        CARDINALS_HASH, vectorization_type="lemmatize")
    joined = join_tables(table_group)
    # The lemmatized output is never inspected; the call is kept so the test
    # still runs lemmatize_table on the joined table.
    lemmatize_table(joined)
    assert infer_table_class(joined, skip_header=True) == "cardinal"
Exemplo n.º 2
0
def test_rdfize_table():
    """Join every unprocessed table group, dump it, and write its RDF.

    For each ``(hash, table_group)`` pair yielded by
    ``load_table_groups_lazy`` that is not already marked as processed, the
    string form of the joined table is written to ``table/<hash>`` and the
    output of ``rdfize_table`` to ``run/<hash>``; the hash is then marked as
    processed. Groups holding more than 1000 tables, and joined tables whose
    first row has more than 10 entries, are skipped without being marked.
    """
    hash_count = get_hash_count(vectorization_type="lemmatize")
    table_groups = load_table_groups_lazy(vectorization_type="lemmatize")
    # enumerate() advances the progress counter for every group, including
    # the skipped ones, so the printed position cannot drift behind.
    for current_count, (_hash, table_group) in enumerate(table_groups):
        print("Processing {} out of {}".format(current_count, hash_count))
        if is_hash_to_processed(_hash):
            continue
        # Skip table groups with more than 1000 tables. This is checked
        # before joining so that oversized groups are never joined at all.
        if len(table_group) > 1000:
            continue
        joined_table = join_tables(table_group)
        # Skip tables with more than 10 columns (width of the first row).
        if len(joined_table[0]) > 10:
            continue
        # Context managers close the files even if a write raises.
        with open("table/{}".format(str(_hash)), "wb") as table_file:
            table_file.write(str(joined_table).encode("utf-8"))
        rdf = rdfize_table(joined_table)
        with open("run/{}".format(str(_hash)), "wb") as rdf_file:
            rdf_file.write(rdf)
        add_hash_to_processed(_hash)
Exemplo n.º 3
0
def test_infer_column_name():
    """Check the inferred names of the first two EXAMPLE_HASH columns.

    The joined table is passed through ``columnize_table``; entries 0 and 1
    of the result are expected to be named ``b"label"`` and ``b"place"``.
    """
    table_group = get_table_group_by_hash(
        EXAMPLE_HASH, vectorization_type="lemmatize")
    columns = columnize_table(join_tables(table_group))
    expected_names = (b"label", b"place")
    # Columns are checked in order, stopping at the first mismatch.
    for index, expected in enumerate(expected_names):
        assert infer_column_name(columns[index]) == expected
Exemplo n.º 4
0
def test_infer_table_class():
    """Check that a class is inferred for EXAMPLE_HASH with rows=5 and 30."""
    table_group = get_table_group_by_hash(
        EXAMPLE_HASH, vectorization_type="lemmatize")
    joined = join_tables(table_group)
    # Same table, two different ``rows`` settings; both must yield a class.
    for row_limit in (5, 30):
        inferred = infer_table_class(joined, rows=row_limit)
        assert inferred is not None
Exemplo n.º 5
0
def test_join():
    """Print a preview and the category-based class of every table group.

    Nothing is asserted: the test prints each hash, the first 10 entries of
    the joined table, and the value returned by
    ``infer_table_class_by_category`` with ``skip_header=True``.
    """
    table_groups = load_table_groups_lazy(vectorization_type="lemmatize")
    for _hash, table_group in table_groups:
        print(_hash)
        joined = join_tables(table_group)
        # Only the first 10 lines of the joined table are shown.
        preview = joined[:10]
        pprinter.pprint(preview)
        inferred = infer_table_class_by_category(joined, skip_header=True)
        print("Table class: %s" % (inferred))
Exemplo n.º 6
0
def test_rdfize_table():
    """Smoke-test ``rdfize_table`` on the joined CARDINALS_HASH table group.

    The result is not checked; the test only fails if a call raises.
    """
    table_group = get_table_group_by_hash(
        CARDINALS_HASH, vectorization_type="lemmatize")
    rdfize_table(join_tables(table_group))
Exemplo n.º 7
0
def test_rdfize_table_test_1():
    """Smoke-test ``rdfize_table`` on the joined TEST_HASH_1 table group.

    There is no assertion on the output; only an exception fails the test.
    """
    joined = join_tables(
        get_table_group_by_hash(TEST_HASH_1, vectorization_type="lemmatize"))
    rdfize_table(joined)
Exemplo n.º 8
0
def test_get_table_class_uri():
    """Check the class URI returned for the joined CARDINALS_HASH table."""
    expected_uri = 'http://dbpedia.org/ontology/Cardinal'
    table_group = get_table_group_by_hash(
        CARDINALS_HASH, vectorization_type="lemmatize")
    joined = join_tables(table_group)
    assert get_table_class_uri(joined) == expected_uri
Exemplo n.º 9
0
def test_join():
    """Join every lazily loaded table group and pretty-print the result.

    Nothing is asserted: the test prints each hash followed by the joined
    table, and fails only if loading, joining, or printing raises.
    """
    table_groups = load_table_groups_lazy(vectorization_type="lemmatize")
    for _hash, table_group in table_groups:
        print(_hash)
        # Join once and print that result, rather than joining the same
        # group a second time just for printing.
        joined_table = join_tables(table_group)
        pprinter.pprint(joined_table)
Exemplo n.º 10
0
def test_infer_table_properties():
    """Check the property inferred at index 1 for the EXAMPLE_HASH table."""
    # Author's note, presumably the parameters of infer_table_properties:
    # table, rows=30, skip_header=False
    expected_property = "http://dbpedia.org/ontology/type"
    table_group = get_table_group_by_hash(
        EXAMPLE_HASH, vectorization_type="lemmatize")
    inferred = infer_table_properties(join_tables(table_group))
    assert inferred[1] == expected_property
Exemplo n.º 11
0
def test_infer_porn_actors():
    """Check the category-based class of the PORN_ACTORS_HASH table group.

    The joined table is expected to be classified as ``b'living_people'``
    when ``infer_table_class_by_category`` is called with
    ``skip_header=True``.
    """
    table_group = get_table_group_by_hash(
        PORN_ACTORS_HASH, vectorization_type="lemmatize")
    joined = join_tables(table_group)
    inferred = infer_table_class_by_category(joined, skip_header=True)
    assert inferred == b'living_people'
Exemplo n.º 12
0
def test_infer():
    """Pin the class currently inferred for the PROBLEMATIC_HASH table group.

    ``infer_table_class`` is called with ``rows=5`` and ``skip_header=True``
    on the joined table.
    """
    table_group = get_table_group_by_hash(
        PROBLEMATIC_HASH, vectorization_type="lemmatize")
    joined = join_tables(table_group)
    inferred = infer_table_class(joined, rows=5, skip_header=True)
    # NOTE: the author flagged this expected value as wrong ("This is
    # wrong"); the assertion records the current output, not a correct one.
    assert inferred == "aegean_sea"
Exemplo n.º 13
0
def test_join_tables():
    """Check that joining TEST_TABLES yields DEDUPLICATED_TABLE."""
    assert join_tables(TEST_TABLES) == DEDUPLICATED_TABLE