def my_test_4():
    """
    Try add_column_definition if the column already exists. This should fail.

    :return:
    """
    print_test_separator("Starting my_test_4")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))

    t = cat.create_table("people", "data/People.csv", cds)
    print("People table", json.dumps(t.describe_table(), indent=2))
    print("\nTry to add a duplicate column playerID:")
    try:
        c = CSVCatalog.ColumnDefinition("playerID")
        t.add_column_definition(c)
    except Exception as e:
        print("Exception e = ", e)
    print("\n##### Expected result: fail")
    print_test_separator("Complete my_test_4")
def test_create_table_3_fail():
    """
    Creates a table that includes several column definitions. This test should fail because one of the defined
    columns is not in the underlying CSV file.
    :return:
    """
    print_test_separator("Starting test_create_table_3_fail")
    cleanup()
    cat = CSVCatalog.CSVCatalog('localhost',
                                3306,
                                'my_catalog',
                                'dbuser',
                                'dbuser',
                                debug_mode=None)

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("canary"))

    try:
        t = cat.create_table("people", "../data/People.csv", cds)
        print_test_separator("FAILURE test_create_table_3")
        print("People table", t.describe_table())
    except Exception as e:
        print("Exception e = ", e)
        print_test_separator("Complete test_create_table_3_fail successfully")
def test_create_table_4():
    """
        Creates a table that includes several column definitions.
        :return:
        """
    print_test_separator("Starting test_create_table_4")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(
        CSVCatalog.ColumnDefinition("yearID",
                                    column_type="text",
                                    not_null=True))
    cds.append(
        CSVCatalog.ColumnDefinition("stint",
                                    column_type="number",
                                    not_null=True))
    cds.append(
        CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False))
    cds.append(
        CSVCatalog.ColumnDefinition("AB", column_type="number",
                                    not_null=False))

    t = cat.create_table(
        "batting",
        "/Users/donaldferguson/Dropbox/ColumbiaCourse/Courses/Fall2018/W4111-Projects/CSVDB/Data/core/Batting.csv",
        cds)

    t.define_primary_key(['playerID', 'teamID', 'yearID', 'stint'])
    print("People table", json.dumps(t.describe_table(), indent=2))
    print_test_separator("Complete test_create_table_4")
def test_create_table_3_fail():
    """
    Creates a table that includes several column definitions. This test should fail because one of the defined
    columns is not in the underlying CSV file.
    :return:
    """
    print_test_separator("Starting test_create_table_3_fail")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("canary"))

    try:
        t = cat.create_table(
            "people",
            "/Users/donaldferguson/Dropbox/ColumbiaCourse/Courses/Fall2018/W4111-Projects/CSVDB/Data/core/People.csv",
            cds)
        print_test_separator("FAILURE test_create_table_3")
        print("People table", json.dumps(t.describe_table(), indent=2))
    except Exception as e:
        print("Exception e = ", e)
        print_test_separator("Complete test_create_table_3_fail successfully")
def my_test_3():
    """
    Try passing multiple index definitions in table definition. Should pass.
    """
    print_test_separator("Starting my_test_3")
    print(
        "##### Try passing multiple index definitions in table definition. Should pass.\n"
    )
    cleanup()

    cat = CSVCatalog.CSVCatalog()
    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))

    ids = []
    ## TODO test this?
    # ids.append(CSVCatalog.IndexDefinition("PRIMARY", "PRIMARY", ["playerID"]))
    # ids.append(CSVCatalog.IndexDefinition("my_idx", "INDEX", ["playerID"]))
    ids.append(CSVCatalog.IndexDefinition("PRIMARY", "PRIMARY", ["playerID"]))
    ids.append(
        CSVCatalog.IndexDefinition("my_idx", "INDEX",
                                   ["nameLast", "nameFirst"]))
    t = cat.create_table("people", "data/People.csv", cds, ids)
    print("People table", json.dumps(t.describe_table(), indent=2))
    print("\n##### Expected result: pass")
    print_test_separator("Complete my_test_3")
def test_find_by_template_index_batting():
    print_test_separator("Starting test_find_by_template_index_batting")

    cleanup()

    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True))
    cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True))
    cds.append(CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False))

    t = cat.create_table("batting",
                         '/Users/apple/Desktop/HW3/src/Data/Batting.csv',
                         cds)
    t.define_index("team_year_idx", ['teamID', 'yearID'], "INDEX")
    batting_tbl = CSVTable.CSVTable("batting")

    start_time = time.time()

    tmp = {'playerID':'willite01','yearID':'1939','stint': '1','teamID':'BOS'}
    fields = ['playerID','yearID','stint','teamID']

    result = batting_tbl.find_by_template(tmp,fields)
    print("Result = \n",json.dumps(result,indent=2))
    print("table:\n")
    print_out(result)
    end_time = time.time()

    elapsed_time = end_time - start_time
    print("\nElapsed time to execute queries = ",elapsed_time)
    print_test_separator("Starting test_find_by_template_index_batting")
Exemplo n.º 7
0
def test_create_table_4_fail():
    """
    Creates a table that includes several column definitions and a primary key.
    The primary key references an undefined column, which is an error.
    NOTE: You should check for other errors. You do not need to check in the CSV file for uniqueness but
    should test other possible failures.
    :return:
    """
    print_test_separator("Starting test_create_table_4_fail")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True))
    cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True))
    cds.append(CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False))


    t = cat.create_table("batting",
                         "/Users/Yan/Desktop/hw3/data/Batting.csv",
                         cds)
    try:
        t.define_primary_key(['playerID', 'teamID', 'yearID', 'HR'])
        print("Batting table", json.dumps(t.describe_table(), indent=2))
        print_test_separator("FAILURES test_create_table_4_fail")
    except Exception as e:
        print("Exception e = ", e)
        print_test_separator("SUCCESS test_create_table_4_fail should fail.")
Exemplo n.º 8
0
def test_table_9_fail():
    """
    Simple create of table definition. No columns or indexes.
    :return:
    """
    cleanup()
    print_test_separator("Starting test_table_9")
    cat = CSVCatalog.CSVCatalog()
    try:
        t = cat.create_table(
            "teams",
            "../data/Teams.csv")
        print("Teams table", json.dumps(
            t.describe_table(), indent = 2))
        t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number"))
        t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number"))
        t.add_column_definition(CSVCatalog.ColumnDefinition("teamID", "text"))
        t.drop_column_definition("Rank")
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.define_primary_key(["teamID"])
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.define_index("data1", ["AB", "HB"], "INDEX")  # should raise an error -403 here since there's no "HB" in column definitions
    except Exception as e:
        print("Exception e = ", e)
        print_test_separator("SUCCESS test_table_9 should fail.")
    print_test_separator("Complete test_table_9")
def test_to_json():
    try:
        print("\n*********** Testing to JSON. *******************\n")
        cat = CSVCatalog.CSVCatalog()
        cat.drop_table("teams")

        cds = []
        cds.append(CSVCatalog.ColumnDefinition('teamID', 'text', True))
        cds.append(CSVCatalog.ColumnDefinition('yearID', 'text', True))
        cds.append(CSVCatalog.ColumnDefinition('W', column_type='number'))

        tbl = CSVCatalog.TableDefinition(
            "teams", "data/Teams.csv", column_definitions=cds,
            cnx=cat.cnx)  # maybe this is inproper?
        r = json.dumps(tbl.to_json(), indent=2)
        print("Teams definition = \n", r)
        with open("unit_tests_catalog_json.txt", "w") as result_file:
            result_file.write(r)

        print("\n\n")
    except Exception as e:
        print(
            "My implementation throws a custom exception. You can print any meaningful error you want."
        )
        print("Could not create table. Exception = ", e)
        print("\n*********** Testing to JSON. *******************\n")
def test3():
    print_test_separator("Starting test_create_table_1")
    cat = CSVCatalog.CSVCatalog()
    t = cat.create_table(
        "people",
        "/Users/Jinyang_Li/Dropbox/ColumbiaCourse/Courses/Fall2018/W4111/Data/People.csv"
    )
def define_tables():
    cleanup()
    cat = CSVCatalog.CSVCatalog()
    
    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    
    #DDL:
    t = cat.create_table("people",
                         '/Users/apple/Desktop/HW3/src/Data/People.csv',cds)
    
    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("H", column_type="number", not_null=True))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number"))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True))
    cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True))
    
    t = cat.create_table("batting",
                         '/Users/apple/Desktop/HW3/src/Data/Batting.csv',cds)
def my_test_1():
    """
    Test add_column_definition. Should pass.

    :return:
    """
    print_start("my_test_1", "Test add_column_definition. Should pass.")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(
        CSVCatalog.ColumnDefinition("yearID",
                                    column_type="text",
                                    not_null=True))
    cds.append(
        CSVCatalog.ColumnDefinition("stint",
                                    column_type="number",
                                    not_null=True))

    t = cat.create_table("batting", "data/Batting.csv", cds)
    print("Before adding another column:\n",
          json.dumps(t.describe_table(), indent=2))
    new_cd = CSVCatalog.ColumnDefinition("AB",
                                         column_type="number",
                                         not_null=False)
    t.add_column_definition(new_cd)
    print("\nAfter adding another column:\n",
          json.dumps(t.describe_table(), indent=2))
    print_end("my_test_1", "pass")
Exemplo n.º 13
0
def test_create_table_5_prep():
    """
    Creates a table that includes several column definitions and a primary key.
    :return:
    """
    print_test_separator("Starting test_create_table_5_prep")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(
        CSVCatalog.ColumnDefinition("yearID",
                                    column_type="text",
                                    not_null=True))
    cds.append(
        CSVCatalog.ColumnDefinition("stint",
                                    column_type="number",
                                    not_null=True))
    cds.append(
        CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False))
    cds.append(
        CSVCatalog.ColumnDefinition("AB", column_type="number",
                                    not_null=False))

    t = cat.create_table("batting", "../data/Batting.csv", cds)

    t.define_primary_key(['playerID', 'teamID', 'yearID', 'stint'])
    print("Batting table", json.dumps(t.describe_table(), indent=2))

    print_test_separator("Completed test_create_table_5_prep")
Exemplo n.º 14
0
def test_find_by_template(optimize=False):
    cleanup()
    print_test_separator("Starting test_optimizable_2, optimize = " + str(optimize))

    cat = CSVCatalog.CSVCatalog('localhost', 3306, db_name, user_name, pwd, debug_mode=None)

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", "text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    t = cat.create_table(
        "people",
        data_dir + "People.csv",
        cds)
    print("People table metadata = \n", t.describe_table())

    t.define_index("nameLast_idx", ['nameLast'], "INDEX")

    people_tbl = CSVTable.CSVTable("people")
    print("Loaded people table = \n", people_tbl)

    tmp = {"nameLast": "Williams"}
    result = people_tbl.find_by_template(tmp);
    print("result = \n", len(result));
Exemplo n.º 15
0
def test_join_optimizable_3(optimize=False):
    """
    :return:
    """
    cleanup()
    print_test_separator("Starting test_optimizable_3, optimize = " +
                         str(optimize))

    cat = CSVCatalog.CSVCatalog()
    cds = []

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", "text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    t = cat.create_table("people", "/Users/Yan/Desktop/hw3/data/People.csv",
                         cds)
    t.define_index("pid_idx", "INDEX", ['playerID'])
    print("People table metadata = \n", json.dumps(t.describe_table(),
                                                   indent=2))

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("H", "number", True))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number"))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True))
    cds.append(
        CSVCatalog.ColumnDefinition("stint",
                                    column_type="number",
                                    not_null=True))

    t = cat.create_table("batting", "/Users/Yan/Desktop/hw3/data/Batting.csv",
                         cds)
    print("Batting table metadata = \n",
          json.dumps(t.describe_table(), indent=2))
    t.define_index("pid_idx", "INDEX", ['playerID'])

    people_tbl = CSVDataTable.CSVTable("people")
    batting_tbl = CSVDataTable.CSVTable("batting")

    print("Loaded people table = \n", people_tbl)
    print("Loaded batting table = \n", batting_tbl)

    start_time = time.time()

    tmp = {"playerID": "willite01"}
    join_result = people_tbl.join(batting_tbl, ['playerID'], tmp)

    end_time = time.time()

    print("Result = \n", join_result)
    elapsed_time = end_time - start_time
    print("\n\nElapsed time = ", elapsed_time)

    print_test_separator("Complete test_join_optimizable_3")
Exemplo n.º 16
0
def test_table_10_fail():
    """
    Simple create of table definition. No columns or indexes.
    :return:
    """
    cleanup()
    print_test_separator("Starting test_table_10")
    cat = CSVCatalog.CSVCatalog()
    try:
        t = cat.create_table(
            "teams",
            "../data/Teams.csv")
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number"))
        t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number"))
        t.add_column_definition(CSVCatalog.ColumnDefinition("CG", "number"))
        t.add_column_definition(CSVCatalog.ColumnDefinition("teamID", "text", not_null=True))
        t.drop_column_definition("Rank")
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.define_primary_key(["teamID"])
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.define_index("data1", ["AB"], "INDEX")
        t.define_index("data2", ["CG"])
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.drop_column_definition("AB")
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.drop_index("data1")             # should raise an error -403 here: because we dropped "AB", the "data1" index should disappear
    except Exception as e:
        print("Exception e = ", e)
        print_test_separator("SUCCESS test_table_10 should fail.")
    print_test_separator("Complete test_table_10")
def test_join_not_optimized(optimize=False):
    """

    :return:
    """

    print_test_separator("Starting test_optimizable_1, optimize = " + str(optimize))
    print("\n\nDude. This takes 30 minutes. Trust me.\n\n")
    return

    cleanup()
    print_test_separator("Starting test_optimizable_1, optimize = " + str(optimize))

    cat = CSVCatalog.CSVCatalog()
    cds = []

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", "text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    t = cat.create_table(
        "people",
        data_dir + "People.csv",
        cds)
    print("People table metadata = \n", json.dumps(t.describe_table(), indent=2))

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("H", "number", True))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number"))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True))

    t = cat.create_table(
        "batting",
        data_dir + "Batting.csv",
        cds)
    print("Batting table metadata = \n", json.dumps(t.describe_table(), indent=2))

    people_tbl = CSVTable.CSVTable("people")
    batting_tbl = CSVTable.CSVTable("batting")

    start_time = time.time()

    tmp = { "playerID": "willite01"}
    join_result = people_tbl.join(batting_tbl,['playerID'], tmp, optimize=optimize)

    end_time = time.time()

    print("Result = \n", json.dumps(join_result,indent=2))
    elapsed_time = end_time - start_time
    print("\n\nElapsed time = ", elapsed_time)

    print_test_separator("Complete test_join_optimizable")
Exemplo n.º 18
0
def test_join_optimizable_3(optimize=False):
    """

    :return:
    """
    cleanup()
    print_test_separator("Starting test_optimizable_3, optimize = " + str(optimize))

    cat = CSVCatalog.CSVCatalog('localhost', 3306, db_name, user_name, pwd, debug_mode=None)

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", "text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    t = cat.create_table(
        "people",
        data_dir + "People.csv",
        cds)
    t.define_index("pid_idx", ['playerID'], "INDEX")
    print("People table metadata = \n", t.describe_table())

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("H", "number", True))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number"))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True))

    t = cat.create_table(
        "batting",
        data_dir + "Batting.csv",
        cds)
    print("Batting table metadata = \n", t.describe_table())
    t.define_index("pid_idx", ['playerID'], "INDEX")

    people_tbl = CSVTable.CSVTable("people")
    batting_tbl = CSVTable.CSVTable("batting")

    print("Loaded people table = \n", people_tbl)
    print("Loaded batting table = \n", batting_tbl)

    start_time = time.time()

    tmp = {"playerID": "willite01"}
    join_result = people_tbl.join(batting_tbl, ['playerID'], tmp, optimize=optimize)

    end_time = time.time()

    print("Result = \n", json.dumps(join_result, indent=2))
    print("Rows count=", len(join_result))
    elapsed_time = end_time - start_time
    print("\n\nElapsed time = ", elapsed_time)

    print_test_separator("Complete test_join_optimizable_3")
def test_join_optimizable_2(optimize=False):
    """
    Calling this with optimize=True turns on optimizations in the JOIN code.
    :return:
    """
    cleanup()
    print_test_separator("Starting test_optimizable_2, optimize = " +
                         str(optimize))

    cat = CSVCatalog.CSVCatalog()
    cds = []

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", "text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    t = cat.create_table("people", data_dir + "People.csv", cds)
    print("People table metadata = \n", json.dumps(t.describe_table(),
                                                   indent=2))
    t.define_index("pid_idx", ['playerID'], "INDEX")

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("H", "number", True))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number"))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True))
    cds.append(
        CSVCatalog.ColumnDefinition("stint",
                                    column_type="number",
                                    not_null=True))

    t = cat.create_table("batting", data_dir + "Batting.csv", cds)
    print("Batting table metadata = \n",
          json.dumps(t.describe_table(), indent=2))

    people_tbl = CSVTable.CSVTable("people")
    batting_tbl = CSVTable.CSVTable("batting")

    print("Loaded people table = \n", people_tbl)
    print("Loaded batting table = \n", batting_tbl)

    start_time = time.time()

    join_result = people_tbl.join(batting_tbl, ['playerID'],
                                  None,
                                  optimize=optimize)

    end_time = time.time()

    print("Result = \n", join_result)
    elapsed_time = end_time - start_time
    print("\n\nElapsed time = ", elapsed_time)

    print_test_separator("Complete test_join_optimizable_2")
Exemplo n.º 20
0
 def __load_info__(self):
     """
     Loads metadata from catalog and sets __description__ to hold the information.
     :return:
     """
     cat = CSVCatalog.CSVCatalog()
     t = cat.get_table(self.__table_name__)
     self.__description__ = t.describe_table()
def test_create_table_1():
    cleanup()
    print_test_separator("Starting test_create_table_1")
    cat = CSVCatalog.CSVCatalog()
    t = cat.create_table("people",
                         "/Users/apple/Desktop/HW3/src/Data/People.csv")
    print("People table", json.dumps(t.describe_table()))
    print_test_separator("Complete test_create_table_1")
def test_join_optimizable_2_op(optimize=False):
    '''
    I used limit=15 here, because I just want to show the first 15 records and the whole table is so big
    '''
    
    cleanup()
    print_test_separator("Starting test_join_optimizable_2_op, optimize = " + str(optimize))

    cat = CSVCatalog.CSVCatalog()
    cds = []

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", "text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    t = cat.create_table(
        "people",
        data_dir + "People.csv",
        cds)
    
    print("People table metadata = \n", json.dumps(t.describe_table(), indent=2))
    t.define_index("pid_idx", ['playerID'],"INDEX")
    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("H", "number", True))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number"))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True))

    t = cat.create_table(
        "batting",
        data_dir + "Batting.csv",
        cds)
    print("Batting table metadata = \n", json.dumps(t.describe_table(), indent=2))
    t.define_index("pid_idx", ['playerID'],"INDEX")

    people_tbl = CSVTable.CSVTable("people")
    batting_tbl = CSVTable.CSVTable("batting")


    start_time = time.time()

    tmp = {"playerID": "willite01"}
    join_result = people_tbl.join(batting_tbl,['playerID'], None, optimize=optimize)

    end_time = time.time()
    
    print('table = \n')
    print_out(join_result,limit=15)
    elapsed_time = end_time - start_time
    print("\n\nElapsed time = ", elapsed_time)

    print_test_separator("Complete test_join_optimizable_2_op")
Exemplo n.º 23
0
def cleanup():
    """
    Deletes previously created information to enable re-running tests.
    :return: None
    """
    cat = CSVCatalog.CSVCatalog()
    cat.drop_table("people")
    cat.drop_table("batting")
    cat.drop_table("teams")
def test4_drop():
    cat = CSVCatalog.CSVCatalog()
    t = cat.get_table("people")
    #print("t: ",t.describe_table())
    print("Initial status of table = \n",
          json.dumps(t.describe_table_test(), indent=2))
    t = cat.drop_table('people')
    print("Initial status of table = \n",
          json.dumps(t.describe_table_test(), indent=2))
Exemplo n.º 25
0
def cleanup():
    """
    Deletes previously created information to enable re-running tests.
    :return: None
    """
    cat = CSVCatalog.CSVCatalog('localhost', 3306, db_name, user_name, pwd, debug_mode=None)
    cat.drop_table("people")
    cat.drop_table("batting")
    cat.drop_table("teams")
Exemplo n.º 26
0
def test_create_table_1():
    """
    Simple create of table definition. No columns or indexes.
    :return:
    """
    cat = CSVCatalog.CSVCatalog()
    t = cat.create_table(
        "people",
        "../data/People.csv")
Exemplo n.º 27
0
def test_create_table_1():
    """
    Simple create of table definition. No columns or indexes.
    :return:
    """
    cleanup()
    print_test_separator("Starting test_create_table_1")
    cat = CSVCatalog.CSVCatalog()
    t = cat.create_table("people", "../Data/People.csv")
    print("People table", json.dumps(t.describe_table(), indent=2))
    print_test_separator("Complete test_create_table_1")
Exemplo n.º 28
0
def test_table_6():
    """
    Simple create of table definition. No columns or indexes.
    :return:
    """
    cleanup()
    print_test_separator("Starting test_table_6")
    cat = CSVCatalog.CSVCatalog()
    t = cat.create_table(
        "teams",
        "../data/Teams.csv")
    print("Test1: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n")    # The output should be: Teams table {"definition": {"name": "teams", "path": "../data/Teams.csv"}, "columns": [], "indexes": {}}

    t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number"))
    t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number"))
    t.add_column_definition(CSVCatalog.ColumnDefinition("CG", "number"))
    t.add_column_definition(CSVCatalog.ColumnDefinition("teamID", "text", not_null=True))
    t.drop_column_definition("Rank")
    print("Test2: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n")    # The output should have "AB" and "teamID"'s column definitions

    t.define_primary_key(["teamID"])
    print("Test3: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n")    # The output should have a "PRIMARY" index now

    t.define_index("data1", ["AB"], "INDEX")                # The output should have a "data1" index now
    t.define_index("data2", ["CG"])                         # The output should have a "data2" index now
    print("Test4: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n")    # Test the index definitions

    t.drop_column_definition("AB")
    print("Test5: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n")    # Test the column and index definitions, column "AB" and index "data1" should disappear

    test_create_table_1()
    p = CSVCatalog.CSVCatalog().get_table("People")
    p.add_column_definition(CSVCatalog.ColumnDefinition("playerID", "TEXT", not_null=True))
    p.define_primary_key(["playerID"])
    print("Test6: \n People table", json.dumps(p.describe_table(), indent = 2), "\n\n")   # Test reloading an existing table and altering the existing table

    t.drop_index("data2")
    print("Test7: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n")    # Test dropping an existing index, the index "data2" should disappear

    print_test_separator("Complete test_table_6")
Exemplo n.º 29
0
def test_find_by_template():

    cleanup()
    print_test_separator("Starting test_find_by_template")

    cat = CSVCatalog.CSVCatalog()
    cds = []

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", "text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    t = cat.create_table("people", "/Users/Yan/Desktop/hw3/data/People.csv",
                         cds)
    t.define_index("id_idx", "INDEX", ['nameLast'])
    print("People table metadata = \n", json.dumps(t.describe_table(),
                                                   indent=2))

    people_tbl = CSVDataTable.CSVTable("people")
    print("Loaded people table = \n", people_tbl)

    tries = 1000
    start_time = time.time()
    templ = {"nameLast": "Williams"}
    print("Starting test on find using indexed field, templ = ",
          json.dumps(templ))
    for i in range(0, tries):
        result = people_tbl.find_by_template(
            templ, ['playerID', 'nameLast', 'nameFirst'])
        if i == 0:
            print("Sample result = ", json.dumps(result))
    end_time = time.time()
    print("Elapsed time for ", tries, "lookups = ", end_time - start_time)

    tries = 1000
    start_time = time.time()
    templ = {"nameFirst": "Ted"}
    print("\n\nStarting test on find using NON-indexed field, tmpl = ",
          json.dumps(templ))
    for i in range(0, tries):
        result = people_tbl.find_by_template(
            templ, ['playerID', 'nameLast', 'nameFirst'])
        if i == 0:
            print("Sample result = ", json.dumps(result))
    end_time = time.time()
    print("Elapsed time for ", tries, "lookups = ", end_time - start_time)

    print_test_separator("Complete test_find_by_template")
def test_drop_index():
    print_test_separator("Starting test_drop_index_5")

    # DO NOT CALL CLEANUP. Want to access preexisting table.
    cat = CSVCatalog.CSVCatalog()
    t = cat.get_table("batting")
    print("Initial status of table = \n",
          json.dumps(t.describe_table(), indent=2))

    t.drop_column_(CSVCatalog.ColumnDefinition("G", "number"))
    print("Modified status of table = \n",
          json.dumps(t.describe_table(), indent=2))
    print_test_separator("Success test_drop_index_5")