def my_test_3():
    """
    Try passing multiple index definitions in table definition. Should pass.
    """
    print_test_separator("Starting my_test_3")
    print(
        "##### Try passing multiple index definitions in table definition. Should pass.\n"
    )
    cleanup()

    cat = CSVCatalog.CSVCatalog()
    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))

    ids = []
    ## TODO test this?
    # ids.append(CSVCatalog.IndexDefinition("PRIMARY", "PRIMARY", ["playerID"]))
    # ids.append(CSVCatalog.IndexDefinition("my_idx", "INDEX", ["playerID"]))
    ids.append(CSVCatalog.IndexDefinition("PRIMARY", "PRIMARY", ["playerID"]))
    ids.append(
        CSVCatalog.IndexDefinition("my_idx", "INDEX",
                                   ["nameLast", "nameFirst"]))
    t = cat.create_table("people", "data/People.csv", cds, ids)
    print("People table", json.dumps(t.describe_table(), indent=2))
    print("\n##### Expected result: pass")
    print_test_separator("Complete my_test_3")
def test_create_table_3_fail():
    """
    Creates a table that includes several column definitions. This test should fail because one of the defined
    columns is not in the underlying CSV file.
    :return:
    """
    print_test_separator("Starting test_create_table_3_fail")
    cleanup()
    cat = CSVCatalog.CSVCatalog('localhost',
                                3306,
                                'my_catalog',
                                'dbuser',
                                'dbuser',
                                debug_mode=None)

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("canary"))

    try:
        t = cat.create_table("people", "../data/People.csv", cds)
        print_test_separator("FAILURE test_create_table_3")
        print("People table", t.describe_table())
    except Exception as e:
        print("Exception e = ", e)
        print_test_separator("Complete test_create_table_3_fail successfully")
def my_test_4():
    """
    Try add_column_definition if the column already exists. This should fail.

    :return:
    """
    print_test_separator("Starting my_test_4")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))

    t = cat.create_table("people", "data/People.csv", cds)
    print("People table", json.dumps(t.describe_table(), indent=2))
    print("\nTry to add a duplicate column playerID:")
    try:
        c = CSVCatalog.ColumnDefinition("playerID")
        t.add_column_definition(c)
    except Exception as e:
        print("Exception e = ", e)
    print("\n##### Expected result: fail")
    print_test_separator("Complete my_test_4")
def my_test_1():
    """
    Test add_column_definition. Should pass.

    :return:
    """
    print_start("my_test_1", "Test add_column_definition. Should pass.")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(
        CSVCatalog.ColumnDefinition("yearID",
                                    column_type="text",
                                    not_null=True))
    cds.append(
        CSVCatalog.ColumnDefinition("stint",
                                    column_type="number",
                                    not_null=True))

    t = cat.create_table("batting", "data/Batting.csv", cds)
    print("Before adding another column:\n",
          json.dumps(t.describe_table(), indent=2))
    new_cd = CSVCatalog.ColumnDefinition("AB",
                                         column_type="number",
                                         not_null=False)
    t.add_column_definition(new_cd)
    print("\nAfter adding another column:\n",
          json.dumps(t.describe_table(), indent=2))
    print_end("my_test_1", "pass")
def test_create_table_3_fail():
    """
    Creates a table that includes several column definitions. This test should fail because one of the defined
    columns is not in the underlying CSV file.
    :return:
    """
    print_test_separator("Starting test_create_table_3_fail")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("canary"))

    try:
        t = cat.create_table(
            "people",
            "/Users/donaldferguson/Dropbox/ColumbiaCourse/Courses/Fall2018/W4111-Projects/CSVDB/Data/core/People.csv",
            cds)
        print_test_separator("FAILURE test_create_table_3")
        print("People table", json.dumps(t.describe_table(), indent=2))
    except Exception as e:
        print("Exception e = ", e)
        print_test_separator("Complete test_create_table_3_fail successfully")
示例#6
0
def test_table_9_fail():
    """
    Simple create of table definition. No columns or indexes.
    :return:
    """
    cleanup()
    print_test_separator("Starting test_table_9")
    cat = CSVCatalog.CSVCatalog()
    try:
        t = cat.create_table(
            "teams",
            "../data/Teams.csv")
        print("Teams table", json.dumps(
            t.describe_table(), indent = 2))
        t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number"))
        t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number"))
        t.add_column_definition(CSVCatalog.ColumnDefinition("teamID", "text"))
        t.drop_column_definition("Rank")
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.define_primary_key(["teamID"])
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.define_index("data1", ["AB", "HB"], "INDEX")  # should raise an error -403 here since there's no "HB" in column definitions
    except Exception as e:
        print("Exception e = ", e)
        print_test_separator("SUCCESS test_table_9 should fail.")
    print_test_separator("Complete test_table_9")
def test_to_json():
    try:
        print("\n*********** Testing to JSON. *******************\n")
        cat = CSVCatalog.CSVCatalog()
        cat.drop_table("teams")

        cds = []
        cds.append(CSVCatalog.ColumnDefinition('teamID', 'text', True))
        cds.append(CSVCatalog.ColumnDefinition('yearID', 'text', True))
        cds.append(CSVCatalog.ColumnDefinition('W', column_type='number'))

        tbl = CSVCatalog.TableDefinition(
            "teams", "data/Teams.csv", column_definitions=cds,
            cnx=cat.cnx)  # maybe this is inproper?
        r = json.dumps(tbl.to_json(), indent=2)
        print("Teams definition = \n", r)
        with open("unit_tests_catalog_json.txt", "w") as result_file:
            result_file.write(r)

        print("\n\n")
    except Exception as e:
        print(
            "My implementation throws a custom exception. You can print any meaningful error you want."
        )
        print("Could not create table. Exception = ", e)
        print("\n*********** Testing to JSON. *******************\n")
示例#8
0
def test_table_10_fail():
    """
    Simple create of table definition. No columns or indexes.
    :return:
    """
    cleanup()
    print_test_separator("Starting test_table_10")
    cat = CSVCatalog.CSVCatalog()
    try:
        t = cat.create_table(
            "teams",
            "../data/Teams.csv")
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number"))
        t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number"))
        t.add_column_definition(CSVCatalog.ColumnDefinition("CG", "number"))
        t.add_column_definition(CSVCatalog.ColumnDefinition("teamID", "text", not_null=True))
        t.drop_column_definition("Rank")
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.define_primary_key(["teamID"])
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.define_index("data1", ["AB"], "INDEX")
        t.define_index("data2", ["CG"])
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.drop_column_definition("AB")
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.drop_index("data1")             # should raise an error -403 here: because we dropped "AB", the "data1" index should disappear
    except Exception as e:
        print("Exception e = ", e)
        print_test_separator("SUCCESS test_table_10 should fail.")
    print_test_separator("Complete test_table_10")
def test_index_def_to_json():
    print("\n*********** Testing index def to JSON. *******************\n")

    cds = []
    cds.append(CSVCatalog.ColumnDefinition('teamID', 'text', True))
    cds.append(CSVCatalog.ColumnDefinition('yearID', 'text', True))
    ind = CSVCatalog.IndexDefinition('INDEX_teamID_yearID', cds, "PRIMARY");
    print(ind.to_json());
def test():
    b = []
    #a =  CSVCatalog.ColumnDefinition("playerID", "text", True)
    b.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    b.append(CSVCatalog.ColumnDefinition("playerID", "number", True))
    #CSVCatalog.ColumnDefinition("playerID", "text", True).to_json()
    #print(type(b[0]))
    return b
def test_table_def_to_json():
    print("\n*********** Testing table def to JSON. *******************\n")
    cds = list()
    cds.append(CSVCatalog.ColumnDefinition('teamID', 'text', True))
    cds.append(CSVCatalog.ColumnDefinition('yearID', 'text', True))
    cds.append(CSVCatalog.ColumnDefinition('AB', 'number'))

    tbl = CSVCatalog.TableDefinition(
        "batting",
        "../data/Batting.csv",
        column_definitions=cds)
    print(tbl.to_json())
def test_join_optimizable_4(optimize=False):
    """
    Calling this with optimize=True turns on optimizations in the JOIN code.
    :return:
    """
    cleanup()
    print_test_separator("Starting test_optimizable_4, optimize = " +
                         str(optimize))

    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("H", "number", True))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number"))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True))
    cds.append(
        CSVCatalog.ColumnDefinition("stint",
                                    column_type="number",
                                    not_null=True))
    t = cat.create_table("batting", data_dir + "Batting.csv", cds)
    t.define_index("tid_idx", ['teamID'], "INDEX")
    print("Batting table metadata = \n",
          json.dumps(t.describe_table(), indent=2))
    batting_tbl = CSVTable.CSVTable("batting")
    print("Loaded batting table = \n", batting_tbl)

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("lgID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    p = cat.create_table("appearances", data_dir + "Appearances.csv", cds)
    print("Appearances table metadata = \n",
          json.dumps(p.describe_table(), indent=2))
    appearances_tbl = CSVTable.CSVTable("appearances")
    print("Loaded appearances table = \n", appearances_tbl)

    start_time = time.time()
    join_result = batting_tbl.join(appearances_tbl, ['playerID', 'teamID'],
                                   {"teamID": "SEA"},
                                   optimize=optimize)
    end_time = time.time()
    print("Result = \n", join_result)
    elapsed_time = end_time - start_time
    print("\n\nElapsed time = ", elapsed_time)

    print_test_separator("Complete test_join_optimizable_4")
示例#13
0
def test_create_table_5():
    """
    Modifies a preexisting/precreated table definition.
    :return:
    """
    print_test_separator("Starting test_create_table_5")

    # DO NOT CALL CLEANUP. Want to access preexisting table.
    cat = CSVCatalog.CSVCatalog()
    t = cat.get_table("batting")
    print("Initial status of table = \n", json.dumps(t.describe_table(), indent=2))
    t.add_column_definition(CSVCatalog.ColumnDefinition("HR", "number"))
    t.add_column_definition(CSVCatalog.ColumnDefinition("G", "number"))
    t.define_index("team_year_idx", ['teamID', 'yearID'], "INDEX")
    print("Modified status of table = \n", json.dumps(t.describe_table(), indent=2))
    print_test_separator("Success test_create_table_5")
def test_create_table_3():
    """
    Creates a table that includes several column definitions.
    :return:
    """
    print_test_separator("Starting test_create_table_3")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))

    t = cat.create_table("people", "../data/People.csv", cds)
    print("People table", json.dumps(t.describe_table(), indent=2))
    print_test_separator("Complete test_create_table_3")
def test_catalog_create():
    catalog = CSVCatalog.CSVCatalog('localhost', 3306, 'my_catalog', 'dbuser', 'dbuser', debug_mode=None)
    catalog.drop_table("People")

    player = CSVCatalog.ColumnDefinition('playerID', 'text', True);
    name_first = CSVCatalog.ColumnDefinition('nameFirst', 'text', True);
    birth_year = CSVCatalog.ColumnDefinition('birthYear', 'number');
    name_last = CSVCatalog.ColumnDefinition('nameLast', 'text', True)

    columns = list()
    columns.append(player)
    columns.append(name_first)
    columns.append(birth_year)

    print("\n*********** Testing catalog create/drop table. *******************\n")
    re = catalog.create_table("People", "../data/People.csv", columns, None)
    print(re.to_json())
    # catalog.drop_table("PeopleSmall")

    print("\n*********** Testing catalog table. *******************\n")
    table = catalog.get_table("People");
    print(table.to_json())

    print("\n*********** Testing catalog add column. *******************\n")
    table.add_column_definition(name_last)
    print(table.to_json())

    print("\n*********** Testing catalog drop column. *******************\n")
    table.drop_column_definition("birthYear")
    print(table.to_json())

    print("\n*********** Testing catalog add primary key. *******************\n")
    table.define_primary_key(['playerID']);
    print(table.to_json())

    print("\n*********** Testing catalog add index. *******************\n")
    table.define_index("People_nameFirst_nameLast_index", ["nameFirst","nameLast"], "INDEX");
    print(table.to_json())

    print("\n*********** Testing catalog drop index. *******************\n")
    table.drop_index("People_nameFirst_nameLast_index");
    print(table.to_json());
def test_create_table_3():
    """
    Creates a table that includes several column definitions.
    :return:
    """
    print_test_separator("Starting test_create_table_3")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))

    t = cat.create_table(
        "people",
        "/Users/donaldferguson/Dropbox/ColumbiaCourse/Courses/Fall2018/W4111-Projects/CSVDB/Data/core/People.csv",
        cds)
    print("People table", json.dumps(t.describe_table(), indent=2))
    print_test_separator("Complete test_create_table_3")
示例#17
0
def test_table_7_fail():
    """
    Simple create of table definition. No columns or indexes.
    :return:
    """
    cleanup()
    print_test_separator("Starting test_table_7")
    cat = CSVCatalog.CSVCatalog()
    try:
        t = cat.create_table(
            "teams",
            "../data/Teams.csv")
        print("Teams table", json.dumps(t.describe_table(), indent = 2))
        t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number"))
        t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number"))
        t.add_column_definition(CSVCatalog.ColumnDefinition("playerID", "text"))    # should raise an error -100 here since there's no "playerID" in "Teams"
    except Exception as e:
        print("Exception e = ", e)
        print_test_separator("SUCCESS test_table_7 should fail.")
    print_test_separator("Complete test_table_7")
def test_create_table_5():
    """
    Modifies a preexisting/precreated table definition.
    :return:
    """
    print_test_separator("Starting test_create_table_5")

    # DO NOT CALL CLEANUP. Want to access preexisting table.
    cat = CSVCatalog.CSVCatalog('localhost',
                                3306,
                                'my_catalog',
                                'dbuser',
                                'dbuser',
                                debug_mode=None)
    t = cat.get_table("batting")
    print("Initial status of table = \n", t.describe_table())
    t.add_column_definition(CSVCatalog.ColumnDefinition("HR", "number"))
    t.add_column_definition(CSVCatalog.ColumnDefinition("G", "number"))
    t.define_index("team_year_idx", ['teamID', 'yearID'], "INDEX")
    print("Modified status of table = \n", t.describe_table())
    print_test_separator("Success test_create_table_5")
示例#19
0
def test_create_table_4_fail():
    """
    Creates a table that includes several column definitions and a primary key.
    The primary key references an undefined column, which is an error.
    NOTE: You should check for other errors. You do not need to check in the CSV file for uniqueness but
    should test other possible failures.
    :return:
    """
    print_test_separator("Starting test_create_table_4_fail")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True))
    cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True))
    cds.append(CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False))


    t = cat.create_table("batting",
                         "/Users/Yan/Desktop/hw3/data/Batting.csv",
                         cds)
    try:
        t.define_primary_key(['playerID', 'teamID', 'yearID', 'HR'])
        print("Batting table", json.dumps(t.describe_table(), indent=2))
        print_test_separator("FAILURES test_create_table_4_fail")
    except Exception as e:
        print("Exception e = ", e)
        print_test_separator("SUCCESS test_create_table_4_fail should fail.")
示例#20
0
def test_find_by_template(optimize=False):
    cleanup()
    print_test_separator("Starting test_optimizable_2, optimize = " + str(optimize))

    cat = CSVCatalog.CSVCatalog('localhost', 3306, db_name, user_name, pwd, debug_mode=None)

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", "text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    t = cat.create_table(
        "people",
        data_dir + "People.csv",
        cds)
    print("People table metadata = \n", t.describe_table())

    t.define_index("nameLast_idx", ['nameLast'], "INDEX")

    people_tbl = CSVTable.CSVTable("people")
    print("Loaded people table = \n", people_tbl)

    tmp = {"nameLast": "Williams"}
    result = people_tbl.find_by_template(tmp);
    print("result = \n", len(result));
def test_find_by_template_index_batting():
    print_test_separator("Starting test_find_by_template_index_batting")

    cleanup()

    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True))
    cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True))
    cds.append(CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False))

    t = cat.create_table("batting",
                         '/Users/apple/Desktop/HW3/src/Data/Batting.csv',
                         cds)
    t.define_index("team_year_idx", ['teamID', 'yearID'], "INDEX")
    batting_tbl = CSVTable.CSVTable("batting")

    start_time = time.time()

    tmp = {'playerID':'willite01','yearID':'1939','stint': '1','teamID':'BOS'}
    fields = ['playerID','yearID','stint','teamID']

    result = batting_tbl.find_by_template(tmp,fields)
    print("Result = \n",json.dumps(result,indent=2))
    print("table:\n")
    print_out(result)
    end_time = time.time()

    elapsed_time = end_time - start_time
    print("\nElapsed time to execute queries = ",elapsed_time)
    print_test_separator("Starting test_find_by_template_index_batting")
示例#22
0
def test_create_table_5_prep():
    """
    Creates a table that includes several column definitions and a primary key.
    :return:
    """
    print_test_separator("Starting test_create_table_5_prep")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(
        CSVCatalog.ColumnDefinition("yearID",
                                    column_type="text",
                                    not_null=True))
    cds.append(
        CSVCatalog.ColumnDefinition("stint",
                                    column_type="number",
                                    not_null=True))
    cds.append(
        CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False))
    cds.append(
        CSVCatalog.ColumnDefinition("AB", column_type="number",
                                    not_null=False))

    t = cat.create_table("batting", "../data/Batting.csv", cds)

    t.define_primary_key(['playerID', 'teamID', 'yearID', 'stint'])
    print("Batting table", json.dumps(t.describe_table(), indent=2))

    print_test_separator("Completed test_create_table_5_prep")
def test_create_table_4():
    """
        Creates a table that includes several column definitions.
        :return:
        """
    print_test_separator("Starting test_create_table_4")
    cleanup()
    cat = CSVCatalog.CSVCatalog()

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(
        CSVCatalog.ColumnDefinition("yearID",
                                    column_type="text",
                                    not_null=True))
    cds.append(
        CSVCatalog.ColumnDefinition("stint",
                                    column_type="number",
                                    not_null=True))
    cds.append(
        CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False))
    cds.append(
        CSVCatalog.ColumnDefinition("AB", column_type="number",
                                    not_null=False))

    t = cat.create_table(
        "batting",
        "/Users/donaldferguson/Dropbox/ColumbiaCourse/Courses/Fall2018/W4111-Projects/CSVDB/Data/core/Batting.csv",
        cds)

    t.define_primary_key(['playerID', 'teamID', 'yearID', 'stint'])
    print("People table", json.dumps(t.describe_table(), indent=2))
    print_test_separator("Complete test_create_table_4")
示例#24
0
def test_table_6():
    """
    Simple create of table definition. No columns or indexes.
    :return:
    """
    cleanup()
    print_test_separator("Starting test_table_6")
    cat = CSVCatalog.CSVCatalog()
    t = cat.create_table(
        "teams",
        "../data/Teams.csv")
    print("Test1: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n")    # The output should be: Teams table {"definition": {"name": "teams", "path": "../data/Teams.csv"}, "columns": [], "indexes": {}}

    t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number"))
    t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number"))
    t.add_column_definition(CSVCatalog.ColumnDefinition("CG", "number"))
    t.add_column_definition(CSVCatalog.ColumnDefinition("teamID", "text", not_null=True))
    t.drop_column_definition("Rank")
    print("Test2: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n")    # The output should have "AB" and "teamID"'s column definitions

    t.define_primary_key(["teamID"])
    print("Test3: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n")    # The output should have a "PRIMARY" index now

    t.define_index("data1", ["AB"], "INDEX")                # The output should have a "data1" index now
    t.define_index("data2", ["CG"])                         # The output should have a "data2" index now
    print("Test4: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n")    # Test the index definitions

    t.drop_column_definition("AB")
    print("Test5: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n")    # Test the column and index definitions, column "AB" and index "data1" should disappear

    test_create_table_1()
    p = CSVCatalog.CSVCatalog().get_table("People")
    p.add_column_definition(CSVCatalog.ColumnDefinition("playerID", "TEXT", not_null=True))
    p.define_primary_key(["playerID"])
    print("Test6: \n People table", json.dumps(p.describe_table(), indent = 2), "\n\n")   # Test reloading an existing table and altering the existing table

    t.drop_index("data2")
    print("Test7: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n")    # Test dropping an existing index, the index "data2" should disappear

    print_test_separator("Complete test_table_6")
def test_create_table_3():
    """
    Creates a table that includes several column definitions.
    :return:
    """
    print_test_separator("Starting test_create_table_3")
    cleanup()
    cat = CSVCatalog.CSVCatalog('localhost',
                                3306,
                                'my_catalog',
                                'dbuser',
                                'dbuser',
                                debug_mode=None)

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))

    t = cat.create_table("people", "../data/People.csv", cds)
    print("People table", t.describe_table())
    print_test_separator("Complete test_create_table_3")
def test_drop_index():
    print_test_separator("Starting test_drop_index_5")

    # DO NOT CALL CLEANUP. Want to access preexisting table.
    cat = CSVCatalog.CSVCatalog()
    t = cat.get_table("batting")
    print("Initial status of table = \n",
          json.dumps(t.describe_table(), indent=2))

    t.drop_column_(CSVCatalog.ColumnDefinition("G", "number"))
    print("Modified status of table = \n",
          json.dumps(t.describe_table(), indent=2))
    print_test_separator("Success test_drop_index_5")
示例#27
0
def test_find_by_template():

    cleanup()
    print_test_separator("Starting test_find_by_template")

    cat = CSVCatalog.CSVCatalog()
    cds = []

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", "text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    t = cat.create_table("people", "/Users/Yan/Desktop/hw3/data/People.csv",
                         cds)
    t.define_index("id_idx", "INDEX", ['nameLast'])
    print("People table metadata = \n", json.dumps(t.describe_table(),
                                                   indent=2))

    people_tbl = CSVDataTable.CSVTable("people")
    print("Loaded people table = \n", people_tbl)

    tries = 1000
    start_time = time.time()
    templ = {"nameLast": "Williams"}
    print("Starting test on find using indexed field, templ = ",
          json.dumps(templ))
    for i in range(0, tries):
        result = people_tbl.find_by_template(
            templ, ['playerID', 'nameLast', 'nameFirst'])
        if i == 0:
            print("Sample result = ", json.dumps(result))
    end_time = time.time()
    print("Elapsed time for ", tries, "lookups = ", end_time - start_time)

    tries = 1000
    start_time = time.time()
    templ = {"nameFirst": "Ted"}
    print("\n\nStarting test on find using NON-indexed field, tmpl = ",
          json.dumps(templ))
    for i in range(0, tries):
        result = people_tbl.find_by_template(
            templ, ['playerID', 'nameLast', 'nameFirst'])
        if i == 0:
            print("Sample result = ", json.dumps(result))
    end_time = time.time()
    print("Elapsed time for ", tries, "lookups = ", end_time - start_time)

    print_test_separator("Complete test_find_by_template")
示例#28
0
def test_select_with_index_3():
    """

    :return:
    """

    print_test_separator("Starting test_select_with_index_3")

    cleanup()

    cat = CSVCatalog.CSVCatalog()
    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("H", "number", True))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number"))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True))

    t = cat.create_table(
        "batting",
        data_dir + "Batting.csv",
        cds)
    t.define_index("playerID", ['playerID'], 'INDEX')
#    t.define_index("teamID_yearID", ['teamID', 'yearID'], "INDEX")
    print("Batting table metadata = \n", json.dumps(t.describe_table(), indent=2))



    batting_tbl = CSVTable.CSVTable("batting")

    print("Loaded batting table = \n", batting_tbl)

    start_time = time.time()

    select_result = batting_tbl.find_by_template({'playerID': 'willste01'})

    end_time = time.time()

    print("Result = \n", json.dumps(select_result, indent=2))
    elapsed_time = end_time - start_time
    print("\n\nElapsed time = ", elapsed_time)

    print_test_separator("Complete test_select_with_index_3")
示例#29
0
def test_select_no_index_1():
    """

    :return:
    """

    print_test_separator("Starting test_select_no_index_1")

    cleanup()

    cat = CSVCatalog.CSVCatalog()
    cds = []

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", "text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    t = cat.create_table(
        "people",
        data_dir + "People.csv",
        cds)
    print("People table metadata = \n", json.dumps(t.describe_table(), indent=2))


    people_tbl = CSVTable.CSVTable("people")

    print("Loaded people table = \n", people_tbl)

    start_time = time.time()

    select_result = people_tbl.find_by_template({'nameLast': 'Williams', 'nameFirst': 'Ted'})

    end_time = time.time()

    print("Result = \n", select_result)
    elapsed_time = end_time - start_time
    print("\n\nElapsed time = ", elapsed_time)

    print_test_separator("Complete test_select_no_index_1")
def test_create_table_4():
    """
        Creates a table that includes several column definitions.
        :return:
        """
    print_test_separator("Starting test_create_table_4")
    cleanup()
    cat = CSVCatalog.CSVCatalog('localhost',
                                3306,
                                'my_catalog',
                                'dbuser',
                                'dbuser',
                                debug_mode=None)

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(
        CSVCatalog.ColumnDefinition("yearID",
                                    column_type="text",
                                    not_null=True))
    cds.append(
        CSVCatalog.ColumnDefinition("stint",
                                    column_type="number",
                                    not_null=True))
    cds.append(
        CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False))
    cds.append(
        CSVCatalog.ColumnDefinition("AB", column_type="number",
                                    not_null=False))

    t = cat.create_table("batting", "../data/Batting.csv", cds)

    t.define_primary_key(['playerID', 'teamID', 'yearID', 'stint'])
    print("People table", t.describe_table())
    print_test_separator("Complete test_create_table_4")