def my_test_3(): """ Try passing multiple index definitions in table definition. Should pass. """ print_test_separator("Starting my_test_3") print( "##### Try passing multiple index definitions in table definition. Should pass.\n" ) cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) ids = [] ## TODO test this? # ids.append(CSVCatalog.IndexDefinition("PRIMARY", "PRIMARY", ["playerID"])) # ids.append(CSVCatalog.IndexDefinition("my_idx", "INDEX", ["playerID"])) ids.append(CSVCatalog.IndexDefinition("PRIMARY", "PRIMARY", ["playerID"])) ids.append( CSVCatalog.IndexDefinition("my_idx", "INDEX", ["nameLast", "nameFirst"])) t = cat.create_table("people", "data/People.csv", cds, ids) print("People table", json.dumps(t.describe_table(), indent=2)) print("\n##### Expected result: pass") print_test_separator("Complete my_test_3")
def test_create_table_3_fail(): """ Creates a table that includes several column definitions. This test should fail because one of the defined columns is not in the underlying CSV file. :return: """ print_test_separator("Starting test_create_table_3_fail") cleanup() cat = CSVCatalog.CSVCatalog('localhost', 3306, 'my_catalog', 'dbuser', 'dbuser', debug_mode=None) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("canary")) try: t = cat.create_table("people", "../data/People.csv", cds) print_test_separator("FAILURE test_create_table_3") print("People table", t.describe_table()) except Exception as e: print("Exception e = ", e) print_test_separator("Complete test_create_table_3_fail successfully")
def my_test_4(): """ Try add_column_definition if the column already exists. This should fail. :return: """ print_test_separator("Starting my_test_4") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) t = cat.create_table("people", "data/People.csv", cds) print("People table", json.dumps(t.describe_table(), indent=2)) print("\nTry to add a duplicate column playerID:") try: c = CSVCatalog.ColumnDefinition("playerID") t.add_column_definition(c) except Exception as e: print("Exception e = ", e) print("\n##### Expected result: fail") print_test_separator("Complete my_test_4")
def my_test_1(): """ Test add_column_definition. Should pass. :return: """ print_start("my_test_1", "Test add_column_definition. Should pass.") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append( CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append( CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table("batting", "data/Batting.csv", cds) print("Before adding another column:\n", json.dumps(t.describe_table(), indent=2)) new_cd = CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False) t.add_column_definition(new_cd) print("\nAfter adding another column:\n", json.dumps(t.describe_table(), indent=2)) print_end("my_test_1", "pass")
def test_create_table_3_fail(): """ Creates a table that includes several column definitions. This test should fail because one of the defined columns is not in the underlying CSV file. :return: """ print_test_separator("Starting test_create_table_3_fail") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("canary")) try: t = cat.create_table( "people", "/Users/donaldferguson/Dropbox/ColumbiaCourse/Courses/Fall2018/W4111-Projects/CSVDB/Data/core/People.csv", cds) print_test_separator("FAILURE test_create_table_3") print("People table", json.dumps(t.describe_table(), indent=2)) except Exception as e: print("Exception e = ", e) print_test_separator("Complete test_create_table_3_fail successfully")
def test_table_9_fail(): """ Simple create of table definition. No columns or indexes. :return: """ cleanup() print_test_separator("Starting test_table_9") cat = CSVCatalog.CSVCatalog() try: t = cat.create_table( "teams", "../data/Teams.csv") print("Teams table", json.dumps( t.describe_table(), indent = 2)) t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("teamID", "text")) t.drop_column_definition("Rank") print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.define_primary_key(["teamID"]) print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.define_index("data1", ["AB", "HB"], "INDEX") # should raise an error -403 here since there's no "HB" in column definitions except Exception as e: print("Exception e = ", e) print_test_separator("SUCCESS test_table_9 should fail.") print_test_separator("Complete test_table_9")
def test_to_json(): try: print("\n*********** Testing to JSON. *******************\n") cat = CSVCatalog.CSVCatalog() cat.drop_table("teams") cds = [] cds.append(CSVCatalog.ColumnDefinition('teamID', 'text', True)) cds.append(CSVCatalog.ColumnDefinition('yearID', 'text', True)) cds.append(CSVCatalog.ColumnDefinition('W', column_type='number')) tbl = CSVCatalog.TableDefinition( "teams", "data/Teams.csv", column_definitions=cds, cnx=cat.cnx) # maybe this is inproper? r = json.dumps(tbl.to_json(), indent=2) print("Teams definition = \n", r) with open("unit_tests_catalog_json.txt", "w") as result_file: result_file.write(r) print("\n\n") except Exception as e: print( "My implementation throws a custom exception. You can print any meaningful error you want." ) print("Could not create table. Exception = ", e) print("\n*********** Testing to JSON. *******************\n")
def test_table_10_fail(): """ Simple create of table definition. No columns or indexes. :return: """ cleanup() print_test_separator("Starting test_table_10") cat = CSVCatalog.CSVCatalog() try: t = cat.create_table( "teams", "../data/Teams.csv") print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("CG", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("teamID", "text", not_null=True)) t.drop_column_definition("Rank") print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.define_primary_key(["teamID"]) print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.define_index("data1", ["AB"], "INDEX") t.define_index("data2", ["CG"]) print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.drop_column_definition("AB") print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.drop_index("data1") # should raise an error -403 here: because we dropped "AB", the "data1" index should disappear except Exception as e: print("Exception e = ", e) print_test_separator("SUCCESS test_table_10 should fail.") print_test_separator("Complete test_table_10")
def test_index_def_to_json(): print("\n*********** Testing index def to JSON. *******************\n") cds = [] cds.append(CSVCatalog.ColumnDefinition('teamID', 'text', True)) cds.append(CSVCatalog.ColumnDefinition('yearID', 'text', True)) ind = CSVCatalog.IndexDefinition('INDEX_teamID_yearID', cds, "PRIMARY"); print(ind.to_json());
def test(): b = [] #a = CSVCatalog.ColumnDefinition("playerID", "text", True) b.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) b.append(CSVCatalog.ColumnDefinition("playerID", "number", True)) #CSVCatalog.ColumnDefinition("playerID", "text", True).to_json() #print(type(b[0])) return b
def test_table_def_to_json(): print("\n*********** Testing table def to JSON. *******************\n") cds = list() cds.append(CSVCatalog.ColumnDefinition('teamID', 'text', True)) cds.append(CSVCatalog.ColumnDefinition('yearID', 'text', True)) cds.append(CSVCatalog.ColumnDefinition('AB', 'number')) tbl = CSVCatalog.TableDefinition( "batting", "../data/Batting.csv", column_definitions=cds) print(tbl.to_json())
def test_join_optimizable_4(optimize=False): """ Calling this with optimize=True turns on optimizations in the JOIN code. :return: """ cleanup() print_test_separator("Starting test_optimizable_4, optimize = " + str(optimize)) cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("H", "number", True)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number")) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True)) cds.append( CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table("batting", data_dir + "Batting.csv", cds) t.define_index("tid_idx", ['teamID'], "INDEX") print("Batting table metadata = \n", json.dumps(t.describe_table(), indent=2)) batting_tbl = CSVTable.CSVTable("batting") print("Loaded batting table = \n", batting_tbl) cds = [] cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("lgID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) p = cat.create_table("appearances", data_dir + "Appearances.csv", cds) print("Appearances table metadata = \n", json.dumps(p.describe_table(), indent=2)) appearances_tbl = CSVTable.CSVTable("appearances") print("Loaded appearances table = \n", appearances_tbl) start_time = time.time() join_result = batting_tbl.join(appearances_tbl, ['playerID', 'teamID'], {"teamID": "SEA"}, optimize=optimize) end_time = time.time() print("Result = \n", join_result) elapsed_time = end_time - start_time print("\n\nElapsed time = ", elapsed_time) print_test_separator("Complete test_join_optimizable_4")
def test_create_table_5(): """ Modifies a preexisting/precreated table definition. :return: """ print_test_separator("Starting test_create_table_5") # DO NOT CALL CLEANUP. Want to access preexisting table. cat = CSVCatalog.CSVCatalog() t = cat.get_table("batting") print("Initial status of table = \n", json.dumps(t.describe_table(), indent=2)) t.add_column_definition(CSVCatalog.ColumnDefinition("HR", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("G", "number")) t.define_index("team_year_idx", ['teamID', 'yearID'], "INDEX") print("Modified status of table = \n", json.dumps(t.describe_table(), indent=2)) print_test_separator("Success test_create_table_5")
def test_create_table_3(): """ Creates a table that includes several column definitions. :return: """ print_test_separator("Starting test_create_table_3") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) t = cat.create_table("people", "../data/People.csv", cds) print("People table", json.dumps(t.describe_table(), indent=2)) print_test_separator("Complete test_create_table_3")
def test_catalog_create(): catalog = CSVCatalog.CSVCatalog('localhost', 3306, 'my_catalog', 'dbuser', 'dbuser', debug_mode=None) catalog.drop_table("People") player = CSVCatalog.ColumnDefinition('playerID', 'text', True); name_first = CSVCatalog.ColumnDefinition('nameFirst', 'text', True); birth_year = CSVCatalog.ColumnDefinition('birthYear', 'number'); name_last = CSVCatalog.ColumnDefinition('nameLast', 'text', True) columns = list() columns.append(player) columns.append(name_first) columns.append(birth_year) print("\n*********** Testing catalog create/drop table. *******************\n") re = catalog.create_table("People", "../data/People.csv", columns, None) print(re.to_json()) # catalog.drop_table("PeopleSmall") print("\n*********** Testing catalog table. *******************\n") table = catalog.get_table("People"); print(table.to_json()) print("\n*********** Testing catalog add column. *******************\n") table.add_column_definition(name_last) print(table.to_json()) print("\n*********** Testing catalog drop column. *******************\n") table.drop_column_definition("birthYear") print(table.to_json()) print("\n*********** Testing catalog add primary key. *******************\n") table.define_primary_key(['playerID']); print(table.to_json()) print("\n*********** Testing catalog add index. *******************\n") table.define_index("People_nameFirst_nameLast_index", ["nameFirst","nameLast"], "INDEX"); print(table.to_json()) print("\n*********** Testing catalog drop index. *******************\n") table.drop_index("People_nameFirst_nameLast_index"); print(table.to_json());
def test_create_table_3(): """ Creates a table that includes several column definitions. :return: """ print_test_separator("Starting test_create_table_3") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) t = cat.create_table( "people", "/Users/donaldferguson/Dropbox/ColumbiaCourse/Courses/Fall2018/W4111-Projects/CSVDB/Data/core/People.csv", cds) print("People table", json.dumps(t.describe_table(), indent=2)) print_test_separator("Complete test_create_table_3")
def test_table_7_fail(): """ Simple create of table definition. No columns or indexes. :return: """ cleanup() print_test_separator("Starting test_table_7") cat = CSVCatalog.CSVCatalog() try: t = cat.create_table( "teams", "../data/Teams.csv") print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("playerID", "text")) # should raise an error -100 here since there's no "playerID" in "Teams" except Exception as e: print("Exception e = ", e) print_test_separator("SUCCESS test_table_7 should fail.") print_test_separator("Complete test_table_7")
def test_create_table_5(): """ Modifies a preexisting/precreated table definition. :return: """ print_test_separator("Starting test_create_table_5") # DO NOT CALL CLEANUP. Want to access preexisting table. cat = CSVCatalog.CSVCatalog('localhost', 3306, 'my_catalog', 'dbuser', 'dbuser', debug_mode=None) t = cat.get_table("batting") print("Initial status of table = \n", t.describe_table()) t.add_column_definition(CSVCatalog.ColumnDefinition("HR", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("G", "number")) t.define_index("team_year_idx", ['teamID', 'yearID'], "INDEX") print("Modified status of table = \n", t.describe_table()) print_test_separator("Success test_create_table_5")
def test_create_table_4_fail(): """ Creates a table that includes several column definitions and a primary key. The primary key references an undefined column, which is an error. NOTE: You should check for other errors. You do not need to check in the CSV file for uniqueness but should test other possible failures. :return: """ print_test_separator("Starting test_create_table_4_fail") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False)) t = cat.create_table("batting", "/Users/Yan/Desktop/hw3/data/Batting.csv", cds) try: t.define_primary_key(['playerID', 'teamID', 'yearID', 'HR']) print("Batting table", json.dumps(t.describe_table(), indent=2)) print_test_separator("FAILURES test_create_table_4_fail") except Exception as e: print("Exception e = ", e) print_test_separator("SUCCESS test_create_table_4_fail should fail.")
def test_find_by_template(optimize=False): cleanup() print_test_separator("Starting test_optimizable_2, optimize = " + str(optimize)) cat = CSVCatalog.CSVCatalog('localhost', 3306, db_name, user_name, pwd, debug_mode=None) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table( "people", data_dir + "People.csv", cds) print("People table metadata = \n", t.describe_table()) t.define_index("nameLast_idx", ['nameLast'], "INDEX") people_tbl = CSVTable.CSVTable("people") print("Loaded people table = \n", people_tbl) tmp = {"nameLast": "Williams"} result = people_tbl.find_by_template(tmp); print("result = \n", len(result));
def test_find_by_template_index_batting(): print_test_separator("Starting test_find_by_template_index_batting") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False)) t = cat.create_table("batting", '/Users/apple/Desktop/HW3/src/Data/Batting.csv', cds) t.define_index("team_year_idx", ['teamID', 'yearID'], "INDEX") batting_tbl = CSVTable.CSVTable("batting") start_time = time.time() tmp = {'playerID':'willite01','yearID':'1939','stint': '1','teamID':'BOS'} fields = ['playerID','yearID','stint','teamID'] result = batting_tbl.find_by_template(tmp,fields) print("Result = \n",json.dumps(result,indent=2)) print("table:\n") print_out(result) end_time = time.time() elapsed_time = end_time - start_time print("\nElapsed time to execute queries = ",elapsed_time) print_test_separator("Starting test_find_by_template_index_batting")
def test_create_table_5_prep(): """ Creates a table that includes several column definitions and a primary key. :return: """ print_test_separator("Starting test_create_table_5_prep") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append( CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append( CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) cds.append( CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False)) cds.append( CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False)) t = cat.create_table("batting", "../data/Batting.csv", cds) t.define_primary_key(['playerID', 'teamID', 'yearID', 'stint']) print("Batting table", json.dumps(t.describe_table(), indent=2)) print_test_separator("Completed test_create_table_5_prep")
def test_create_table_4(): """ Creates a table that includes several column definitions. :return: """ print_test_separator("Starting test_create_table_4") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append( CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append( CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) cds.append( CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False)) cds.append( CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False)) t = cat.create_table( "batting", "/Users/donaldferguson/Dropbox/ColumbiaCourse/Courses/Fall2018/W4111-Projects/CSVDB/Data/core/Batting.csv", cds) t.define_primary_key(['playerID', 'teamID', 'yearID', 'stint']) print("People table", json.dumps(t.describe_table(), indent=2)) print_test_separator("Complete test_create_table_4")
def test_table_6(): """ Simple create of table definition. No columns or indexes. :return: """ cleanup() print_test_separator("Starting test_table_6") cat = CSVCatalog.CSVCatalog() t = cat.create_table( "teams", "../data/Teams.csv") print("Test1: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n") # The output should be: Teams table {"definition": {"name": "teams", "path": "../data/Teams.csv"}, "columns": [], "indexes": {}} t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("CG", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("teamID", "text", not_null=True)) t.drop_column_definition("Rank") print("Test2: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n") # The output should have "AB" and "teamID"'s column definitions t.define_primary_key(["teamID"]) print("Test3: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n") # The output should have a "PRIMARY" index now t.define_index("data1", ["AB"], "INDEX") # The output should have a "data1" index now t.define_index("data2", ["CG"]) # The output should have a "data2" index now print("Test4: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n") # Test the index definitions t.drop_column_definition("AB") print("Test5: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n") # Test the column and index definitions, column "AB" and index "data1" should disappear test_create_table_1() p = CSVCatalog.CSVCatalog().get_table("People") p.add_column_definition(CSVCatalog.ColumnDefinition("playerID", "TEXT", not_null=True)) p.define_primary_key(["playerID"]) print("Test6: \n People table", json.dumps(p.describe_table(), indent = 2), "\n\n") # Test reloading an existing table and altering the existing table t.drop_index("data2") print("Test7: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n") # Test dropping an existing index, the index "data2" should disappear print_test_separator("Complete test_table_6")
def test_create_table_3(): """ Creates a table that includes several column definitions. :return: """ print_test_separator("Starting test_create_table_3") cleanup() cat = CSVCatalog.CSVCatalog('localhost', 3306, 'my_catalog', 'dbuser', 'dbuser', debug_mode=None) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) t = cat.create_table("people", "../data/People.csv", cds) print("People table", t.describe_table()) print_test_separator("Complete test_create_table_3")
def test_drop_index(): print_test_separator("Starting test_drop_index_5") # DO NOT CALL CLEANUP. Want to access preexisting table. cat = CSVCatalog.CSVCatalog() t = cat.get_table("batting") print("Initial status of table = \n", json.dumps(t.describe_table(), indent=2)) t.drop_column_(CSVCatalog.ColumnDefinition("G", "number")) print("Modified status of table = \n", json.dumps(t.describe_table(), indent=2)) print_test_separator("Success test_drop_index_5")
def test_find_by_template(): cleanup() print_test_separator("Starting test_find_by_template") cat = CSVCatalog.CSVCatalog() cds = [] cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table("people", "/Users/Yan/Desktop/hw3/data/People.csv", cds) t.define_index("id_idx", "INDEX", ['nameLast']) print("People table metadata = \n", json.dumps(t.describe_table(), indent=2)) people_tbl = CSVDataTable.CSVTable("people") print("Loaded people table = \n", people_tbl) tries = 1000 start_time = time.time() templ = {"nameLast": "Williams"} print("Starting test on find using indexed field, templ = ", json.dumps(templ)) for i in range(0, tries): result = people_tbl.find_by_template( templ, ['playerID', 'nameLast', 'nameFirst']) if i == 0: print("Sample result = ", json.dumps(result)) end_time = time.time() print("Elapsed time for ", tries, "lookups = ", end_time - start_time) tries = 1000 start_time = time.time() templ = {"nameFirst": "Ted"} print("\n\nStarting test on find using NON-indexed field, tmpl = ", json.dumps(templ)) for i in range(0, tries): result = people_tbl.find_by_template( templ, ['playerID', 'nameLast', 'nameFirst']) if i == 0: print("Sample result = ", json.dumps(result)) end_time = time.time() print("Elapsed time for ", tries, "lookups = ", end_time - start_time) print_test_separator("Complete test_find_by_template")
def test_select_with_index_3(): """ :return: """ print_test_separator("Starting test_select_with_index_3") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("H", "number", True)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number")) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table( "batting", data_dir + "Batting.csv", cds) t.define_index("playerID", ['playerID'], 'INDEX') # t.define_index("teamID_yearID", ['teamID', 'yearID'], "INDEX") print("Batting table metadata = \n", json.dumps(t.describe_table(), indent=2)) batting_tbl = CSVTable.CSVTable("batting") print("Loaded batting table = \n", batting_tbl) start_time = time.time() select_result = batting_tbl.find_by_template({'playerID': 'willste01'}) end_time = time.time() print("Result = \n", json.dumps(select_result, indent=2)) elapsed_time = end_time - start_time print("\n\nElapsed time = ", elapsed_time) print_test_separator("Complete test_select_with_index_3")
def test_select_no_index_1(): """ :return: """ print_test_separator("Starting test_select_no_index_1") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table( "people", data_dir + "People.csv", cds) print("People table metadata = \n", json.dumps(t.describe_table(), indent=2)) people_tbl = CSVTable.CSVTable("people") print("Loaded people table = \n", people_tbl) start_time = time.time() select_result = people_tbl.find_by_template({'nameLast': 'Williams', 'nameFirst': 'Ted'}) end_time = time.time() print("Result = \n", select_result) elapsed_time = end_time - start_time print("\n\nElapsed time = ", elapsed_time) print_test_separator("Complete test_select_no_index_1")
def test_create_table_4(): """ Creates a table that includes several column definitions. :return: """ print_test_separator("Starting test_create_table_4") cleanup() cat = CSVCatalog.CSVCatalog('localhost', 3306, 'my_catalog', 'dbuser', 'dbuser', debug_mode=None) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append( CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append( CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) cds.append( CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False)) cds.append( CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False)) t = cat.create_table("batting", "../data/Batting.csv", cds) t.define_primary_key(['playerID', 'teamID', 'yearID', 'stint']) print("People table", t.describe_table()) print_test_separator("Complete test_create_table_4")