def my_test_4(): """ Try add_column_definition if the column already exists. This should fail. :return: """ print_test_separator("Starting my_test_4") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) t = cat.create_table("people", "data/People.csv", cds) print("People table", json.dumps(t.describe_table(), indent=2)) print("\nTry to add a duplicate column playerID:") try: c = CSVCatalog.ColumnDefinition("playerID") t.add_column_definition(c) except Exception as e: print("Exception e = ", e) print("\n##### Expected result: fail") print_test_separator("Complete my_test_4")
def test_create_table_3_fail(): """ Creates a table that includes several column definitions. This test should fail because one of the defined columns is not in the underlying CSV file. :return: """ print_test_separator("Starting test_create_table_3_fail") cleanup() cat = CSVCatalog.CSVCatalog('localhost', 3306, 'my_catalog', 'dbuser', 'dbuser', debug_mode=None) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("canary")) try: t = cat.create_table("people", "../data/People.csv", cds) print_test_separator("FAILURE test_create_table_3") print("People table", t.describe_table()) except Exception as e: print("Exception e = ", e) print_test_separator("Complete test_create_table_3_fail successfully")
def test_create_table_4(): """ Creates a table that includes several column definitions. :return: """ print_test_separator("Starting test_create_table_4") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append( CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append( CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) cds.append( CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False)) cds.append( CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False)) t = cat.create_table( "batting", "/Users/donaldferguson/Dropbox/ColumbiaCourse/Courses/Fall2018/W4111-Projects/CSVDB/Data/core/Batting.csv", cds) t.define_primary_key(['playerID', 'teamID', 'yearID', 'stint']) print("People table", json.dumps(t.describe_table(), indent=2)) print_test_separator("Complete test_create_table_4")
def test_create_table_3_fail(): """ Creates a table that includes several column definitions. This test should fail because one of the defined columns is not in the underlying CSV file. :return: """ print_test_separator("Starting test_create_table_3_fail") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("canary")) try: t = cat.create_table( "people", "/Users/donaldferguson/Dropbox/ColumbiaCourse/Courses/Fall2018/W4111-Projects/CSVDB/Data/core/People.csv", cds) print_test_separator("FAILURE test_create_table_3") print("People table", json.dumps(t.describe_table(), indent=2)) except Exception as e: print("Exception e = ", e) print_test_separator("Complete test_create_table_3_fail successfully")
def my_test_3(): """ Try passing multiple index definitions in table definition. Should pass. """ print_test_separator("Starting my_test_3") print( "##### Try passing multiple index definitions in table definition. Should pass.\n" ) cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) ids = [] ## TODO test this? # ids.append(CSVCatalog.IndexDefinition("PRIMARY", "PRIMARY", ["playerID"])) # ids.append(CSVCatalog.IndexDefinition("my_idx", "INDEX", ["playerID"])) ids.append(CSVCatalog.IndexDefinition("PRIMARY", "PRIMARY", ["playerID"])) ids.append( CSVCatalog.IndexDefinition("my_idx", "INDEX", ["nameLast", "nameFirst"])) t = cat.create_table("people", "data/People.csv", cds, ids) print("People table", json.dumps(t.describe_table(), indent=2)) print("\n##### Expected result: pass") print_test_separator("Complete my_test_3")
def test_find_by_template_index_batting(): print_test_separator("Starting test_find_by_template_index_batting") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False)) t = cat.create_table("batting", '/Users/apple/Desktop/HW3/src/Data/Batting.csv', cds) t.define_index("team_year_idx", ['teamID', 'yearID'], "INDEX") batting_tbl = CSVTable.CSVTable("batting") start_time = time.time() tmp = {'playerID':'willite01','yearID':'1939','stint': '1','teamID':'BOS'} fields = ['playerID','yearID','stint','teamID'] result = batting_tbl.find_by_template(tmp,fields) print("Result = \n",json.dumps(result,indent=2)) print("table:\n") print_out(result) end_time = time.time() elapsed_time = end_time - start_time print("\nElapsed time to execute queries = ",elapsed_time) print_test_separator("Starting test_find_by_template_index_batting")
def test_create_table_4_fail(): """ Creates a table that includes several column definitions and a primary key. The primary key references an undefined column, which is an error. NOTE: You should check for other errors. You do not need to check in the CSV file for uniqueness but should test other possible failures. :return: """ print_test_separator("Starting test_create_table_4_fail") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False)) t = cat.create_table("batting", "/Users/Yan/Desktop/hw3/data/Batting.csv", cds) try: t.define_primary_key(['playerID', 'teamID', 'yearID', 'HR']) print("Batting table", json.dumps(t.describe_table(), indent=2)) print_test_separator("FAILURES test_create_table_4_fail") except Exception as e: print("Exception e = ", e) print_test_separator("SUCCESS test_create_table_4_fail should fail.")
def test_table_9_fail(): """ Simple create of table definition. No columns or indexes. :return: """ cleanup() print_test_separator("Starting test_table_9") cat = CSVCatalog.CSVCatalog() try: t = cat.create_table( "teams", "../data/Teams.csv") print("Teams table", json.dumps( t.describe_table(), indent = 2)) t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("teamID", "text")) t.drop_column_definition("Rank") print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.define_primary_key(["teamID"]) print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.define_index("data1", ["AB", "HB"], "INDEX") # should raise an error -403 here since there's no "HB" in column definitions except Exception as e: print("Exception e = ", e) print_test_separator("SUCCESS test_table_9 should fail.") print_test_separator("Complete test_table_9")
def test_to_json(): try: print("\n*********** Testing to JSON. *******************\n") cat = CSVCatalog.CSVCatalog() cat.drop_table("teams") cds = [] cds.append(CSVCatalog.ColumnDefinition('teamID', 'text', True)) cds.append(CSVCatalog.ColumnDefinition('yearID', 'text', True)) cds.append(CSVCatalog.ColumnDefinition('W', column_type='number')) tbl = CSVCatalog.TableDefinition( "teams", "data/Teams.csv", column_definitions=cds, cnx=cat.cnx) # maybe this is inproper? r = json.dumps(tbl.to_json(), indent=2) print("Teams definition = \n", r) with open("unit_tests_catalog_json.txt", "w") as result_file: result_file.write(r) print("\n\n") except Exception as e: print( "My implementation throws a custom exception. You can print any meaningful error you want." ) print("Could not create table. Exception = ", e) print("\n*********** Testing to JSON. *******************\n")
def test3(): print_test_separator("Starting test_create_table_1") cat = CSVCatalog.CSVCatalog() t = cat.create_table( "people", "/Users/Jinyang_Li/Dropbox/ColumbiaCourse/Courses/Fall2018/W4111/Data/People.csv" )
def define_tables(): cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) #DDL: t = cat.create_table("people", '/Users/apple/Desktop/HW3/src/Data/People.csv',cds) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("H", column_type="number", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number")) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table("batting", '/Users/apple/Desktop/HW3/src/Data/Batting.csv',cds)
def my_test_1(): """ Test add_column_definition. Should pass. :return: """ print_start("my_test_1", "Test add_column_definition. Should pass.") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append( CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append( CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table("batting", "data/Batting.csv", cds) print("Before adding another column:\n", json.dumps(t.describe_table(), indent=2)) new_cd = CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False) t.add_column_definition(new_cd) print("\nAfter adding another column:\n", json.dumps(t.describe_table(), indent=2)) print_end("my_test_1", "pass")
def test_create_table_5_prep(): """ Creates a table that includes several column definitions and a primary key. :return: """ print_test_separator("Starting test_create_table_5_prep") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append( CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append( CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) cds.append( CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False)) cds.append( CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False)) t = cat.create_table("batting", "../data/Batting.csv", cds) t.define_primary_key(['playerID', 'teamID', 'yearID', 'stint']) print("Batting table", json.dumps(t.describe_table(), indent=2)) print_test_separator("Completed test_create_table_5_prep")
def test_find_by_template(optimize=False): cleanup() print_test_separator("Starting test_optimizable_2, optimize = " + str(optimize)) cat = CSVCatalog.CSVCatalog('localhost', 3306, db_name, user_name, pwd, debug_mode=None) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table( "people", data_dir + "People.csv", cds) print("People table metadata = \n", t.describe_table()) t.define_index("nameLast_idx", ['nameLast'], "INDEX") people_tbl = CSVTable.CSVTable("people") print("Loaded people table = \n", people_tbl) tmp = {"nameLast": "Williams"} result = people_tbl.find_by_template(tmp); print("result = \n", len(result));
def test_join_optimizable_3(optimize=False): """ :return: """ cleanup() print_test_separator("Starting test_optimizable_3, optimize = " + str(optimize)) cat = CSVCatalog.CSVCatalog() cds = [] cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table("people", "/Users/Yan/Desktop/hw3/data/People.csv", cds) t.define_index("pid_idx", "INDEX", ['playerID']) print("People table metadata = \n", json.dumps(t.describe_table(), indent=2)) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("H", "number", True)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number")) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True)) cds.append( CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table("batting", "/Users/Yan/Desktop/hw3/data/Batting.csv", cds) print("Batting table metadata = \n", json.dumps(t.describe_table(), indent=2)) t.define_index("pid_idx", "INDEX", ['playerID']) people_tbl = CSVDataTable.CSVTable("people") batting_tbl = CSVDataTable.CSVTable("batting") print("Loaded people table = \n", people_tbl) print("Loaded batting table = \n", batting_tbl) start_time = time.time() tmp = {"playerID": "willite01"} join_result = people_tbl.join(batting_tbl, ['playerID'], tmp) end_time = time.time() print("Result = \n", join_result) elapsed_time = end_time - start_time print("\n\nElapsed time = ", elapsed_time) print_test_separator("Complete test_join_optimizable_3")
def test_table_10_fail(): """ Simple create of table definition. No columns or indexes. :return: """ cleanup() print_test_separator("Starting test_table_10") cat = CSVCatalog.CSVCatalog() try: t = cat.create_table( "teams", "../data/Teams.csv") print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("CG", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("teamID", "text", not_null=True)) t.drop_column_definition("Rank") print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.define_primary_key(["teamID"]) print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.define_index("data1", ["AB"], "INDEX") t.define_index("data2", ["CG"]) print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.drop_column_definition("AB") print("Teams table", json.dumps(t.describe_table(), indent = 2)) t.drop_index("data1") # should raise an error -403 here: because we dropped "AB", the "data1" index should disappear except Exception as e: print("Exception e = ", e) print_test_separator("SUCCESS test_table_10 should fail.") print_test_separator("Complete test_table_10")
def test_join_not_optimized(optimize=False): """ :return: """ print_test_separator("Starting test_optimizable_1, optimize = " + str(optimize)) print("\n\nDude. This takes 30 minutes. Trust me.\n\n") return cleanup() print_test_separator("Starting test_optimizable_1, optimize = " + str(optimize)) cat = CSVCatalog.CSVCatalog() cds = [] cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table( "people", data_dir + "People.csv", cds) print("People table metadata = \n", json.dumps(t.describe_table(), indent=2)) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("H", "number", True)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number")) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table( "batting", data_dir + "Batting.csv", cds) print("Batting table metadata = \n", json.dumps(t.describe_table(), indent=2)) people_tbl = CSVTable.CSVTable("people") batting_tbl = CSVTable.CSVTable("batting") start_time = time.time() tmp = { "playerID": "willite01"} join_result = people_tbl.join(batting_tbl,['playerID'], tmp, optimize=optimize) end_time = time.time() print("Result = \n", json.dumps(join_result,indent=2)) elapsed_time = end_time - start_time print("\n\nElapsed time = ", elapsed_time) print_test_separator("Complete test_join_optimizable")
def test_join_optimizable_3(optimize=False): """ :return: """ cleanup() print_test_separator("Starting test_optimizable_3, optimize = " + str(optimize)) cat = CSVCatalog.CSVCatalog('localhost', 3306, db_name, user_name, pwd, debug_mode=None) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table( "people", data_dir + "People.csv", cds) t.define_index("pid_idx", ['playerID'], "INDEX") print("People table metadata = \n", t.describe_table()) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("H", "number", True)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number")) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table( "batting", data_dir + "Batting.csv", cds) print("Batting table metadata = \n", t.describe_table()) t.define_index("pid_idx", ['playerID'], "INDEX") people_tbl = CSVTable.CSVTable("people") batting_tbl = CSVTable.CSVTable("batting") print("Loaded people table = \n", people_tbl) print("Loaded batting table = \n", batting_tbl) start_time = time.time() tmp = {"playerID": "willite01"} join_result = people_tbl.join(batting_tbl, ['playerID'], tmp, optimize=optimize) end_time = time.time() print("Result = \n", json.dumps(join_result, indent=2)) print("Rows count=", len(join_result)) elapsed_time = end_time - start_time print("\n\nElapsed time = ", elapsed_time) print_test_separator("Complete test_join_optimizable_3")
def test_join_optimizable_2(optimize=False): """ Calling this with optimize=True turns on optimizations in the JOIN code. :return: """ cleanup() print_test_separator("Starting test_optimizable_2, optimize = " + str(optimize)) cat = CSVCatalog.CSVCatalog() cds = [] cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table("people", data_dir + "People.csv", cds) print("People table metadata = \n", json.dumps(t.describe_table(), indent=2)) t.define_index("pid_idx", ['playerID'], "INDEX") cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("H", "number", True)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number")) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True)) cds.append( CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table("batting", data_dir + "Batting.csv", cds) print("Batting table metadata = \n", json.dumps(t.describe_table(), indent=2)) people_tbl = CSVTable.CSVTable("people") batting_tbl = CSVTable.CSVTable("batting") print("Loaded people table = \n", people_tbl) print("Loaded batting table = \n", batting_tbl) start_time = time.time() join_result = people_tbl.join(batting_tbl, ['playerID'], None, optimize=optimize) end_time = time.time() print("Result = \n", join_result) elapsed_time = end_time - start_time print("\n\nElapsed time = ", elapsed_time) print_test_separator("Complete test_join_optimizable_2")
def __load_info__(self): """ Loads metadata from catalog and sets __description__ to hold the information. :return: """ cat = CSVCatalog.CSVCatalog() t = cat.get_table(self.__table_name__) self.__description__ = t.describe_table()
def test_create_table_1(): cleanup() print_test_separator("Starting test_create_table_1") cat = CSVCatalog.CSVCatalog() t = cat.create_table("people", "/Users/apple/Desktop/HW3/src/Data/People.csv") print("People table", json.dumps(t.describe_table())) print_test_separator("Complete test_create_table_1")
def test_join_optimizable_2_op(optimize=False): ''' I used limit=15 here, because I just want to show the first 15 records and the whole table is so big ''' cleanup() print_test_separator("Starting test_join_optimizable_2_op, optimize = " + str(optimize)) cat = CSVCatalog.CSVCatalog() cds = [] cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table( "people", data_dir + "People.csv", cds) print("People table metadata = \n", json.dumps(t.describe_table(), indent=2)) t.define_index("pid_idx", ['playerID'],"INDEX") cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("H", "number", True)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number")) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table( "batting", data_dir + "Batting.csv", cds) print("Batting table metadata = \n", json.dumps(t.describe_table(), indent=2)) t.define_index("pid_idx", ['playerID'],"INDEX") people_tbl = CSVTable.CSVTable("people") batting_tbl = CSVTable.CSVTable("batting") start_time = time.time() tmp = {"playerID": "willite01"} join_result = people_tbl.join(batting_tbl,['playerID'], None, optimize=optimize) end_time = time.time() print('table = \n') print_out(join_result,limit=15) elapsed_time = end_time - start_time print("\n\nElapsed time = ", elapsed_time) print_test_separator("Complete test_join_optimizable_2_op")
def cleanup(): """ Deletes previously created information to enable re-running tests. :return: None """ cat = CSVCatalog.CSVCatalog() cat.drop_table("people") cat.drop_table("batting") cat.drop_table("teams")
def test4_drop(): cat = CSVCatalog.CSVCatalog() t = cat.get_table("people") #print("t: ",t.describe_table()) print("Initial status of table = \n", json.dumps(t.describe_table_test(), indent=2)) t = cat.drop_table('people') print("Initial status of table = \n", json.dumps(t.describe_table_test(), indent=2))
def cleanup(): """ Deletes previously created information to enable re-running tests. :return: None """ cat = CSVCatalog.CSVCatalog('localhost', 3306, db_name, user_name, pwd, debug_mode=None) cat.drop_table("people") cat.drop_table("batting") cat.drop_table("teams")
def test_create_table_1(): """ Simple create of table definition. No columns or indexes. :return: """ cat = CSVCatalog.CSVCatalog() t = cat.create_table( "people", "../data/People.csv")
def test_create_table_1(): """ Simple create of table definition. No columns or indexes. :return: """ cleanup() print_test_separator("Starting test_create_table_1") cat = CSVCatalog.CSVCatalog() t = cat.create_table("people", "../Data/People.csv") print("People table", json.dumps(t.describe_table(), indent=2)) print_test_separator("Complete test_create_table_1")
def test_table_6(): """ Simple create of table definition. No columns or indexes. :return: """ cleanup() print_test_separator("Starting test_table_6") cat = CSVCatalog.CSVCatalog() t = cat.create_table( "teams", "../data/Teams.csv") print("Test1: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n") # The output should be: Teams table {"definition": {"name": "teams", "path": "../data/Teams.csv"}, "columns": [], "indexes": {}} t.add_column_definition(CSVCatalog.ColumnDefinition("Rank", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("AB", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("CG", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("teamID", "text", not_null=True)) t.drop_column_definition("Rank") print("Test2: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n") # The output should have "AB" and "teamID"'s column definitions t.define_primary_key(["teamID"]) print("Test3: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n") # The output should have a "PRIMARY" index now t.define_index("data1", ["AB"], "INDEX") # The output should have a "data1" index now t.define_index("data2", ["CG"]) # The output should have a "data2" index now print("Test4: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n") # Test the index definitions t.drop_column_definition("AB") print("Test5: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n") # Test the column and index definitions, column "AB" and index "data1" should disappear test_create_table_1() p = CSVCatalog.CSVCatalog().get_table("People") p.add_column_definition(CSVCatalog.ColumnDefinition("playerID", "TEXT", not_null=True)) p.define_primary_key(["playerID"]) print("Test6: \n People table", json.dumps(p.describe_table(), indent = 2), "\n\n") # Test reloading an existing table and altering the existing table t.drop_index("data2") print("Test7: \n Teams table", json.dumps(t.describe_table(), indent = 2), "\n\n") # Test dropping an existing index, the index "data2" should disappear print_test_separator("Complete test_table_6")
def test_find_by_template(): cleanup() print_test_separator("Starting test_find_by_template") cat = CSVCatalog.CSVCatalog() cds = [] cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table("people", "/Users/Yan/Desktop/hw3/data/People.csv", cds) t.define_index("id_idx", "INDEX", ['nameLast']) print("People table metadata = \n", json.dumps(t.describe_table(), indent=2)) people_tbl = CSVDataTable.CSVTable("people") print("Loaded people table = \n", people_tbl) tries = 1000 start_time = time.time() templ = {"nameLast": "Williams"} print("Starting test on find using indexed field, templ = ", json.dumps(templ)) for i in range(0, tries): result = people_tbl.find_by_template( templ, ['playerID', 'nameLast', 'nameFirst']) if i == 0: print("Sample result = ", json.dumps(result)) end_time = time.time() print("Elapsed time for ", tries, "lookups = ", end_time - start_time) tries = 1000 start_time = time.time() templ = {"nameFirst": "Ted"} print("\n\nStarting test on find using NON-indexed field, tmpl = ", json.dumps(templ)) for i in range(0, tries): result = people_tbl.find_by_template( templ, ['playerID', 'nameLast', 'nameFirst']) if i == 0: print("Sample result = ", json.dumps(result)) end_time = time.time() print("Elapsed time for ", tries, "lookups = ", end_time - start_time) print_test_separator("Complete test_find_by_template")
def test_drop_index(): print_test_separator("Starting test_drop_index_5") # DO NOT CALL CLEANUP. Want to access preexisting table. cat = CSVCatalog.CSVCatalog() t = cat.get_table("batting") print("Initial status of table = \n", json.dumps(t.describe_table(), indent=2)) t.drop_column_(CSVCatalog.ColumnDefinition("G", "number")) print("Modified status of table = \n", json.dumps(t.describe_table(), indent=2)) print_test_separator("Success test_drop_index_5")