def t10(): "Test the time oj JOIN with and without optimization!! " rows, cols = load("../CSVFile/People.csv") t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) rows, cols = load("../CSVFile/BattingSmall.csv") t2 = CSVDataTable.CSVDataTable(table_name="BattingSmall", column_names=cols, primary_key_columns=['playerID', 'teamID', 'yearID', 'stint']) t2.import_data(rows) print("T = ", t) print("\n\n With optimization\n") start_time = time.time() for i in range(0,1): j = t2.join(t, ['playerID'], w_clause={"People.nameLast": "Williams", "People.birthCity": "San Diego"}, p_clause=['playerID', "People.nameLast", "People.nameFirst", "BattingSmall.teamID", "BattingSmall.yearID", "BattingSmall.stint", "BattingSmall.H", "BattingSmall.AB"], optimize=True) end_time = time.time() print("Elapsed time = ", end_time-start_time) print("\n\n Without optimization\n") start_time = time.time() for i in range(0, 1): j = t2.join(t, ['playerID'], w_clause={"People.nameLast": "Williams", "People.birthCity": "San Diego"}, p_clause=['playerID', "People.nameLast", "People.nameFirst", "BattingSmall.teamID", "BattingSmall.yearID", "BattingSmall.stint", "BattingSmall.H", "BattingSmall.AB"], optimize=False) end_time = time.time() print("Elapsed time = ", end_time - start_time)
def t4(): rows, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/People.csv" ) t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) rows, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/Batting.csv" ) t2 = CSVDataTable.CSVDataTable( table_name="Batting", column_names=cols, primary_key_columns=['playerID', 'teamID', "yearID", 'stint']) t2.import_data(rows) print("T = ", t2) j = t2.join(t, ['playerID'], w_clause={ "People.nameLast": "Williams", "People.birthCity": "San Diego" }, p_clause=[ 'playerID', "People.nameLast", "People.nameFirst", "Batting.teamID", "Batting.yearID", "Batting.stint", "Batting.H", "Batting.AB" ], optimize=True) print("Result = ", j) print("All rows =", json.dumps(j.get_rows(), indent=2))
def test_save_and_load(): print("\n\n*************TEST TEN, SAVE AND LOAD**************") rows, cols = load("CSVFile/offices.csv") print("\n ***READING CSV TO LOAD OFFICES.CSV") t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"]) t.import_data(rows) print("\n ***SAVING DOWN THE CSVDATATABLE") t.save() print("TABLE INFO AND ITS INDEXES\n") print(t) for k,v in t._indexes.items(): print(v.to_json()) print("\n ***LOADING CSV DATATABLE***") t2 = CSVDataTable.CSVDataTable(table_name = "Offices",loadit=True) print("\n **TABLE LOADED CONFIRM THEY ARE THE SAME**\n") print(t2) print("\n ***CONFIRM INDEX INTEGRITY***") for k,v in t2._indexes.items(): print(v.to_json())
def t4(): """ Test join function. :return: """ print("test JOIN function. ") rows, cols = load("../CSVFile/People.csv") t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) rows, cols = load("../CSVFile/BattingSmall.csv") t2 = CSVDataTable.CSVDataTable(table_name="BattingSmall", column_names=cols, primary_key_columns=['playerID', 'teamID', 'yearID', 'stint']) t2.import_data(rows) print("T = ", t) j = t2.join(t, ['playerID'], w_clause={"People.nameLast": "Williams", "People.birthCity": "San Diego"}, p_clause=['playerID', "People.nameLast", "People.nameFirst", "BattingSmall.teamID", "BattingSmall.yearID", "BattingSmall.stint", "BattingSmall.H", "BattingSmall.AB"], optimize=True) print("Result = ", j) print(j.get_rows()) print("All rows = ", json.dumps(j.get_rows(), indent=2))
def test_join_optimizable_3(optimize=False): """ :return: """ cleanup() print_test_separator("Starting test_optimizable_3, optimize = " + str(optimize)) cat = CSVCatalog.CSVCatalog() cds = [] cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table("people", "/Users/Yan/Desktop/hw3/data/People.csv", cds) t.define_index("pid_idx", "INDEX", ['playerID']) print("People table metadata = \n", json.dumps(t.describe_table(), indent=2)) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("H", "number", True)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number")) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True)) cds.append( CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table("batting", "/Users/Yan/Desktop/hw3/data/Batting.csv", cds) print("Batting table metadata = \n", json.dumps(t.describe_table(), indent=2)) t.define_index("pid_idx", "INDEX", ['playerID']) people_tbl = CSVDataTable.CSVTable("people") batting_tbl = CSVDataTable.CSVTable("batting") print("Loaded people table = \n", people_tbl) print("Loaded batting table = \n", batting_tbl) start_time = time.time() tmp = {"playerID": "willite01"} join_result = people_tbl.join(batting_tbl, ['playerID'], tmp) end_time = time.time() print("Result = \n", join_result) elapsed_time = end_time - start_time print("\n\nElapsed time = ", elapsed_time) print_test_separator("Complete test_join_optimizable_3")
def test_index(): print("\n\n*************TEST FOUR CREATING AN INDEX**************") t = CSVDataTable.CSVDataTable(table_name="Test",column_names=['last_name','first_name','uni'],loadit=None) i = CSVDataTable.Index(index_name="Bob", index_columns = ["last_name","first_name"],kind="INDEX", table = t) print(i) r = {"last_name": "Ferguson", "first_name": "Donald", "uni":"sure"} kv = i.compute_key(r) i.add_to_index(row=r,rid="2") print("KV = ",kv, "added to index")
def test_conflict(): print("\n\n*************TEST FIVE CREATING CONFLICTING INDEX THROWS ERROR WHEN UNIQUE**************") t= CSVDataTable.CSVDataTable(table_name="Test",column_names=['last_name','first_name','uni'],loadit=None) i = CSVDataTable.Index(index_name="Bob", index_columns = ["last_name","first_name"],kind="UNIQUE",table=t) r = {"last_name": "Ferguson", "first_name": "Donald", "uni":"sure"} kv = i.compute_key(r) i.add_to_index(row=r,rid="2") try: i.add_to_index(row=r, rid="3") except Exception as e: print("We caught an error: ", e)
def test_save_db4(): logging.debug("-------- test_save_db ---------") rows, cols = load( "/home/zhida/Desktop/Database/code/HW3/CSVFile/beach-weather-stations-automated-sensors-1.csv" ) t = CSVDataTable.CSVDataTable(table_name="automated-sensors", column_names=cols, primary_key_columns=["Measurement ID"], loadit=None) t.import_data(rows=rows) t.add_index("Station Name", ["Station Name"], "INDEX") t.save() print("save DB success!\n") print("\n\n\n\n\n\n") time1 = time.time() for i in range(100): t.find_by_template(tmp={ "Station Name": "Oak Street Weather Station", "Wet Bulb Temperature": 55 }, use_index=False) time2 = time.time() print("takes time = ", str(time2 - time1) + " second") time1 = time.time() for i in range(100): t.find_by_template(tmp={ "Station Name": "Oak Street Weather Station", "Wet Bulb Temperature": 55 }, use_index=True) time2 = time.time() print("takes time = ", str(time2 - time1) + " second")
def test_find(): print("\n\n*************TEST TWO, FIND BY TEMPLATE AND COMPARE RUN TIME WITH INDEX vs WITHOUT**************") tmp = {"playerID":"willite01"} rows, cols = load("CSVFile/people.csv") t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=['playerID']) t.import_data(rows) print("\n***TESTING RUN WITHOUT INDEX***") start = time.time() for i in range(0,5): r = t.find_by_template(tmp,fields=None,use_index=False) if i ==0: print("Row = ",r) end = time.time() elapsed = end - start print("Elapsed time=", elapsed) print("\n***TESTING RUNNING WITH INDEX***") start = time.time() for i in range(0,5): r=t.find_by_template(tmp,fields=None,use_index=True) if i==0: print("Row = ",r) end = time.time() elapsed = end-start print("Elapsed time = ",elapsed)
def t2(): """ Test add_index function, use index. :return: """ print("Test add_index function, find_by_template use index.") rows, cols = load("../CSVFile/People.csv") t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) tmp = {'playerID': 'willite01', "nameLast": "Williams"} t.add_index("LASTNAME", ['nameLast', 'nameFirst'], "INDEX") start = time.time() for i in range(0, 1000): r = t.find_by_template(tmp, fields=None, use_index=True).get_rows() if i == 0: print("Row = ", r) end = time.time() elapsed = end - start print("Time : ", elapsed)
def test_insert_and_import_data(): print("\n\n*************TEST ONE, TEST LOAD FUNCTION**************") print("*** HERE WE TEST THE IMPORT_DATA FUNCTION WHICH LEVEREAGES INSERT. LATER WE ALSO TEST LOADIT = TRUE AND THE LOAD FUNCTION***") rows, cols = load("CSVFile/people.csv") t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ",t)
def t1(): """ Test import_data and find_by_template functions. :return: """ print("Test import_data(which including add_rows(), and test find_by_tmp not use index. ") rows, cols = load("../CSVFile/People.csv") t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) tmp = {'playerID': 'willite01'} start = time.time() for i in range(0, 1000): r = t.find_by_template(tmp, fields=None, use_index=False).get_rows() if i == 0: print("Row = ", r) end = time.time() elapsed = end - start print("Time : ", elapsed) t.save()
def test_index_or_no_index(): logging.debug("-------- test_index_or_no_index ---------") rows, cols = load( "/home/zhida/Desktop/Database/code/HW3/CSVFile/beach-weather-stations-automated-sensors-1.csv" ) t = CSVDataTable.CSVDataTable(table_name="automated-sensors", column_names=cols, primary_key_columns=["Measurement ID"], loadit=None) t.import_data(rows=rows) t.add_index("Station Name", ["Station Name"], "INDEX") t.add_index("Wet Bulb Temperature", ["Wet Bulb Temperature"], "INDEX") print("\n\n\n") time1 = time.time() for i in range(1000): t.find_by_template(tmp={"Wet Bulb Temperature": '7.0'}, use_index=False, debug_log=False) time2 = time.time() print("[-No index-] -> run find_by_template(), 1000 iterations\n") print("takes time = ", str(time2 - time1) + " second\n") time1 = time.time() for i in range(1000): t.find_by_template(tmp={"Wet Bulb Temperature": '7.0'}, use_index=True, debug_log=False) time2 = time.time() print("[-Use index-] -> run find_by_template(), 1000 iterations\n") print("takes time = ", str(time2 - time1) + " second")
def test_specific_project(): rows, cols = load("CSVFile/People.csv") t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=["playerID"]) t.import_data(rows) print("T = ",t) p = ["playerID","People.nameLast","Batting.H"] print("Specific template = ", t._get_specific_project())
def test3(): i = CSVDataTable.Index(name="Bob", columns=["Last_Name", "First_Name"], kind="UNIQUE") r = {"Last_Name": "Ferguson", "First_Name": "Donald", "Uni": "sure"} kv = i.compute_key(r) print("KV = ", kv) i.add_to_index(row=r, rid="2") print("I = ", i)
def t9(): "Test delete function. Check if row id is correct after deletion. " rows, cols = load("../CSVFile/rings.csv") t = CSVDataTable.CSVDataTable(table_name="Rings", column_names=cols, primary_key_columns=['uni']) t.import_data(rows) r = t.delete({"last_name": "Baggins"}) print("delete:", r) print("\n\nafter delete, T = ", t)
def test_add_index(): print("\n\n*************TEST SIX, CREATE WITH INDEX AND THEN ADD ANOTHER INDEX**************") rows, cols = load("CSVFile/offices.csv") t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"]) print("\n***ORIGINALLY ***\n") print(t) cols = ["city","state","postalCode"] t.add_index(name="location",column_list=cols,kind="INDEX") print("\n***ADDING ANOTHER INDEX, LOCATION ***\n") print(t)
def test_incomplete_index_info(): ###CHECK FOR not complete index info ADD_INDEX FUNCTION print("\n\n*************TEST EIGHT, NOT ENOUGH INFORMATION TO CREATE INDEX THROWS AN ERROR**************") rows, cols = load("CSVFile/offices.csv") t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"]) cols = ["city","state","postalCode"] try: t.add_index("location",cols) except Exception as e: print("\n We caught an error, you did not provide enough information to create the key" + "\n",e)
def test_join(): ###CHECK FOR not complete index info ADD_INDEX FUNCTION print("\n\n*************TEST ELEVEN JOIN AND ALSO SEE THAT IT SWAPS TABLESX**************") rows, cols = load("CSVFile/People.csv") t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=["playerID"]) t.import_data(rows) print("T = ",t) rows, cols = load("CSVFile/Batting.csv") t2=CSVDataTable.CSVDataTable(table_name = "Batting",column_names = cols, primary_key_columns=["playerID","teamID","yearID","stint"]) t2.import_data(rows) print("T2 = ",t2) j = t2.join(t, ['playerID'], w_clause ={"People.nameLast":"Williams", "People.birthCity": "San Diego"}, p_clause =['playerID',"People.nameLast","People.nameFirst","Batting.teamID","Batting.yearID","Batting.stint","Batting.H","Batting.AB"] ,optimize = True) print("Result = " ,j) print("All rows = ", json.dumps(j._rows, indent=2))
def t8(): rows, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/offices.csv" ) t = CSVDataTable.CSVDataTable(table_name="offices", column_names=cols, primary_key_columns=['officeCode']) t.import_data(rows) t.save()
def test2(): new_r, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/offices.csv" ) t = CSVDataTable.CSVDataTable(table_name="offices.", column_names=cols, primary_key_columns=['officeCode'], loadit=None) t.import_data(new_r) print("t = ", t)
def test_create_table(): logging.debug("-------- test_create_table ---------") t = CSVDataTable.CSVDataTable( table_name="students", column_names=["uni", "first_name", "last_name"], primary_key_columns=["uni"], loadit=None) print(t) print("\n\n\n\n\n\n")
def t9(): rows, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/offices.csv" ) t = CSVDataTable.CSVDataTable(table_name="offices", column_names=cols, primary_key_columns=['officeCode']) t.import_data(rows) r = t.delete_by_template({"officeCode": "11"}) print(r)
def test_load_data(): logging.debug("-------- test_load_data ---------") rows, cols = load("/home/zhida/Desktop/Database/code/HW3/CSVFile/city.csv") t = CSVDataTable.CSVDataTable(table_name="city", column_names=cols, primary_key_columns=["id"], loadit=None) t.import_data(rows=rows) print("t = ", json.dumps(t._rows, indent=2)) print("\n\n\n\n\n\n")
def test_save_db2(): logging.debug("-------- test_save_db ---------") rows, cols = load("/home/zhida/Desktop/Database/code/HW3/CSVFile/city.csv") t = CSVDataTable.CSVDataTable(table_name="city", column_names=cols, primary_key_columns=["id"], loadit=None) t.import_data(rows=rows) t.save() print("save DB success!\n") print("\n\n\n\n\n\n")
def t3(): rows, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/Batting.csv" ) t2 = CSVDataTable.CSVDataTable( table_name="Batting", column_names=cols, primary_key_columns=['playerID', 'teamID', "yearID", 'stint']) t2.import_data(rows) print("T = ", t2) tmp = {"last_name": "Baggins", "first_name": "Bilbo"} print("Specific template = ", t2._get_specific_where(tmp))
def test_find_by_template(): cleanup() print_test_separator("Starting test_find_by_template") cat = CSVCatalog.CSVCatalog() cds = [] cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table("people", "/Users/Yan/Desktop/hw3/data/People.csv", cds) t.define_index("id_idx", "INDEX", ['nameLast']) print("People table metadata = \n", json.dumps(t.describe_table(), indent=2)) people_tbl = CSVDataTable.CSVTable("people") print("Loaded people table = \n", people_tbl) tries = 1000 start_time = time.time() templ = {"nameLast": "Williams"} print("Starting test on find using indexed field, templ = ", json.dumps(templ)) for i in range(0, tries): result = people_tbl.find_by_template( templ, ['playerID', 'nameLast', 'nameFirst']) if i == 0: print("Sample result = ", json.dumps(result)) end_time = time.time() print("Elapsed time for ", tries, "lookups = ", end_time - start_time) tries = 1000 start_time = time.time() templ = {"nameFirst": "Ted"} print("\n\nStarting test on find using NON-indexed field, tmpl = ", json.dumps(templ)) for i in range(0, tries): result = people_tbl.find_by_template( templ, ['playerID', 'nameLast', 'nameFirst']) if i == 0: print("Sample result = ", json.dumps(result)) end_time = time.time() print("Elapsed time for ", tries, "lookups = ", end_time - start_time) print_test_separator("Complete test_find_by_template")
def t3(): """ Test compute_index_value function. :return: """ print("Test compute_index_value function.") rows, cols = load("../CSVFile/rings.csv") t = CSVDataTable.CSVDataTable(table_name="rings", column_names=cols, primary_key_columns=['uni']) t.import_data(rows) print("T = ", t) i = CSVDataTable.Index(name="Bob", table= t, columns=['last_name', 'first_name'], kind="UNIQUE") r = {"last_name": "Wei", "first_name": "Erica", "uni":"cw1177"} kv = i.compute_index_value(r) print(" KV = ", kv) i.add_to_index(row=r, rid="3") print(i) #print("Should return error here. Duplicate"), correct #i.add_to_index(row=r, rid="4") t.save()
def test_compute_key_add_to_index(): logging.debug("-------- test_compute_key_add_to_index ---------") i = CSVDataTable.Index(index_name="test", index_columns=["last_name", "first_name"], kind="INDEX") r = {"last_name": "zhang", "first_name": "zhida", "uni": "zz2578"} key = i.compute_key(r) print("key = ", key) i.add_to_index(r, "2") i.add_to_index(r, "3") print("i = ", i) print("\n\n\n\n\n\n")
def t5(): """ Test get_specific_project function :return: """ print("Test get_specific_project function.") rows, cols = load("../CSVFile/People.csv") t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) p = ["playerID", "People.nameLast", "Batting.H"] print("Specific template = ", t._get_specific_project(p))