示例#1
0
def t10():

    "Test the time oj JOIN with and without optimization!! "

    rows, cols = load("../CSVFile/People.csv")
    t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    rows, cols = load("../CSVFile/BattingSmall.csv")
    t2 = CSVDataTable.CSVDataTable(table_name="BattingSmall", column_names=cols, primary_key_columns=['playerID', 'teamID', 'yearID', 'stint'])
    t2.import_data(rows)
    print("T = ", t)

    print("\n\n With optimization\n")
    start_time = time.time()
    for i in range(0,1):
        j = t2.join(t, ['playerID'],
                    w_clause={"People.nameLast": "Williams", "People.birthCity": "San Diego"},
                    p_clause=['playerID', "People.nameLast", "People.nameFirst", "BattingSmall.teamID",
                              "BattingSmall.yearID", "BattingSmall.stint", "BattingSmall.H", "BattingSmall.AB"],
                    optimize=True)
    end_time = time.time()
    print("Elapsed time = ", end_time-start_time)

    print("\n\n Without optimization\n")
    start_time = time.time()
    for i in range(0, 1):
        j = t2.join(t, ['playerID'],
                    w_clause={"People.nameLast": "Williams", "People.birthCity": "San Diego"},
                    p_clause=['playerID', "People.nameLast", "People.nameFirst", "BattingSmall.teamID",
                              "BattingSmall.yearID", "BattingSmall.stint", "BattingSmall.H", "BattingSmall.AB"],
                    optimize=False)
    end_time = time.time()
    print("Elapsed time = ", end_time - start_time)
示例#2
0
def t4():
    rows, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/People.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="People",
                                  column_names=cols,
                                  primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    rows, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/Batting.csv"
    )
    t2 = CSVDataTable.CSVDataTable(
        table_name="Batting",
        column_names=cols,
        primary_key_columns=['playerID', 'teamID', "yearID", 'stint'])
    t2.import_data(rows)
    print("T = ", t2)

    j = t2.join(t, ['playerID'],
                w_clause={
                    "People.nameLast": "Williams",
                    "People.birthCity": "San Diego"
                },
                p_clause=[
                    'playerID', "People.nameLast", "People.nameFirst",
                    "Batting.teamID", "Batting.yearID", "Batting.stint",
                    "Batting.H", "Batting.AB"
                ],
                optimize=True)
    print("Result = ", j)
    print("All rows =", json.dumps(j.get_rows(), indent=2))
示例#3
0
def test_save_and_load():
    print("\n\n*************TEST TEN, SAVE AND LOAD**************")
    rows, cols = load("CSVFile/offices.csv")

    print("\n ***READING CSV TO LOAD OFFICES.CSV")
    t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"])
    t.import_data(rows)

    print("\n ***SAVING DOWN THE CSVDATATABLE")
    t.save()


    print("TABLE INFO AND ITS INDEXES\n")
    print(t)
    for k,v in t._indexes.items():
        print(v.to_json())

    print("\n ***LOADING CSV DATATABLE***")
    t2 = CSVDataTable.CSVDataTable(table_name = "Offices",loadit=True)

    print("\n **TABLE LOADED CONFIRM THEY ARE THE SAME**\n")
    print(t2)
    print("\n ***CONFIRM INDEX INTEGRITY***")
    for k,v in t2._indexes.items():
        print(v.to_json())
示例#4
0
def t4():
    """
    Test join function.
    :return:
    """

    print("test JOIN function. ")

    rows, cols = load("../CSVFile/People.csv")
    t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    rows, cols = load("../CSVFile/BattingSmall.csv")
    t2 = CSVDataTable.CSVDataTable(table_name="BattingSmall", column_names=cols, primary_key_columns=['playerID', 'teamID', 'yearID', 'stint'])
    t2.import_data(rows)
    print("T = ", t)

    j = t2.join(t, ['playerID'],
                w_clause={"People.nameLast": "Williams", "People.birthCity": "San Diego"},
                p_clause=['playerID', "People.nameLast", "People.nameFirst", "BattingSmall.teamID",
                          "BattingSmall.yearID", "BattingSmall.stint", "BattingSmall.H", "BattingSmall.AB"], optimize=True)

    print("Result = ", j)
    print(j.get_rows())
    print("All rows = ", json.dumps(j.get_rows(), indent=2))
示例#5
0
def test_join_optimizable_3(optimize=False):
    """
    :return:
    """
    cleanup()
    print_test_separator("Starting test_optimizable_3, optimize = " +
                         str(optimize))

    cat = CSVCatalog.CSVCatalog()
    cds = []

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", "text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    t = cat.create_table("people", "/Users/Yan/Desktop/hw3/data/People.csv",
                         cds)
    t.define_index("pid_idx", "INDEX", ['playerID'])
    print("People table metadata = \n", json.dumps(t.describe_table(),
                                                   indent=2))

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("H", "number", True))
    cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number"))
    cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True))
    cds.append(
        CSVCatalog.ColumnDefinition("stint",
                                    column_type="number",
                                    not_null=True))

    t = cat.create_table("batting", "/Users/Yan/Desktop/hw3/data/Batting.csv",
                         cds)
    print("Batting table metadata = \n",
          json.dumps(t.describe_table(), indent=2))
    t.define_index("pid_idx", "INDEX", ['playerID'])

    people_tbl = CSVDataTable.CSVTable("people")
    batting_tbl = CSVDataTable.CSVTable("batting")

    print("Loaded people table = \n", people_tbl)
    print("Loaded batting table = \n", batting_tbl)

    start_time = time.time()

    tmp = {"playerID": "willite01"}
    join_result = people_tbl.join(batting_tbl, ['playerID'], tmp)

    end_time = time.time()

    print("Result = \n", join_result)
    elapsed_time = end_time - start_time
    print("\n\nElapsed time = ", elapsed_time)

    print_test_separator("Complete test_join_optimizable_3")
示例#6
0
def test_index():
    print("\n\n*************TEST FOUR CREATING AN INDEX**************")
    t = CSVDataTable.CSVDataTable(table_name="Test",column_names=['last_name','first_name','uni'],loadit=None)
    i = CSVDataTable.Index(index_name="Bob", index_columns = ["last_name","first_name"],kind="INDEX", table = t)
    print(i)
    r = {"last_name": "Ferguson", "first_name": "Donald", "uni":"sure"}
    kv = i.compute_key(r)
    i.add_to_index(row=r,rid="2")
    print("KV = ",kv, "added to index")
示例#7
0
def test_conflict():
    print("\n\n*************TEST FIVE CREATING CONFLICTING INDEX THROWS ERROR WHEN UNIQUE**************")
    t= CSVDataTable.CSVDataTable(table_name="Test",column_names=['last_name','first_name','uni'],loadit=None)
    i = CSVDataTable.Index(index_name="Bob", index_columns = ["last_name","first_name"],kind="UNIQUE",table=t)
    r = {"last_name": "Ferguson", "first_name": "Donald", "uni":"sure"}
    kv = i.compute_key(r)
    i.add_to_index(row=r,rid="2")
    try:
        i.add_to_index(row=r, rid="3")
    except Exception as e:
        print("We caught an error: ", e)
示例#8
0
def test_save_db4():

    logging.debug("-------- test_save_db ---------")
    rows, cols = load(
        "/home/zhida/Desktop/Database/code/HW3/CSVFile/beach-weather-stations-automated-sensors-1.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="automated-sensors",
                                  column_names=cols,
                                  primary_key_columns=["Measurement ID"],
                                  loadit=None)
    t.import_data(rows=rows)
    t.add_index("Station Name", ["Station Name"], "INDEX")
    t.save()
    print("save DB success!\n")
    print("\n\n\n\n\n\n")

    time1 = time.time()
    for i in range(100):
        t.find_by_template(tmp={
            "Station Name": "Oak Street Weather Station",
            "Wet Bulb Temperature": 55
        },
                           use_index=False)
    time2 = time.time()
    print("takes time = ", str(time2 - time1) + " second")

    time1 = time.time()
    for i in range(100):
        t.find_by_template(tmp={
            "Station Name": "Oak Street Weather Station",
            "Wet Bulb Temperature": 55
        },
                           use_index=True)
    time2 = time.time()
    print("takes time = ", str(time2 - time1) + " second")
示例#9
0
def test_find():
    print("\n\n*************TEST TWO,  FIND BY TEMPLATE AND COMPARE RUN TIME WITH INDEX vs WITHOUT**************")
    tmp = {"playerID":"willite01"}
    rows, cols = load("CSVFile/people.csv")
    t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=['playerID'])
    t.import_data(rows)

    print("\n***TESTING RUN WITHOUT INDEX***")
    start = time.time()
    for i in range(0,5):
        r = t.find_by_template(tmp,fields=None,use_index=False)
        if i ==0:
            print("Row = ",r)
    end = time.time()
    elapsed = end - start
    print("Elapsed time=", elapsed)

    print("\n***TESTING RUNNING WITH INDEX***")
    start = time.time()
    for i in range(0,5):
        r=t.find_by_template(tmp,fields=None,use_index=True)
        if i==0:
            print("Row = ",r)
    end = time.time()
    elapsed = end-start
    print("Elapsed time = ",elapsed)
示例#10
0
def t2():
    """
    Test add_index function, use index.
    :return:
    """

    print("Test add_index function, find_by_template use index.")

    rows, cols = load("../CSVFile/People.csv")
    t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    tmp = {'playerID': 'willite01', "nameLast": "Williams"}
    t.add_index("LASTNAME", ['nameLast', 'nameFirst'], "INDEX")

    start = time.time()

    for i in range(0, 1000):
        r = t.find_by_template(tmp, fields=None, use_index=True).get_rows()
        if i == 0:
            print("Row = ", r)

    end = time.time()
    elapsed = end - start
    print("Time : ", elapsed)
示例#11
0
def test_insert_and_import_data():
    print("\n\n*************TEST ONE, TEST LOAD FUNCTION**************")
    print("*** HERE WE TEST THE IMPORT_DATA FUNCTION WHICH LEVEREAGES INSERT. LATER WE ALSO TEST LOADIT = TRUE AND THE LOAD FUNCTION***")
    rows, cols = load("CSVFile/people.csv")
    t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ",t)
示例#12
0
def t1():
    """
    Test import_data and find_by_template functions.
    :return:
    """
    print("Test import_data(which including add_rows(), and test find_by_tmp not use index. ")

    rows, cols = load("../CSVFile/People.csv")
    t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    tmp = {'playerID': 'willite01'}

    start = time.time()
    for i in range(0, 1000):
        r = t.find_by_template(tmp, fields=None, use_index=False).get_rows()
        if i == 0:
            print("Row = ", r)

    end = time.time()
    elapsed = end - start
    print("Time : ", elapsed)

    t.save()
示例#13
0
def test_index_or_no_index():
    logging.debug("-------- test_index_or_no_index ---------")

    rows, cols = load(
        "/home/zhida/Desktop/Database/code/HW3/CSVFile/beach-weather-stations-automated-sensors-1.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="automated-sensors",
                                  column_names=cols,
                                  primary_key_columns=["Measurement ID"],
                                  loadit=None)
    t.import_data(rows=rows)
    t.add_index("Station Name", ["Station Name"], "INDEX")
    t.add_index("Wet Bulb Temperature", ["Wet Bulb Temperature"], "INDEX")
    print("\n\n\n")

    time1 = time.time()
    for i in range(1000):
        t.find_by_template(tmp={"Wet Bulb Temperature": '7.0'},
                           use_index=False,
                           debug_log=False)
    time2 = time.time()
    print("[-No index-] -> run find_by_template(), 1000 iterations\n")
    print("takes time = ", str(time2 - time1) + " second\n")

    time1 = time.time()
    for i in range(1000):
        t.find_by_template(tmp={"Wet Bulb Temperature": '7.0'},
                           use_index=True,
                           debug_log=False)
    time2 = time.time()
    print("[-Use index-] -> run find_by_template(), 1000 iterations\n")
    print("takes time = ", str(time2 - time1) + " second")
示例#14
0
def test_specific_project():
    rows, cols = load("CSVFile/People.csv")
    t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=["playerID"])
    t.import_data(rows)
    print("T = ",t)

    p = ["playerID","People.nameLast","Batting.H"]
    print("Specific template = ", t._get_specific_project())
示例#15
0
def test3():
    i = CSVDataTable.Index(name="Bob",
                           columns=["Last_Name", "First_Name"],
                           kind="UNIQUE")
    r = {"Last_Name": "Ferguson", "First_Name": "Donald", "Uni": "sure"}
    kv = i.compute_key(r)
    print("KV = ", kv)
    i.add_to_index(row=r, rid="2")
    print("I = ", i)
示例#16
0
def t9():

    "Test delete function. Check if row id is correct after deletion. "
    rows, cols = load("../CSVFile/rings.csv")
    t = CSVDataTable.CSVDataTable(table_name="Rings", column_names=cols, primary_key_columns=['uni'])
    t.import_data(rows)
    r = t.delete({"last_name": "Baggins"})
    print("delete:", r)
    print("\n\nafter delete, T = ", t)
示例#17
0
def test_add_index():
    print("\n\n*************TEST SIX, CREATE WITH INDEX AND THEN ADD ANOTHER INDEX**************")
    rows, cols = load("CSVFile/offices.csv")
    t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"])
    print("\n***ORIGINALLY ***\n")
    print(t)
    cols = ["city","state","postalCode"]
    t.add_index(name="location",column_list=cols,kind="INDEX")
    print("\n***ADDING ANOTHER INDEX, LOCATION ***\n")
    print(t)
示例#18
0
def test_incomplete_index_info():
    ###CHECK FOR not complete index info ADD_INDEX FUNCTION 
    print("\n\n*************TEST EIGHT, NOT ENOUGH INFORMATION TO CREATE INDEX THROWS AN ERROR**************")
    rows, cols = load("CSVFile/offices.csv")
    t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"])
    cols = ["city","state","postalCode"]
    try:
        t.add_index("location",cols)
    except Exception as e:
        print("\n We caught an error, you did  not provide enough information to create the key" + "\n",e)
示例#19
0
def test_join():
    ###CHECK FOR not complete index info ADD_INDEX FUNCTION 
    print("\n\n*************TEST ELEVEN JOIN AND ALSO SEE THAT IT SWAPS TABLESX**************")
    rows, cols = load("CSVFile/People.csv")
    t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=["playerID"])
    t.import_data(rows)
    print("T = ",t)

    rows, cols = load("CSVFile/Batting.csv")
    t2=CSVDataTable.CSVDataTable(table_name = "Batting",column_names = cols, primary_key_columns=["playerID","teamID","yearID","stint"])
    t2.import_data(rows)
    print("T2 = ",t2)

    j = t2.join(t, ['playerID'],
        w_clause ={"People.nameLast":"Williams", "People.birthCity": "San Diego"},
        p_clause =['playerID',"People.nameLast","People.nameFirst","Batting.teamID","Batting.yearID","Batting.stint","Batting.H","Batting.AB"]
        ,optimize = True)

    print("Result = " ,j)
    print("All rows = ", json.dumps(j._rows, indent=2))
示例#20
0
def t8():
    rows, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/offices.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="offices",
                                  column_names=cols,
                                  primary_key_columns=['officeCode'])

    t.import_data(rows)

    t.save()
示例#21
0
def test2():
    new_r, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/offices.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="offices.",
                                  column_names=cols,
                                  primary_key_columns=['officeCode'],
                                  loadit=None)

    t.import_data(new_r)
    print("t = ", t)
示例#22
0
def test_create_table():

    logging.debug("-------- test_create_table ---------")
    t = CSVDataTable.CSVDataTable(
        table_name="students",
        column_names=["uni", "first_name", "last_name"],
        primary_key_columns=["uni"],
        loadit=None)

    print(t)
    print("\n\n\n\n\n\n")
示例#23
0
def t9():
    rows, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/offices.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="offices",
                                  column_names=cols,
                                  primary_key_columns=['officeCode'])

    t.import_data(rows)

    r = t.delete_by_template({"officeCode": "11"})
    print(r)
示例#24
0
def test_load_data():

    logging.debug("-------- test_load_data ---------")
    rows, cols = load("/home/zhida/Desktop/Database/code/HW3/CSVFile/city.csv")
    t = CSVDataTable.CSVDataTable(table_name="city",
                                  column_names=cols,
                                  primary_key_columns=["id"],
                                  loadit=None)
    t.import_data(rows=rows)

    print("t = ", json.dumps(t._rows, indent=2))
    print("\n\n\n\n\n\n")
示例#25
0
def test_save_db2():

    logging.debug("-------- test_save_db ---------")
    rows, cols = load("/home/zhida/Desktop/Database/code/HW3/CSVFile/city.csv")
    t = CSVDataTable.CSVDataTable(table_name="city",
                                  column_names=cols,
                                  primary_key_columns=["id"],
                                  loadit=None)
    t.import_data(rows=rows)
    t.save()
    print("save DB success!\n")
    print("\n\n\n\n\n\n")
示例#26
0
def t3():
    rows, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/Batting.csv"
    )
    t2 = CSVDataTable.CSVDataTable(
        table_name="Batting",
        column_names=cols,
        primary_key_columns=['playerID', 'teamID', "yearID", 'stint'])
    t2.import_data(rows)
    print("T = ", t2)
    tmp = {"last_name": "Baggins", "first_name": "Bilbo"}
    print("Specific template = ", t2._get_specific_where(tmp))
示例#27
0
def test_find_by_template():

    cleanup()
    print_test_separator("Starting test_find_by_template")

    cat = CSVCatalog.CSVCatalog()
    cds = []

    cds = []
    cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True))
    cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCity", "text"))
    cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text"))
    cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text"))

    t = cat.create_table("people", "/Users/Yan/Desktop/hw3/data/People.csv",
                         cds)
    t.define_index("id_idx", "INDEX", ['nameLast'])
    print("People table metadata = \n", json.dumps(t.describe_table(),
                                                   indent=2))

    people_tbl = CSVDataTable.CSVTable("people")
    print("Loaded people table = \n", people_tbl)

    tries = 1000
    start_time = time.time()
    templ = {"nameLast": "Williams"}
    print("Starting test on find using indexed field, templ = ",
          json.dumps(templ))
    for i in range(0, tries):
        result = people_tbl.find_by_template(
            templ, ['playerID', 'nameLast', 'nameFirst'])
        if i == 0:
            print("Sample result = ", json.dumps(result))
    end_time = time.time()
    print("Elapsed time for ", tries, "lookups = ", end_time - start_time)

    tries = 1000
    start_time = time.time()
    templ = {"nameFirst": "Ted"}
    print("\n\nStarting test on find using NON-indexed field, tmpl = ",
          json.dumps(templ))
    for i in range(0, tries):
        result = people_tbl.find_by_template(
            templ, ['playerID', 'nameLast', 'nameFirst'])
        if i == 0:
            print("Sample result = ", json.dumps(result))
    end_time = time.time()
    print("Elapsed time for ", tries, "lookups = ", end_time - start_time)

    print_test_separator("Complete test_find_by_template")
示例#28
0
def t3():

    """
    Test compute_index_value function.
    :return:
    """
    print("Test compute_index_value function.")
    rows, cols = load("../CSVFile/rings.csv")
    t = CSVDataTable.CSVDataTable(table_name="rings", column_names=cols, primary_key_columns=['uni'])
    t.import_data(rows)
    print("T = ", t)

    i = CSVDataTable.Index(name="Bob", table= t, columns=['last_name', 'first_name'], kind="UNIQUE")
    r = {"last_name": "Wei", "first_name": "Erica", "uni":"cw1177"}
    kv = i.compute_index_value(r)
    print(" KV = ", kv)
    i.add_to_index(row=r, rid="3")
    print(i)
    #print("Should return error here. Duplicate"), correct
    #i.add_to_index(row=r, rid="4")

    t.save()
示例#29
0
def test_compute_key_add_to_index():

    logging.debug("-------- test_compute_key_add_to_index ---------")
    i = CSVDataTable.Index(index_name="test",
                           index_columns=["last_name", "first_name"],
                           kind="INDEX")
    r = {"last_name": "zhang", "first_name": "zhida", "uni": "zz2578"}
    key = i.compute_key(r)
    print("key = ", key)

    i.add_to_index(r, "2")
    i.add_to_index(r, "3")
    print("i = ", i)
    print("\n\n\n\n\n\n")
示例#30
0
def t5():
    """
    Test get_specific_project function
    :return:
    """
    print("Test get_specific_project function.")

    rows, cols = load("../CSVFile/People.csv")
    t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    p = ["playerID", "People.nameLast", "Batting.H"]
    print("Specific template = ", t._get_specific_project(p))