Пример #1
0
def test3(intervals, n):
    count = 0
    # 1. Loading IB-Tree
    print "Loading IB-Tree..."
    # 2. Insert buckets into IB-Tree
    tree = IBTree()
    tree.readMetaData()
    # 2.1. Print IB-Tree
    print "Done!"
    #print "Printing IB-Tree..."
    #tree.printIBTree()

    # 3. Loading IB+-Tree
    print "Loading IB+-Tree"
    plusTree = IBPlusTree()

    plusTree.readMetaData()

    plusTree.setIBTree(tree)
    print "Done!"

    # 4. Query data for a given interval
    for i in range(0, n):
        listBuckets = ListBuckets()
        listTuples = ListTuples()
        tt = timer()
        tt.start()
        plusTree.search(listTuples, listBuckets, intervals[i])
        tt.end()
        # 5. Print result
        print intervals[i]
        print "Buckets (IB+-Tree): ", len(listBuckets.results)
        print "Time1: ", tt.resultInSecond
    #readDB(listBuckets.results, "ibPlusTreeDB.dat")
    return
Пример #2
0
def test1():
    count = 0
    # 1. Loading IB-Tree
    print "Loading IB-Tree..."
    # 2. Insert buckets into IB-Tree
    tree = IBTree()
    tree.readMetaData()
    # 2.1. Print IB-Tree
    print "Done!"
    #print "Printing IB-Tree..."
    #tree.printIBTree()

    # 3. Loading IB+-Tree
    print "Loading IB+-Tree"
    plusTree = IBPlusTree()

    plusTree.readMetaData()

    plusTree.setIBTree(tree)
    print "Done!"

    # 4. Query data for a given interval
    listBuckets1 = ListBuckets()
    listBuckets = ListBuckets()
    listTuples = ListTuples()
    plusTree.search(listTuples, listBuckets, [0, 0.15])
    # 5. Print result
    print "Buckets (IB+-Tree): ", listBuckets.results
    tree.search(listBuckets1, [0, 0.15])
    print "Buckets (IB-Tree): ", listBuckets1.results
    return
Пример #3
0
def run_test3():
    #Test IB-Tree
    #1. Read data from listBuckets_2.txt
    #2. Insert buckets into IB-Tree
    #3. Print IB-Tree
    #4. IB+-Tree = copy structure of IB-Tree
    #5. Print structure of IB+-Tree

    #1. Read data from listBuckets.txt
    anEntry = IBEntry()
    print "Reading data and inserting into IB-Tree..."
    fin = open("listBuckets_2.txt", "r")
    #2. Insert buckets into IB-Tree
    interval = [0.0 for x in range(2)]
    bucketID = 0
    tree = IBTree()
    count = 0
    number = input('Enter a number: ')
    for line in fin:
        tokens = line.split(' ')
        if (tokens[0] == "interval"):
            if (count >= number):
                break
            bucketID = int(tokens[1].replace(':', ''))
            interval[0] = float(tokens[2])
            interval[1] = float(tokens[3])
            #print "BucketID: " + str(bucketID) + "\t [" + str(interval[0]) + ", " + str(interval[1]) + "]"
            #print "Inserting the bucketID: ", bucketID
            tree.insertBucket(interval, bucketID)
            count += 1
    #3. Print IB-Tree
    print "Done!"
    print "Printing IB-Tree..."
    tree.printIBTree()
    fin.close()
    print "Number of buckets: " + str(bucketID + 1)
    #4. IB+-Tree = copy structure of IB-Tree
    plusTree = IBPlusTree(tree)
    plusTree.copyStructure(tree)
    #5. Print structure of IB+-Tree
    plusTree.printIBPlusTree(False)
    print "Finished!"
    return
def test_ImprintsIBTree(initInput, initNum, input, numInput, queryInterval,
                        startID):
    # 1. Read sorted data from InitImprintsIBTree_sorted_v3.dat file (around 1 000 000 rows = 1 000 buckets)
    listPT = [0.0 for x in range(0, Constants.IMPRINTS_NUM_PT)]
    ibTree = IBTree()
    finput = open(initInput, "r")
    counter = 0
    numRows = 0
    print("1. Read sorted data from InitImprintsIBTree_sorted_v3.dat")
    print("2 & 3. Build imprints and insert into IBTree")
    for line in finput:
        listValues = line.split('\'')
        lenList = len(listValues)
        nValue = int(lenList / 2)
        for i in range(0, nValue):
            pt = float(listValues[i * 2 + 1])
            listPT[numRows] = pt
            numRows += 1
            if (numRows == Constants.IMPRINTS_NUM_PT):
                # 2. Build Imprints for these data
                interval = imprintsBucket(listPT)
                # 3. Insert these data into IBTree
                ibTree.insertBucket(interval, startID)
                numRows = 0
                startID += 1
        counter += 1
        if (counter == initNum):
            break
    finput.close()
    print("4. Copy IBTree structure to IB+-Tree")
    # 4. Copy IBTree structure to IB+Tree
    ibPlusTree = IBPlusTree(ibTree)
    ibPlusTree.copyStructure(ibTree)
    print("5. Read random data from IniImprintsIBTree_random_v4.data file")
    print("6. Insert these data into IB+-Tree")
    # 5. Read random data from InitImprintsIBTree_random_v4.data file (around 1 000 000 rows = 1 000 buckets)
    fin = open(input, "r")
    counter = 0
    for line in fin:
        listValues = line.split('\'')
        lenList = len(listValues)
        nValue = int(lenList / 2)
        pts = []
        for i in range(0, nValue):
            pt = float(listValues[i * 2 + 1])
            pts.append(pt)
            #imprints = imprintsBucket(pts)
            imprintMark = imprintsHash(pts)  # skip at this moment
            # 6. Insert these data into IB+-Tree and thus IB-Tree
            ibPlusTree.insertTuple(imprintMark, str(pts))
        counter += 1
        if (counter == numInput):
            break
    fin.close()
    print("7. Run the queries and obtain the results")
    # 7. Run the queries to obtain the results
    listBuckets = ListBuckets()
    listTuples = ListTuples()
    ibPlusTree.searchImprintsAll(listTuples, listBuckets, queryInterval)
    print "Result: ", listBuckets.results
    ibTree.printEvalInfo()
    ibTree.evaluation.printEvalInfoToFile("ImprintsIBTree_output.txt")
    #ibPlusTree.printEvalInfo()
    #ibPlusTree.evaluation.printEvalInfoToFile()

    return Constants.FUNC_TRUE
Пример #5
0
def run_loadData_DB(sortedFile, numRows, numTuples):
    count = 0
    # 1. Read data from listBuckets_sorted_2.txt
    print "Reading data and inserting into IB-Tree..."
    fin1 = open(sortedFile, "r")
    # 2. Insert buckets into IB-Tree
    interval = [0.0 for x in range(2)]
    bucketID = 0
    tree = IBTree()
    count = 0
    if (numRows <= 0):
        number = input('Enter a number: ')
    else:
        number = numRows
    for line in fin1:
        tokens = line.split(' ')
        if (tokens[0] == "interval"):
            if (count >= number):
                break
            bucketID = int(tokens[1].replace(':', ''))
            interval[0] = float(tokens[2])
            interval[1] = float(tokens[3])
            tree.insertBucket(interval, bucketID)
            count += 1
    # 2.1. Print IB-Tree
    print "Done!"
    print "Printing IB-Tree..."
    tree.printIBTree()
    fin1.close()
    print "Number of buckets: " + str(bucketID + 1)

    # IB+-Tree
    # 3. IB+-Tree = copy structure of IB-Tree
    plusTree = IBPlusTree()
    plusTree.copyStructure(tree)

    #4. Read data from the database
    #5. Insert data into the IBPlusTree
    count = 0
    print "Connecting to the database..."
    try:
        conn = psycopg2.connect(
            "dbname='nyc-taxi-data' user='******' password='******' host='localhost' "
        )
        cursor = conn.cursor()
    except:
        print "Error: Cannot connect to the database!"
        return
    print "Connected!"

    start = 0
    end = 0
    size = 250000
    trace = 0
    while (end < numTuples):
        if ((numTuples - end) > size):
            end = start + size
        else:
            end = start + (numTuples - end)
        strQuery = "select * from trips where id >=" + str(
            start) + " and id <" + str(end)
        trace += 1
        if (trace % 10 == 0):
            print trace
        cursor.execute(strQuery)
        data = cursor.fetchall()
        for row in data:
            strValue = "%s\n" % ", ".join(map(str, row))
            key = float(row[12])
            plusTree.insertTuple(key, strValue)
        start = end
    #5. Write the metadata info file
    plusTree.flush()
    print "=================<<<<>>>>=================="
    plusTree.writeMetaData()

    #plusTree.readMetaData()

    #tree.writeMetaData()
    plusTree.ibTree.writeMetaData()

    #tree.readMetaData()

    return
Пример #6
0
def run_test4(inputNum=0, inputNum2=0):
    #Test IBPlus-Tree

    #1. Read data from listBuckets_sorted_2.txt
    print "Reading data and inserting into IB-Tree..."
    fin1 = open("listBuckets_sorted_2.txt", "r")
    #2. Insert buckets into IB-Tree
    interval = [0.0 for x in range(2)]
    bucketID = 0
    tree = IBTree()
    count = 0
    if (inputNum <= 0):
        number = input('Enter a number: ')
    else:
        number = inputNum
    for line in fin1:
        tokens = line.split(' ')
        if (tokens[0] == "interval"):
            if (count >= number):
                break
            bucketID = int(tokens[1].replace(':', ''))
            interval[0] = float(tokens[2])
            interval[1] = float(tokens[3])
            tree.insertBucket(interval, bucketID)
            count += 1
    #3. Print IB-Tree
    print "Done!"
    print "Printing IB-Tree..."
    tree.printIBTree()
    fin1.close()
    print "Number of buckets: " + str(bucketID + 1)

    #IB+-Tree
    #4. IB+-Tree = copy structure of IB-Tree
    plusTree = IBPlusTree(tree)
    plusTree.copyStructure(tree)
    ##5. Print structure of IB+-Tree
    #plusTree.printIBPlusTree(False)
    #6. Read tuples from listBuckets_2.txt
    print "Reading data and inserting into IB+-Tree..."
    fin2 = open("listBuckets_2.txt", "r")
    count2 = 0
    if (inputNum2 <= 0):
        numberIBPlus = input(
            'Please select number of tuples to be insert into IB+-Tree: ')
    else:
        numberIBPlus = inputNum2
    #7. Insert these tuples into IB+-Tree
    for line in fin2:
        tokens = line.split(' ')
        if ((tokens[0] == "interval") | (tokens[0] == "bucketID:")):
            continue
        if (re.match('^#.*$', line)):
            continue
        if (count2 >= numberIBPlus):
            break
        values = line.rstrip('\n').split(', ')
        key = Decimal(values[12])
        #print "Key: ", key
        #print "Tuple: ", values
        #print "Inserting tuple whose key " + str(key) + " into IB+-Tree..."
        plusTree.insertTuple(key, values)
        #print "Done!"
        count2 += 1

    print "Number of tuples: " + str(count2)
    ## 8. Print structure and all data in IB+-Tree
    #plusTree.printIBPlusTree(True)

    # 9. Query data for a given interval
    listBuckets1 = ListBuckets()
    listBuckets = ListBuckets()
    listTuples = ListTuples()
    plusTree.search(listTuples, listBuckets, [0, 0.15])
    # 10. Print result
    print "Buckets (IB+-Tree): ", listBuckets.results
    tree.search(listBuckets1, [0, 0.15])
    print "Buckets (IB-Tree): ", listBuckets1.results
    #strTemp = ""
    #for i in range(len(listTuples.results)):
    #    strTemp += str(listTuples.results[i][12]) + " "
    #    #print "Tuples: ", listTuples.results[i]
    #print "Keys: ", strTemp

    fin2.close()

    print "=================<<<<>>>>=================="
    plusTree.writeMetaData()

    plusTree.readMetaData()

    tree.writeMetaData()

    tree.readMetaData()

    #print "Printing IB-Tree..."
    #tree.printIBTree()

    #plusTree.ibTree.rootNode = tree.rootNode
    # 9. Query data for a given interval
    listBuckets1 = ListBuckets()
    listBuckets = ListBuckets()
    listTuples = ListTuples()
    plusTree.search(listTuples, listBuckets, [0, 0.15])
    # 10. Print result
    print "Buckets (IB+-Tree): ", listBuckets.results
    tree.search(listBuckets1, [0, 0.15])
    print "Buckets (IB-Tree): ", listBuckets1.results
    #strTemp = ""
    #for i in range(len(listTuples.results)):
    #    strTemp += str(listTuples.results[i][12]) + " "
    #    #print "Tuples: ", listTuples.results[i]
    #print "Keys: ", strTemp
    #tree.printEvalInfo()
    #tree.evaluation.printEvalInfoToFile(output)
    print "Finished!"
    return
Пример #7
0
def run_insertion_NYC(position):
    #1. Connect to database
    print "Connecting to the database..."
    try:
        conn = psycopg2.connect("dbname='nyc-taxi-data' user='******' password='******' host='localhost' ")
        cursor = conn.cursor()
    except:
        print "Error: Cannot connect to the nyc-taxi-data database!"
        return
    print "Connected!"

    #2. Initialize IB-Tree and IB+-Tree
    #2.1 Get data from postgresql DB (1 000 000 rows)
    strGetData = "select * from trips where id >= 1 and id <=1000000 order by trip_distance;"
    cursor.execute(strGetData)
    data = cursor.fetchall()
    #strInsert = "INSERT INTO trips_bucket (bucketID, bucketData) VALUES (%s, %s)"
    #2.2 Insert into IB-Tree and copy structure to IB+-Tree
    index = 0
    low = Constants.MAX_DISTANCE
    high = Constants.MIN_DISTANCE
    interval = [0.0 for x in range(2)]
    bucketID = 10
    tree = IBTree()
    temp = ""
    for row in data:
        index += 1
        distance = Decimal(row[12])
        if(low > distance):
            low = distance
        if(high < distance):
            high = distance
        for j in range(0,len(row)):
            temp += str(row[j])
        if(index == 1000):
            bucketID += 1
            interval[0] = float(low)
            interval[1] = float(high)
            # Insert into IB-Tree
            tree.insertBucket(interval, bucketID)
            # Write data into postgresql (not necessary)
            #cursor.execute(strInsert, (bucketID, temp))
            #conn.commit()
            index = 0
            low = Constants.MAX_DISTANCE
            high = Constants.MIN_DISTANCE
    #print "Printing IB-Tree..."
    #tree.printIBTree()
    #IB+-Tree = copy structure of IB-Tree
    plusTree = IBPlusTree(tree)
    plusTree.copyStructure(tree)
    print "Finish initializing IB-Tree and IB+-Tree. Start to evaluate (20 times)...\n"
    tree = None
    tree1 = IBTree()
    plusTree.ibTree = tree1
    #3. Repeat 20 times, measuring the insert performance
    t = timer()
    #3.1 Get data from postgresql database
    for j in range(0, 100):
        strGetData = "select * from trips where id >= " + str((position)*1000000 + j * 10000)
        strGetData += " and id < " + str((position)*1000000 + (j + 1)*10000) + ";"
        #print strGetData
        cursor.execute(strGetData)
        data = cursor.fetchall()
        #3.2 Insert into IB+-Tree and measure the time
        t.start()
        for row in data:
            key = Decimal(row[12])
            temp = ""
            for k in range(0, len(row)):
                temp += str(row[k])
            plusTree.insertTuple(key, temp)
        t.end()
        if(j % 10 == 0):
            print str(j)
    print str(position) + "th -- execution time: " + str(t.getResult()) + "(s)"
    print "Finished!"
Пример #8
0
def run_test(mapFile, inputfile, inputNum, inputNum2, queryInterval, output):
    print "Loading the mapFile..."
    fin1 = open(mapFile, 'r')
    numBucket = 0
    index = 0
    listCTID = ["" for x in range(0, 60000)]
    for line in fin1:
        values = line.split(',', 1)
        listCTID[index] = values[1]
        index += 1
    fin1.close()

    #1. Read data from inputfile
    print "Reading data and inserting into IB-Tree..."
    fin2 = open(inputfile, "r")
    #2. Insert buckets into IB-Tree
    interval = [0.0 for x in range(2)]
    bucketID = 0
    tree = IBTree()
    count = 0
    if (inputNum <= 0):
        number = input('Enter a number: ')
    else:
        number = inputNum
    for line in fin2:
        tokens = line.split(' ')
        if (tokens[0] == "interval"):
            if (count >= number):
                break
            #bucketID = int(tokens[1].replace(':', ''))
            interval[0] = float(tokens[2])
            interval[1] = float(tokens[3])
            #print "BucketID: " + str(bucketID) + "\t [" + str(interval[0]) + ", " + str(interval[1]) + "]"
            #print "Inserting the bucketID: ", bucketID
            tree.insertBucket(interval, bucketID)
            count += 1
            bucketID += 1
    #3. Print IB-Tree
    print "Done!"
    #print "Printing IB-Tree..."
    #tree.printIBTree()
    fin2.close()
    print "Number of buckets: " + str(bucketID + 1)

    #IB+-Tree
    #4. IB+-Tree = copy structure of IB-Tree
    plusTree = IBPlusTree(tree)
    plusTree.copyStructure(tree)
    plusTree.ibPlusBuffer.setStartPointBucketID(bucketID)
    #5. Print structure of IB+-Tree
    #plusTree.printIBPlusTree(False)
    #6. Read tuples from BigData_listMuonBuckets_100_200_2_1->23.txt
    count2 = 0
    if (inputNum2 <= 0):
        numberIBPlus = input(
            'Please select number of tuples to be insert into IB+-Tree: ')
    else:
        numberIBPlus = inputNum2
    isStop = False
    print "Reading data and inserting into IB+-Tree..."
    for i in range(1, 24):
        filename = "BigData_listMuonBuckets_100_200_2_" + str(i) + ".txt"
        if (isStop):
            break
        fin2 = open(filename, "r")
        #7. Insert these tuples into IB+-Tree
        for line in fin2:
            tokens = line.split(' ')
            if ((tokens[0] == "interval") | (tokens[0] == "bucketID:")):
                continue
            if (re.match('^#.*$', line)):
                continue
            if (count2 >= numberIBPlus):
                isStop = True
                break
            values1 = line.rstrip('\n').split(', ')
            if (len(values1) < 5):
                #print "##############Warning:", values1
                continue
            for j in range(0, len(values1)):
                if (values1[j].find("u\'pt\'") != -1):
                    temp = values1[j].split(":")
                    key = Decimal(temp[1])
                    break
            plusTree.insertTuple(key, values1)
            #print "Done!"
            count2 += 1
        fin2.close()

    print "Number of tuples: " + str(count2)
    # 8. Print structure and all data in IB+-Tree
    #plusTree.printIBPlusTree(True)
    # 9. Query data for a given interval

    #runQuery(plusTree, tree, [3.0, 3.3675], listCTID, output)
    #runQuery(plusTree, tree, [3.0, 3.735], listCTID, output)
    #runQuery(plusTree, tree, [3.0, 4.47], listCTID, output)
    #runQuery(plusTree, tree, [3.0, 5.94], listCTID, output)
    #runQuery(plusTree, tree, [3.0, 10.35], listCTID, output)
    #10. Connect to DB
    print "Connecting to the database..."
    try:
        conn = psycopg2.connect(
            "dbname='BigDataTest' user='******' password='******' host='localhost' "
        )
        cursor = conn.cursor()
    except:
        print "Error: Cannot connect to the database!"
        return
    print "Connected!"

    listBuckets = ListBuckets()
    listTuples = ListTuples()
    timeCalculator = timer()
    timeCalculator.start()
    plusTree.search(listTuples, listBuckets, queryInterval)
    resultLength = len(listBuckets.results)
    readQuery = ""
    if (resultLength >= 1):
        readQuery = "select * from MuonBucket where "
        tempID = int(listBuckets.results[0])
        tempCTID = listCTID[tempID].rstrip('\n')
        strAdd = "CTID = \'" + str(tempCTID) + "\' "
        readQuery += strAdd
        for i in range(1, resultLength):
            tempID = int(listBuckets.results[i])
            tempCTID = listCTID[tempID].rstrip('\n')
            if (tempCTID == ''):
                continue
            strAdd = "or CTID = \'" + str(tempCTID) + "\' "
            readQuery += strAdd
        cursor.execute(readQuery)
        bufferData = cursor.fetchall()

    timeCalculator.end()

    # 10. Print result
    tempString = "Buckets: " + str(listBuckets.results)
    tempString += "\nList Bucket's length: " + str(resultLength)
    tempString += "\nList Tuple's length: " + str(len(listTuples.results))
    tempString += "\nExecution time (s): " + str(
        timeCalculator.getResult()) + "\n"
    print tempString
    tree.printEvalInfo()
    tree.evaluation.printEvalInfoToFile(output)

    fout2 = open(output, 'a+')
    fout2.write(tempString)
    fout2.write(readQuery)
    fout2.close()
    print "Finished!"

    return
Пример #9
0
def run_test4():
    #Test IB-Tree
    #1. Read data from listBuckets_2.txt
    #2. Insert buckets into IB-Tree
    #3. Print IB-Tree
    #4. IB+-Tree = copy structure of IB-Tree
    #5. Print structure of IB+-Tree
    #6. Continue to read tuples from list_Buckets_2.txt
    #7. Insert these tuples into IB+-Tree
    #8. Print structure and all data in IB+-Tree
    #9. Query data for a given interval
    #10. Print result
    ########################################################
    #1. Read data from listBuckets.txt
    anEntry = IBEntry()
    print "Reading data and inserting into IB-Tree..."
    fin = open("listBuckets_2.txt", "r")
    #2. Insert buckets into IB-Tree
    interval = [0.0 for x in range(2)]
    bucketID = 0
    tree = IBTree()
    countIB = 0
    number = input('Select the number of buckets in IB-Tree: ')
    for line in fin:
        tokens = line.split(' ')
        if (tokens[0] == "interval"):
            if (countIB >= number):
                break
            bucketID = int(tokens[1].replace(':', ''))
            interval[0] = float(tokens[2])
            interval[1] = float(tokens[3])
            #print "BucketID: " + str(bucketID) + "\t [" + str(interval[0]) + ", " + str(interval[1]) + "]"
            #print "Inserting the bucketID: ", bucketID
            tree.insertBucket(interval, bucketID)
            countIB += 1
    #3. Print IB-Tree
    print "Done!"
    print "Printing IB-Tree..."
    tree.printIBTree()
    print "Number of buckets: " + str(bucketID + 1)
    #4. IB+-Tree = copy structure of IB-Tree
    plusTree = IBPlusTree(tree)
    plusTree.copyStructure(tree)
    #5. Print structure of IB+-Tree
    plusTree.printIBPlusTree(False)
    #6. Continue to read tuples from list_Buckets_2.txt
    #7. Insert these tuples into IB+-Tree
    countIBPlus = 0
    numberIBPlus = input(
        'Select the number of tuples to insert into IB+-Tree: ')
    for line in fin:  #Continue to read tuples from file
        tokens = line.split(' ')
        if ((tokens[0] == "interval") | (tokens[0] == "bucketID:")):
            continue
        if (re.match('^#.*$', line)):
            continue
        if (countIBPlus >= numberIBPlus):
            break
        #print "Line: ", line
        values = line.rstrip('\n').split(', ')
        key = Decimal(values[12])
        print "Key: ", key
        print "Tuple: ", values
        print "Inserting tuple whose key " + str(key) + " into IB+-Tree..."
        plusTree.insertTuple(key, values)
        print "Done!"
        countIBPlus += 1
    print "Number of tuples: " + str(countIBPlus)
    #8. Print structure and all data in IB+-Tree
    plusTree.printIBPlusTree(True)
    #9. Query data for a given interval
    listBuckets = ListBuckets()
    listTuples = ListTuples()
    plusTree.search(listTuples, listBuckets, [0.0, 0.5])
    #10. Print result
    print "Buckets: ", listBuckets.results
    strTemp = ""
    for i in range(len(listTuples.results)):
        strTemp += str(listTuples.results[i][12]) + " "
        print "Tuples: ", listTuples.results[i]
    print "Keys: ", strTemp
    #print "Created Tuples: " + str(plusTree.ibPlusBuffer.createdTuples)
    #print "Released Tuples: " + str(plusTree.ibPlusBuffer.releasedTuples)
    #print "Moved Tuples: " + str(plusTree.ibPlusBuffer.movedTuples)
    fin.close()
    print "Finished!"
    return
Пример #10
0
def buildOutputIBPlusLayout(inputNum=0, start=0, inputNum2=0):
    #Test IBPlus-Tree

    #1. Read data from listBuckets_sorted_2.txt
    print "Reading data and inserting into IB-Tree..."
    fin1 = open("listBuckets_sorted_4.txt", "r")
    #2. Insert buckets into IB-Tree
    interval = [0.0 for x in range(2)]
    bucketID = 0
    tree = IBTree()
    count = 0
    if(inputNum <= 0):
        number = input('Enter a number: ')
    else:
        number = inputNum

    numTuple = 0
    min = 0.0
    max = 0.0
    for line in fin1:
        tokens = line.split(' ')
        if(count >= number): #reach limitation of number of bucket
            break
        if(re.match('^#.*$', line)):
            continue
        if((tokens[0] == "interval") | (tokens[0] == "bucketID:")):
            continue
        values = line.rstrip('\n').split(', ')
        value = Decimal(values[12])
        if(numTuple == 0):
            min = max = value
            numTuple += 1
        else:
            if(min > value):
                min = value
            if(max < value):
                max = value
            numTuple += 1
            if(numTuple == 1000):
                interval[0] = min
                interval[1] = max
                tree.insertBucket(interval, bucketID)
                bucketID += 1
                numTuple = 0
                min = max = 0.0
                count += 1
    #3. Print IB-Tree
    print "Done!"
    #print "Printing IB-Tree..."
    #tree.printIBTree()
    fin1.close()
    print "Number of buckets: " + str(bucketID)

    #IB+-Tree
    #4. IB+-Tree = copy structure of IB-Tree
    plusTree = IBPlusTree(tree)
    plusTree.copyStructure(tree)
    ##5. Print structure of IB+-Tree
    #plusTree.printIBPlusTree(False)
    #6. Read tuples from listBuckets_2.txt
    print "Reading data and inserting into IB+-Tree..."
    fin2 = open("listBuckets_random_3.txt", "r")
    count2 = 0
    if(inputNum2 <= 0):
        numberIBPlus = input('Please select number of tuples to be insert into IB+-Tree: ')
    else:
        numberIBPlus = inputNum2
    #7. Insert these tuples into IB+-Tree
    cStart = 0
    for line in fin2:
        tokens = line.split(' ')
        if(count2 >= numberIBPlus):
            break
        if(re.match('^#.*$', line)):
            continue
        if((tokens[0] != "interval") & (tokens[0] != "bucketID:")):
            cStart += 1
            if(cStart < start):
                continue
            values = line.rstrip('\n').split(', ')
            value = Decimal(values[12])
            #tuple = Tuple()
            #tuple.key = value
            #tuple.data = [value]
            plusTree.insertTuple(value, [value])
            count2 += 1

    print "Number of tuples: " + str(count2)
    ## 8. Print structure and all data in IB+-Tree
    #plusTree.printIBPlusTree(True)
    fin2.close()
    plusTree.flush(tree)
    print "Finished!"
    return
Пример #11
0
def run_test(output, queryInterval, inputNum=50000, inputNum2=50000):
    #Test IBPlus-Tree

    #1. Read data from BigData_listBuckets_sorted.txt
    print "Reading data and inserting into IB-Tree..."
    fin1 = open("BigData_listBuckets_sorted.txt", "r")
    #2. Insert buckets into IB-Tree
    interval = [0.0 for x in range(2)]
    bucketID = 0
    tree = IBTree()
    count = 0
    if (inputNum <= 0):
        number = input(
            'Select the number of buckets to be inserted into IB-Tree: ')
    else:
        number = inputNum
    for line in fin1:
        tokens = line.split(' ')
        if (tokens[0] == "interval"):
            if (count >= number):
                break
            bucketID = int(tokens[1].replace(':', ''))
            interval[0] = float(tokens[2])
            interval[1] = float(tokens[3])
            tree.insertBucket(interval, bucketID)
            count += 1
    #3. Print IB-Tree
    print "Done!"
    print "Printing IB-Tree..."
    tree.printIBTree()
    fin1.close()
    print "Number of buckets: " + str(bucketID + 1)

    #IB+-Tree
    #4. IB+-Tree = copy structure of IB-Tree
    plusTree = IBPlusTree(tree)
    plusTree.copyStructure(tree)
    #5. Print structure of IB+-Tree
    plusTree.printIBPlusTree(False)
    #6. Read tuples from BigData_listMuonBuckets_100_200_2_5->10.txt
    print "Reading data and inserting into IB+-Tree..."
    count2 = 0
    if (inputNum2 <= 0):
        numberIBPlus = input(
            'Please select number of tuples to be insert into IB+-Tree: ')
    else:
        numberIBPlus = inputNum2
    for j in range(5, 11):
        filename = "BigData_listMuonBuckets_100_200_2_" + str(j) + ".txt"
        fin2 = open(filename, "r")
        #7. Insert these tuples into IB+-Tree
        for line in fin2:
            tokens = line.split(' ')
            if ((tokens[0] == "interval") | (tokens[0] == "bucketID:")):
                continue
            if (re.match('^#.*$', line)):
                continue
            if (count2 >= numberIBPlus):
                break
            values = line.rstrip('\n').split(', ')
            for i in range(0, len(values)):
                if (values[i].find("u\'pt\'") != -1):
                    temp = values[i].split(":")
                    key = Decimal(temp[1])
                    break
            #key = Decimal(values[12])
            plusTree.insertTuple(key, values)
            count2 += 1
        fin2.close()
    print "Number of tuples: " + str(count2)
    # 8. Print structure and all data in IB+-Tree
    #plusTree.printIBPlusTree(True)
    # 9. Query data for a given interval
    listBuckets = ListBuckets()
    listTuples = ListTuples()
    plusTree.search(listTuples, listBuckets, queryInterval)
    # 10. Print result
    print "Buckets: ", listBuckets.results
    strTemp = ""
    #for i in range(len(listTuples.results)):
    #    strTemp += str(listTuples.results[i][12]) + " "
    #    print "Tuples: ", listTuples.results[i]
    #print "Keys: ", strTemp

    tree.printEvalInfo()
    tree.evaluation.printEvalInfoToFile(output)
    print "Finished!"
    return