def __init__(self, name, num_columns, key):
    """Set up table metadata, the index, and the page-range buffer pool.

    :param name: string       # Table name
    :param num_columns: int   # Number of (integer) data columns
    :param key: int           # Index of the table key in columns
    """
    # Basic schema information.
    self.name = name
    self.key = key
    self.num_columns = num_columns
    # RID -> location map; lookups are meant to migrate to the index API.
    self.page_directory = {}
    self.index = Index(self, self.num_columns)
    # Page ranges live behind a bounded buffer pool instead of an
    # in-memory list of PageRange objects.
    self.buffer_pool_range = BufferPoolRange(BUFFER_POOL_SIZE_RANGE, num_columns)
    self.curr_page_range = 0
    # Insert an all-zero record first — presumably to reserve the
    # initial RID slot; TODO confirm against insertRecord/RID math.
    self.insertRecord([0] * num_columns)
def open(self, path):
    """Attach the database to *path* and load every persisted table.

    If *path* does not exist it is created and the call returns — a brand
    new database has nothing to load. Otherwise each subdirectory is
    expected to contain a pickled Table (`<name>/<name>.pkl`) plus one
    text dump per indexed column (`table_index_col<i>.txt`).

    Fix over the original: both file handles were leaked if pickle.load
    or the index parsing raised; they are now closed via `with`, and the
    index file is streamed line by line instead of readlines().
    """
    self.path = path
    if not os.path.isdir(path):
        # Fresh database: create the directory, nothing to load yet.
        os.mkdir(path)
        return
    for file in os.listdir(path):
        t_path = self.path + '/' + str(file) + '/' + str(file) + '.pkl'
        # NOTE(review): pickle.load is unsafe on untrusted data; this
        # assumes the directory is trusted local state written by close().
        with open(t_path, 'rb+') as f:
            table = pickle.load(f)
        # Runtime-only attributes are not pickled; rebuild them here.
        table.index = Index(table)
        table.buffer = Bufferpool(table)
        table.sem = threading.RLock()
        # Rebuild each secondary index from its on-disk text dump.
        # Line format (per the split below): "<value>_<rid>_<rid>_..._".
        for i in range(table.num_index):
            index_path = self.path + '/' + file + '/table_index_col' + str(i) + '.txt'
            with open(index_path, 'r+') as indexObj:
                for line in indexObj:
                    line = line.split('_')
                    for rid in line[1:-1]:
                        table.index.insert(i, int(line[0]), rid)
        self.append_table(table)
        self.num_table += 1
def __init__(self, name, num_columns, key, bufferpool):
    """Initialize table metadata, lookup directories, and lock tables.

    :param name: string        # Table name
    :param num_columns: int    # Number of (integer) data columns
    :param key: int            # Index of the table key in columns
    :param bufferpool:         # Shared bufferpool object (passed in by caller)
    """
    self.name = name
    self.key = key
    self.num_columns = num_columns
    # Each stored record carries 4 internal columns on top of the data:
    # RID, Indirection, Schema encoding, and Timestamp.
    self.total_columns = num_columns + 4
    # {RID: (pageId, offset)} and {key: RID} lookup tables.
    self.page_directory = {}
    self.index_directory = {}
    self.index = Index(self)
    # Monotonic RID counter so identifiers are never duplicated.
    self.rid_counter = 0
    # pageID currently being appended to.
    self.num_page = 2
    # All configuration constants bundled into one object.
    self.config = init()
    # Page ids of internal/external base pages and of tail pages.
    self.base_pages_internal = [1]
    self.base_pages_external = [2]
    self.tail_pages = [0]
    # Every table shares the database-level bufferpool it was given.
    self.bufferpool = bufferpool
    # Lock bookkeeping for concurrency control.
    self.shared_locks = {}
    self.exclusive_locks = {}
def __init__(self, name, num_columns, key):
    """Create the table and start the background merge worker thread."""
    self.name = name
    self.key = key
    self.num_columns = num_columns
    self.bufferpool = BufferPool(self.num_columns)
    # Location directories: base records, tail records
    # ({tailRID: tailLocation} so a tail record can be found),
    # and the mapping from a tail RID back to its base RID.
    self.basePage_dir = {}
    self.tailPage_dir = {}
    self.tailRIDTOBaseRID = {}
    self.index = Index(self)
    self.num_PageRanges = 1
    # RIDs start at 1; RID 0 is reserved for deleted records.
    self.baseRID = 1
    self.tailRID = 1
    # Merge work queue and a count of merges performed so far.
    self.mergeQ = []
    self.mergedCount = 0
    # Daemon worker: runs self.merge in the background and dies with
    # the process.
    worker = threading.Thread(target=self.merge, args=())
    worker.daemon = True
    worker.start()
def __init__(self, name, num_columns, key):
    """Minimal table: schema metadata, an empty page directory, an index.

    :param name: string        # Table name
    :param num_columns: int    # Number of (integer) data columns
    :param key: int            # Index of the table key in columns
    """
    self.name = name
    self.key = key
    self.num_columns = num_columns
    self.page_directory = {}
    self.index = Index(self)
def __init__(self, name, num_columns, key):
    """Table backed by an in-memory list of page ranges.

    :param name: string        # Table name
    :param num_columns: int    # Number of (integer) data columns
    :param key: int            # Index of the table key in columns
    """
    self.name = name
    self.key = key
    self.num_columns = num_columns
    self.page_directory = {}
    self.index = Index(self)
    # Start with a single page range.
    self.pageRanges = [PageRange(self.num_columns)]
    # Primary-key value -> base RID.
    self.keyToBaseRID = {}
    # Counters start at 1 — presumably RID 0 is reserved; confirm
    # against the delete path.
    self.baseRID = 1
    self.tailRID = 1
def __init__(self, name, num_columns, Table_key, path):
    """Set up disk path, buffers, index, RID pools, and the merge thread.

    :param name: string        # Table name
    :param num_columns: int    # Number of data columns
    :param Table_key: int      # Index of the table key in columns
    :param path: string        # On-disk location for this table
    """
    self.path = path
    self.name = name
    self.Table_key = Table_key
    self.num_columns = num_columns
    # {RID: record object}
    self.page_directory = {}
    # Base-page snapshots used by merging; exact roles were unclear in
    # the original ("idk" comments) — TODO confirm.
    self.origin_base_page_memory = []
    self.after_merge_base_page_memory = {}
    self.buffer = Bufferpool(self)
    self.index = Index(self)
    self.prange_num = 0
    # Next free base / tail RID counters.
    self.free_brid = 0
    self.free_trid = 0
    self.rid_list = []
    self.key_list = set()
    # RIDs queued for disposal (purpose uncertain in the original).
    self.rif_trash = []
    # RIDs waiting to be merged, plus how many merges have run.
    self.merge_waiting_set = set()
    self.merge_times = 0
    self.num_index = 0
    # Re-entrant lock guarding table-level operations.
    self.sem = threading.RLock()
    # Kick off the background merge worker.
    self.merge_start()
def __init__(self, name, num_columns, key): self.name = name self.key = key self.num_columns = num_columns self.bufferpool = BufferPool(self.num_columns) #self.page_directory = {} self.tailPage_lib = {} # Store tailRID: tailLocation, so that we can find a tail record self.index = Index(self) self.num_PageRanges = 1 # baseRID and tailRID are initialized to 1, 0 is for deleted record self.baseRID = 1 self.tailRID = 1 #merge self.mergeQ = queue.Queue() self.deallocateQ = queue.Queue() '''
class Table:
    """
    An L-Store style table whose page ranges live behind a buffer pool.

    :param name: string         # Table name
    :param num_columns: int     # Number of Columns: all columns are integer
    :param key: int             # Index of table key in columns

    RIDs encode a physical location (see getRID and its inverses):
        rid = pageR * (BASE_CONST + TAIL_CONST) * slots
              + pageB * slots + offset,
    where slots = PAGE_SIZE // COL_DATA_SIZE.
    """

    def __init__(self, name, num_columns, key):
        self.name = name
        self.key = key
        self.num_columns = num_columns
        self.page_directory = {}
        # Replace with index, and all references inside table and query
        # with index API.
        self.index = Index(self, self.num_columns)
        self.buffer_pool_range = BufferPoolRange(BUFFER_POOL_SIZE_RANGE, num_columns)
        # self.page_ranges = []
        # self.page_ranges.append(PageRange(self.num_columns))
        self.curr_page_range = 0
        # Insert an all-zero record — presumably to reserve the first
        # RID slot; TODO confirm.
        self.insertRecord([0] * num_columns)

    # Future function to merge tail records into base records
    def __merge(self):
        # create deep copy of page range
        # for x offset in base page
        pass

    def __str__(self):
        return self.name

    def createIndex(self, column_num):
        """Create a secondary index on the given column."""
        self.index.create_index(column_num)

    def getIndex(self, column_num):
        """Return the index structure for the given column."""
        return self.index.getIndex(column_num)

    def close(self):
        """Evict all page ranges from the buffer pool (presumably
        flushing them to disk — confirm in BufferPoolRange.evictAll)."""
        self.buffer_pool_range.evictAll()

    def getName(self):
        return self.name

    # Creates a new PageRange if needed, and appends it to page_ranges
    def newPageRange(self):
        # The buffer pool materializes ranges on demand; the table only
        # advances its current range number.
        # self.page_ranges.append(PageRange(self.num_columns))
        # self.buffer_pool_range.
        self.curr_page_range = self.curr_page_range + 1

    # Helper function for the translation of RID value to RID components
    def getOffset(self, rid):
        """Slot offset within a page block."""
        return rid % (PAGE_SIZE // COL_DATA_SIZE)

    # Helper function for the translation of RID value to RID components
    def getPageR(self, rid):
        """Page-range number the RID belongs to."""
        return rid // ((BASE_CONST + TAIL_CONST) * (PAGE_SIZE // COL_DATA_SIZE))

    # Helper function for the translation of RID value to RID components
    def getPageB(self, rid):
        """Page-block number within the page range."""
        return (rid // (PAGE_SIZE // COL_DATA_SIZE)) % (BASE_CONST + TAIL_CONST)

    # Helper function for the translation of RID components to RID value
    def getRID(self, pageR, pageB, offset):
        return (pageR * (BASE_CONST + TAIL_CONST) * (PAGE_SIZE // COL_DATA_SIZE)) + (
            pageB * (PAGE_SIZE // COL_DATA_SIZE)) + offset

    # Helper function to find the value of the next RID before writing
    # to base pages
    def nextBaseRid(self):
        # Calls for calculation of the first two RID components.
        # #prerid = self.page_ranges[self.curr_page_range].nextBaseRid()
        prerid = self.buffer_pool_range.nextBaseRid_Pool(self.curr_page_range)
        # Calculates the last RID component and adds it together with
        # the previous for the next base RID.
        rid = self.curr_page_range * (BASE_CONST + TAIL_CONST) * (
            PAGE_SIZE // COL_DATA_SIZE) + prerid
        return rid

    # Helper function to find the value of the next tail RID before
    # writing to tail pages
    def nextTailRid(self, pageR):
        # Calls for calculation of the first two RID components.
        # #prerid = self.page_ranges[self.curr_page_range].nextTailRid()
        prerid = self.buffer_pool_range.nextTailRid_Pool(pageR)
        # Calculates the last RID component and adds it together with
        # the previous for the next tail RID.
        rid = pageR * (BASE_CONST + TAIL_CONST) * (PAGE_SIZE // COL_DATA_SIZE) + prerid
        return rid

    # Helper function unique for this metadata scheme
    def formatCols(self, indir, rid, timestamp, schema, columns):
        """Prefix the user columns with the four metadata columns, in
        order: indirection, RID, timestamp, schema encoding."""
        format_cols = []
        format_cols.append(indir)
        format_cols.append(rid)
        format_cols.append(timestamp)
        format_cols.append(schema)
        for index in range(self.num_columns):
            format_cols.append(columns[index])
        return format_cols

    # Function to set the RID of a record to the invalid value
    def deleteRecord(self, rid):
        pageR = self.getPageR(rid)
        pageB = self.getPageB(rid)
        offset = self.getOffset(rid)
        # #self.page_ranges[pageR].deleteRecord(pageB, offset)
        self.buffer_pool_range.deleteRecord_Pool(pageR, pageB, offset)

    # Function to check the indirection value of a record before doing
    # a full read
    def checkIndirection(self, rid):
        """Return the RID of the newest version: the tail RID stored in
        the indirection column, or rid itself when that column is 0."""
        pageR = self.getPageR(rid)
        pageB = self.getPageB(rid)
        offset = self.getOffset(rid)
        # #indir = self.page_ranges[pageR].getIndirection(pageB, offset)
        indir = self.buffer_pool_range.getIndirection_Pool(
            pageR, pageB, offset)
        if indir == 0:
            return rid
        else:
            return indir

    def readRecord(self, rid):
        """Read the latest version of the record identified by rid.
        Returns a Record, or None when the stored RID is 0 (deleted)."""
        # Gets the true rid for the most recent version of the data.
        trueRID = self.checkIndirection(rid)
        # Does the math to calculate pageR, pageB, and offset for
        # record retrieval.
        pageR = self.getPageR(trueRID)
        pageB = self.getPageB(trueRID)
        offset = self.getOffset(trueRID)
        # Retrieves record.
        # #full_record = self.page_ranges[pageR].readBlock(pageB, offset)
        full_record = self.buffer_pool_range.readBlock_Pool(
            pageR, pageB, offset)
        if full_record[RID_COLUMN] == 0:
            return None
        # The user columns are the last num_columns entries of the block.
        data_record = full_record[len(full_record) - self.num_columns:]
        ret_record = Record(rid, data_record[self.key], data_record)
        return ret_record

    def insertRecord(self, columns):
        """Write a new base record and return its base RID."""
        # Check for room for base page, if not make more room.
        # #if self.page_ranges[self.curr_page_range].hasCapacityBase() == False:
        if self.buffer_pool_range.hasCapacityBase_Pool(
                self.curr_page_range) == False:
            self.newPageRange()
        indir = 0
        schema_encoding = 0  # '0' * self.num_columns
        cur_Time = 0  # time()
        base_rid = self.nextBaseRid()
        format_columns = self.formatCols(indir, base_rid, cur_Time,
                                         schema_encoding, columns)
        # #self.page_ranges[self.curr_page_range].writeBaseBlock(format_columns)
        self.buffer_pool_range.writeBaseBlock_Pool(self.curr_page_range,
                                                   format_columns)
        # Register the primary-key value in the index.
        self.index.insertPair(self.key, columns[self.key], base_rid)
        return base_rid

    def updateRecord(self, rid, columns):
        """Append a tail record for rid; None entries in columns keep
        the previous value. Repoints the base record's indirection."""
        # Check for room for tail page, if not make more room.
        # #if self.page_ranges[self.curr_page_range].hasCapacityTail() == False:
        if self.buffer_pool_range.hasCapacityTail_Pool(
                self.curr_page_range) == False:
            self.newPageRange()
        page_R = self.getPageR(rid)
        page_B = self.getPageB(rid)
        page_offset = self.getOffset(rid)
        # #prev_vers = self.page_ranges[page_R].getIndirection(page_B, page_offset)
        prev_vers = self.buffer_pool_range.getIndirection_Pool(
            page_R, page_B, page_offset)
        schema_encoding = 0  # '0' * self.num_columns
        currTime = 0  # time()
        tail_rid = self.nextTailRid(page_R)
        prev_record = self.readRecord(rid)
        if prev_record is None:
            # Record was deleted; nothing to update.
            return
        prev_columns = prev_record.getColumns()
        # Merge new values over the previous version: None = unchanged.
        new_columns = []
        for index in range(self.num_columns):
            if type(columns[index]) == type(None):
                new_columns.append(prev_columns[index])
            else:
                new_columns.append(columns[index])
        format_columns = self.formatCols(prev_vers, tail_rid, currTime,
                                         schema_encoding, new_columns)
        # #self.page_ranges[self.curr_page_range].writeTailBlock(format_columns)
        # val = self.buffer_pool_range.writeTailBlock_Pool(self.curr_page_range, format_columns)
        val = self.buffer_pool_range.writeTailBlock_Pool(
            page_R, format_columns)
        tail_rid2 = int(val)
        # Point the base record's indirection at the new tail record.
        # #self.page_ranges[page_R].editBlock(page_B, INDIRECTION_COLUMN, page_offset, tail_rid)
        self.buffer_pool_range.editBlock_Pool(page_R, page_B,
                                              INDIRECTION_COLUMN,
                                              page_offset, tail_rid)
from template.db import Database
from template.query import Query
from template.config import init
from template.index import Index
from random import choice, randint, sample, seed

# Bootstrap the storage engine and open (or create) the database directory.
init()
db = Database()
db.open('./ECS165')
grades_table = db.create_table('Grades', 5, 0)
index = Index(grades_table)
query = Query(grades_table)

# Repopulate the table with deterministic pseudo-random rows
# (fixed seed so reruns produce identical data).
records = {}
seed(3562901)
base_key = 92106429
for delta in range(1000):
    key = base_key + delta
    # Key column followed by four random grade columns in [0, 20].
    row = [key] + [randint(0, 20) for _ in range(4)]
    records[key] = row
    query.insert(*row)
keys = sorted(records)
print("Insert finished")
class Table:
    """
    :param name: string         # Table name
    :param num_columns: int     # Number of Columns: all columns are integer
    :param key: int             # Index of table key in columns

    Buffer-pool backed table with a simple lock manager layered on top.
    lock_manager_rids / lock_manager_pageRanges map an id to a counter:
    > 0 means that many readers hold it, -1 means one writer holds it,
    0 means free (see lockReadRid / lockWriteRid).
    """

    def __init__(self, name, num_columns, key):
        self.name = name
        self.key = key
        self.num_columns = num_columns
        self.page_directory = {}
        # Replace with index, and all references inside table and query
        # with index API.
        self.index = Index(self, self.num_columns)
        self.buffer_pool_range = BufferPoolRange(BUFFER_POOL_SIZE_RANGE, num_columns)
        # self.page_ranges = []
        # self.page_ranges.append(PageRange(self.num_columns))
        self.curr_page_range = 0
        # Insert an all-zero record — presumably to reserve the first
        # RID slot; TODO confirm.
        self.insertRecord([0] * num_columns)
        # self.merge_queue = Queue()
        # One table-wide mutex guards both lock-manager dictionaries.
        self.lock = threading.Lock()
        self.lock_manager_rids = {}
        self.lock_manager_pageRanges = {}

    def __str__(self):
        return self.name

    def lockReadRid(self, rid):
        """Try to take a shared (read) lock on rid; True on success."""
        self.lock.acquire()
        val = self.lock_manager_rids.get(rid)
        if val is None:
            # First time this rid is seen: register it as unlocked.
            self.lock_manager_rids[rid] = 0
            val = 0
        else:
            pass
        if val >= 0:
            # No writer present: admit one more reader.
            self.lock_manager_rids[rid] = val + 1
            retval = True
        else:
            retval = False
        self.lock.release()
        return retval

    def unlockReadRid(self, rid):
        """Release one shared lock on rid; True on success."""
        self.lock.acquire()
        val = self.lock_manager_rids.get(rid)
        if val is None:
            self.lock_manager_rids[rid] = 0
            val = 0
        else:
            pass
        if val > 0:
            self.lock_manager_rids[rid] = val - 1
            retval = True
        else:
            retval = False
        self.lock.release()
        return retval

    def lockWriteRid(self, rid):
        """Try to take the exclusive (write) lock on rid; True on success."""
        self.lock.acquire()
        val = self.lock_manager_rids.get(rid)
        if val is None:
            self.lock_manager_rids[rid] = 0
            val = 0
        else:
            pass
        if val == 0:
            # Free: mark as writer-held (-1).
            self.lock_manager_rids[rid] = val - 1
            retval = True
        else:
            retval = False
        self.lock.release()
        return retval

    def unlockWriteRid(self, rid):
        """Release the exclusive lock on rid; True on success."""
        self.lock.acquire()
        val = self.lock_manager_rids.get(rid)
        if val is None:
            self.lock_manager_rids[rid] = 0
            val = 0
        else:
            pass
        if val == -1:
            self.lock_manager_rids[rid] = val + 1
            retval = True
        else:
            retval = False
        self.lock.release()
        return retval

    def lockWriteRange(self, pageR):
        """Try to take the exclusive lock on page range pageR."""
        self.lock.acquire()
        val = self.lock_manager_pageRanges.get(pageR)
        if val is None:
            self.lock_manager_pageRanges[pageR] = 0
            val = 0
        else:
            pass
        if val == 0:
            self.lock_manager_pageRanges[pageR] = val - 1
            retval = True
        else:
            retval = False
        self.lock.release()
        return retval

    def unlockWriteRange(self, pageR):
        """Release the exclusive lock on page range pageR."""
        self.lock.acquire()
        val = self.lock_manager_pageRanges.get(pageR)
        if val is None:
            self.lock_manager_pageRanges[pageR] = 0
            val = 0
        else:
            pass
        if val == -1:
            self.lock_manager_pageRanges[pageR] = val + 1
            retval = True
        else:
            retval = False
        self.lock.release()
        return retval

    # Future function to merge tail records into base records
    def merge(self, pageR):
        """Build a compacted copy of page range pageR where every base
        record carries its newest values, then hand it to the buffer
        pool via submitMerge together with the tail-block count."""
        # create deep copy of page range
        deep_copy = self.buffer_pool_range.loadMerge(pageR)
        tail_blocks = deep_copy.tail_count - 1
        # indexes clear for removal, shift constant
        new_copy = PageRange(self.num_columns)
        read_block = None
        # for all base blocks in deep_copy page range
        for pageB in range(0, deep_copy.max_base):
            # for all base records in a base block
            for offset in range(0, deep_copy.page_blocks[pageB].pages[0].num_records):
                # check if records is invalid
                read_block = [0] * deep_copy.page_blocks[0].total
                if deep_copy.getRID(pageB, offset) == 0:
                    # empty record found: keep the all-zero block so
                    # offsets in the merged copy stay aligned
                    pass
                else:
                    # record found, check indir column for most up to
                    # date version
                    indir = deep_copy.getIndirection(pageB, offset)
                    if indir == 0:
                        # version is up to date, copy over from original
                        # base record
                        read_block = deep_copy.readBlock(pageB, offset)
                    else:
                        # version is not up to date, copy over from
                        # indir tail record
                        inder_pageB = self.getPageB(indir)
                        inder_offset = self.getOffset(indir)
                        read_block = deep_copy.readBlock(inder_pageB, inder_offset)
                        # set record indir column to the original indir value
                        read_block[INDIRECTION_COLUMN] = indir
                # NOTE(review): indentation reconstructed from flattened
                # source — the write below is assumed to run for every
                # offset, including empty (all-zero) records.
                checker = new_copy.hasCapacityBase()
                if checker:
                    new_copy.writeBaseBlock(read_block)
                else:
                    print("Unexpected error, new_copy at capacity")
        self.buffer_pool_range.submitMerge(pageR, new_copy, tail_blocks)

    '''
    def enqueueMerge(self, pageR):
        self.merge_queue.enqueue

    def checkMerge(self):
        if self.merge_queue.empty():
            pass
        else:
            # run merge
            next = self.merge_queue.get()
            self.__merge(next)
        pass
    '''

    def createIndex(self, column_num):
        """Create a secondary index on the given column."""
        self.index.create_index(column_num)

    def getIndex(self, column_num):
        """Return the index structure for the given column."""
        return self.index.getIndex(column_num)

    def close(self):
        """Evict all page ranges and drop the (unpicklable) lock —
        presumably in preparation for serialization; TODO confirm."""
        self.buffer_pool_range.evictAll()
        self.buffer_pool_range.lock = None

    def getName(self):
        return self.name

    # Creates a new PageRange if needed, and appends it to page_ranges
    def newPageRange(self):
        # The buffer pool materializes ranges on demand; the table only
        # advances its current range number.
        # self.page_ranges.append(PageRange(self.num_columns))
        # self.buffer_pool_range.
        self.curr_page_range = self.curr_page_range + 1

    # Helper function for the translation of RID value to RID components
    def getOffset(self, rid):
        """Slot offset within a page block."""
        return rid % (PAGE_SIZE // COL_DATA_SIZE)

    # Helper function for the translation of RID value to RID components
    def getPageR(self, rid):
        """Page-range number the RID belongs to."""
        return rid // ((BASE_CONST + TAIL_CONST) * (PAGE_SIZE // COL_DATA_SIZE))

    # Helper function for the translation of RID value to RID components
    def getPageB(self, rid):
        """Page-block number within the page range."""
        return (rid // (PAGE_SIZE // COL_DATA_SIZE)) % (BASE_CONST + TAIL_CONST)

    # Helper function for the translation of RID components to RID value
    def getRID(self, pageR, pageB, offset):
        return (pageR * (BASE_CONST + TAIL_CONST) * (PAGE_SIZE // COL_DATA_SIZE)) + (
            pageB * (PAGE_SIZE // COL_DATA_SIZE)) + offset

    # Helper function to find the value of the next RID before writing
    # to base pages
    def nextBaseRid(self):
        # Calls for calculation of the first two RID components.
        # #prerid = self.page_ranges[self.curr_page_range].nextBaseRid()
        prerid = self.buffer_pool_range.nextBaseRid_Pool(self.curr_page_range)
        # Calculates the last RID component and adds it together with
        # the previous for the next base RID.
        rid = self.curr_page_range * (BASE_CONST + TAIL_CONST) * (
            PAGE_SIZE // COL_DATA_SIZE) + prerid
        return rid

    # Helper function to find the value of the next tail RID before
    # writing to tail pages
    def nextTailRid(self, pageR):
        # Calls for calculation of the first two RID components.
        # #prerid = self.page_ranges[self.curr_page_range].nextTailRid()
        prerid = self.buffer_pool_range.nextTailRid_Pool(pageR)
        # Calculates the last RID component and adds it together with
        # the previous for the next tail RID.
        rid = pageR * (BASE_CONST + TAIL_CONST) * (PAGE_SIZE // COL_DATA_SIZE) + prerid
        return rid

    # Helper function unique for this metadata scheme
    def formatCols(self, indir, rid, timestamp, schema, columns):
        """Prefix the user columns with the four metadata columns, in
        order: indirection, RID, timestamp, schema encoding."""
        format_cols = []
        format_cols.append(indir)
        format_cols.append(rid)
        format_cols.append(timestamp)
        format_cols.append(schema)
        for index in range(self.num_columns):
            format_cols.append(columns[index])
        return format_cols

    # Function to set the RID of a record to the invalid value
    def deleteRecord(self, rid):
        pageR = self.getPageR(rid)
        pageB = self.getPageB(rid)
        offset = self.getOffset(rid)
        # #self.page_ranges[pageR].deleteRecord(pageB, offset)
        self.buffer_pool_range.deleteRecord_Pool(pageR, pageB, offset)

    # Function to check the indirection value of a record before doing
    # a full read
    def checkIndirection(self, rid):
        """Return the RID of the newest version: the tail RID stored in
        the indirection column, or rid itself when that column is 0."""
        pageR = self.getPageR(rid)
        pageB = self.getPageB(rid)
        offset = self.getOffset(rid)
        # #indir = self.page_ranges[pageR].getIndirection(pageB, offset)
        indir = self.buffer_pool_range.getIndirection_Pool(pageR, pageB, offset)
        if indir == 0:
            return rid
        else:
            return indir

    def readFullRecord(self, rid):
        """Read the newest version's raw block (metadata + user columns)."""
        # Gets the true rid for the most recent version of the data.
        trueRID = self.checkIndirection(rid)
        # Does the math to calculate pageR, pageB, and offset for
        # record retrieval.
        pageR = self.getPageR(trueRID)
        pageB = self.getPageB(trueRID)
        offset = self.getOffset(trueRID)
        # Retrieves record.
        # #full_record = self.page_ranges[pageR].readBlock(pageB, offset)
        full_record = self.buffer_pool_range.readBlock_Pool(pageR, pageB, offset)
        return full_record

    def readRecord(self, rid):
        """Read the latest version of the record identified by rid.
        Returns a Record, or None when the stored RID is 0 (deleted)."""
        # Gets the true rid for the most recent version of the data.
        trueRID = self.checkIndirection(rid)
        # Does the math to calculate pageR, pageB, and offset for
        # record retrieval.
        pageR = self.getPageR(trueRID)
        pageB = self.getPageB(trueRID)
        offset = self.getOffset(trueRID)
        # Retrieves record.
        # #full_record = self.page_ranges[pageR].readBlock(pageB, offset)
        full_record = self.buffer_pool_range.readBlock_Pool(pageR, pageB, offset)
        if full_record[RID_COLUMN] == 0:
            return None
        # The user columns are the last num_columns entries of the block.
        data_record = full_record[len(full_record) - self.num_columns:]
        ret_record = Record(rid, data_record[self.key], data_record)
        return ret_record

    def insertRecord(self, columns):
        """Write a new base record and return its base RID."""
        # Check for room for base page, if not make more room.
        # #if self.page_ranges[self.curr_page_range].hasCapacityBase() == False:
        if self.buffer_pool_range.hasCapacityBase_Pool(self.curr_page_range) == False:
            self.newPageRange()
        indir = 0
        schema_encoding = 0  # '0' * self.num_columns
        cur_Time = 0  # time()
        base_rid = self.nextBaseRid()
        format_columns = self.formatCols(indir, base_rid, cur_Time,
                                         schema_encoding, columns)
        # #self.page_ranges[self.curr_page_range].writeBaseBlock(format_columns)
        self.buffer_pool_range.writeBaseBlock_Pool(self.curr_page_range,
                                                   format_columns)
        self.index.insertPair(self.key, columns[self.key], base_rid)
        # Redundant to make sure we always have a current working page range
        if self.buffer_pool_range.hasCapacityBase_Pool(self.curr_page_range) == False:
            self.newPageRange()
        return base_rid

    def updateRecord(self, rid, columns):
        """Append a tail record for rid; None entries in columns keep
        the previous value. Repoints the base record's indirection."""
        # Check for room for tail page, if not make more room.
        # #if self.page_ranges[self.curr_page_range].hasCapacityTail() == False:
        if self.buffer_pool_range.hasCapacityTail_Pool(self.curr_page_range) == False:
            self.newPageRange()
        page_R = self.getPageR(rid)
        page_B = self.getPageB(rid)
        page_offset = self.getOffset(rid)
        # #prev_vers = self.page_ranges[page_R].getIndirection(page_B, page_offset)
        prev_vers = self.buffer_pool_range.getIndirection_Pool(page_R, page_B,
                                                               page_offset)
        schema_encoding = 0  # '0' * self.num_columns
        currTime = 0  # time()
        tail_rid = self.nextTailRid(page_R)
        prev_record = self.readRecord(rid)
        if prev_record is None:
            # Record was deleted; nothing to update.
            return
        prev_columns = prev_record.getColumns()
        # Merge new values over the previous version: None = unchanged.
        new_columns = []
        for index in range(self.num_columns):
            if type(columns[index]) == type(None):
                new_columns.append(prev_columns[index])
            else:
                new_columns.append(columns[index])
        format_columns = self.formatCols(prev_vers, tail_rid, currTime,
                                         schema_encoding, new_columns)
        # #self.page_ranges[self.curr_page_range].writeTailBlock(format_columns)
        # val = self.buffer_pool_range.writeTailBlock_Pool(self.curr_page_range, format_columns)
        val = self.buffer_pool_range.writeTailBlock_Pool(page_R, format_columns)
        tail_rid2 = int(val)
        # Point the base record's indirection at the new tail record.
        # #self.page_ranges[page_R].editBlock(page_B, INDIRECTION_COLUMN, page_offset, tail_rid)
        self.buffer_pool_range.editBlock_Pool(page_R, page_B,
                                              INDIRECTION_COLUMN,
                                              page_offset, tail_rid)
class Table: """ :param name: string #Table name :param num_columns: int #Number of Columns: all columns are integer :param key: int #Index of table key in columns """ def __init__(self, name, num_columns, Table_key, path): self.path = path self.name = name self.Table_key = Table_key self.num_columns = num_columns # self.prange_directory = {} #'col_num': 'page_range_list' self.page_directory = {} #'RID': 'record obj' self.origin_base_page_memory = [] # original unmerged and idk??? self.after_merge_base_page_memory = { } # the new copy that is being merged,idk??? self.buffer = Bufferpool(self) self.index = Index(self) self.prange_num = 0 self.free_brid = 0 self.free_trid = 0 self.rid_list = [] self.key_list = set() self.rif_trash = [] #?????idk self.merge_waiting_set = set() # storing rid which needs to be merged self.merge_times = 0 self.num_index = 0 self.sem = threading.RLock() self.merge_start() # RIDs are shared in one table # once a record, take out one RID from the pool # MILESTONE1 never put RID back to pool # eazy to use BinarySearch in B-tree # TODO: remove hard code, and set it in config # self.total_RID = 100000000 # b_page and t_page will have seperate rids def merge_start(self): thread = threading.Thread(target=self.merge) thread.daemon = True thread.start() def next_free_rid(self, page_pos): if page_pos == 0: rid = 'b' + str(self.free_brid) # if rid >=100000000: # return -1 self.free_brid = self.free_brid + 1 if page_pos == 1: rid = 't' + str(self.free_trid) self.free_trid += 1 return rid """ Create page ranges for each columns (categories) Assign each page range with one base page with nothing in there, and write down their location to the page_directory For example: A table called UCD, which has 3 columns (categories): student name, year, grade. 
""" def get_prange_num(self): return self.prange_num # To Do: build index during looping def create(self): for i in range(self.num_columns + META_DATA_COL_NUM): b_page = Page(0) t_page = Page(0) prange = Prange(b_page, t_page, self.prange_num) # tree = Index(self) # self.index.append(tree) # self.prange_directory.update({i: [prange]}) self.buffer.load_prange(prange) # ZYW: add a return statement to return the prange? # def insert_meta_data(self, prange, record): # prange.b_page[record.page_pos]. def add_prange(self, column, times): for i in range(times): b_page = Page(0) t_page = Page(0) # print('adding prange' + str(self.prange_num)) prange = Prange(b_page, t_page, self.prange_num) # self.prange_directory.get(column).append(prange) self.buffer.load_prange(prange) def create_record(self, rid, indirect, value, data, first): # print('indirect', indirect) # if indirect == None: # print('empty', rid) record = Record(rid, indirect, value, data) if first == True: self.page_directory.update({rid: record}) self.rid_list.append(rid) return record # To Do: inset record to index def insert_record(self, *data): if data[0] in self.key_list: # print(data, 'key existed----') # self.sem.release() return False self.key_list.add(data[0]) record = None first = None rid = None prange_ = None meta_cols = [] read = False # print(data) # flag = False if len(self.buffer.pool) == 0: read = True self.sem.acquire() for i in range(self.num_columns + META_DATA_COL_NUM): if i == 0: first = True else: first = False prange_ = self.buffer.get_(i, self.prange_num, 'in') if prange_[0].b_page[-1].has_capacity() == True: prange_[0].dirty = True if i < self.num_columns: if first == True: rid = self.next_free_rid(0) if rid == None: # print('line 174 error: rid cannot be None') self.sem.release() return -1 record = self.create_record(rid, rid, data[i], data, first) Lock().addLock(LOCK_MUTEX, [record]) meta_cols = record.get_meta() # print(meta_cols) # prange = self.prange_directory.get(i)[-1] 
record.offset = prange_[0].b_page[-1].writeRecord( data[i]) record.page_pos = len(prange_[0].b_page) - 1 record.prange_pos = self.prange_num self.index.insert(i, data[i], record.rid) else: # prange = self.prange_directory.get(i)[-1] # print(record.rid) prange_[0].b_page[-1].writeRecord(data[i]) if self.index.indices[i] is not None: self.index.insert(i, data[i], record.rid) else: # print('pos',i,record.prange_pos,record.page_pos,meta_cols[i - self.num_columns]) # print(meta_cols[0]) # prange = self.prange_directory.get(i)[record.prange_pos] # prange = self.buffer.get_(i, record.prange_pos) prange_[0].b_page[record.page_pos].writeRecord( meta_cols[i - self.num_columns]) else: if self.insert_page_to(i) == -1: # print('i:',i) if first == True: # print('adding prange') # flag = True self.prange_num += 1 self.add_prange(i, self.num_columns + META_DATA_COL_NUM) # self.free_trid = 0 if i < self.num_columns: if first == True: rid = self.next_free_rid(0) record = self.create_record( rid, rid, data[i], data, first) Lock().addLock(LOCK_MUTEX, [record]) meta_cols = record.get_meta() # prange = self.prange_directory.get(i)[-1] # print(self.prange_num) prange_ = self.buffer.get_(i, self.prange_num, 'in') # prange_[0].isFull = True # print('228',prange_[0].b_page[0].num_records) record.offset = prange_[0].b_page[-1].writeRecord( data[i]) # print(self.buffer.pool[1][0][0].b_page[-1].num_records) # print('done') record.page_pos = len(prange_[0].b_page) - 1 record.prange_pos = self.prange_num self.index.insert(i, data[i], record.rid) else: # prange = self.prange_directory.get(i)[-1] prange_ = self.buffer.get_(i, self.prange_num, 'in') # prange_[0].isFull = True # print('236',prange_[0].b_page[0].num_records) prange_[0].b_page[-1].writeRecord(data[i]) if self.index.indices[i] is not None: self.index.insert(i, data[i], record.rid) else: # prange = # self.prange_directory.get(i)[record.prange_pos] prange_ = self.buffer.get_(i, self.prange_num, 'in') # 
                    # NOTE(review): everything from here down to the first
                    # `return True` is the tail of a method whose `def` lies
                    # above this chunk (an insert path, judging by
                    # next_free_rid(0)/create_record). Indentation below is
                    # reconstructed — confirm against the full file.
                    print('242',prange_[0].b_page[0].num_records)
                    # prange_[0].isFull = True
                    # Meta column: write this record's metadata value into the
                    # base page the record already lives on.
                    prange_[0].b_page[record.page_pos].writeRecord(
                        meta_cols[i - self.num_columns])
                    prange_[0].dirty = True
                    # self.free_brid = 0
            else:
                # print('i:',i)
                # self.free_trid = 0
                prange_[0].dirty = True
                if i < self.num_columns:
                    if first == True:
                        # First data column: allocate a fresh base rid, build
                        # the Record, and take a mutex lock on it.
                        rid = self.next_free_rid(0)
                        record = self.create_record(
                            rid, rid, data[i], data, first)
                        Lock().addLock(LOCK_MUTEX, [record])
                        meta_cols = record.get_meta()
                        # prange = self.prange_directory.get(i)[-1]
                        # print('252',prange_[0].b_page[-1].num_records)
                        record.offset = prange_[0].b_page[-1].writeRecord(
                            data[i])
                        record.page_pos = len(prange_[0].b_page) - 1
                        record.prange_pos = self.prange_num
                        self.index.insert(i, data[i], record.rid)
                    else:
                        # Subsequent data columns: append to the newest base
                        # page; index only if an index exists for this column.
                        # prange = self.prange_directory.get(i)[-1]
                        prange_[0].b_page[-1].writeRecord(data[i])
                        if self.index.indices[i] is not None:
                            self.index.insert(i, data[i], record.rid)
                else:
                    # Meta column (rid / indirection / schema / ...): written
                    # at the record's own page position.
                    # prange =
                    # self.prange_directory.get(i)[record.prange_pos]
                    # print('265',prange_[0].b_page[-1].num_records, 'page_pos', record.page_pos)
                    prange_[0].b_page[record.page_pos].writeRecord(
                        meta_cols[i - self.num_columns])
        # Lock().releaseLock(LOCK_MUTEX, [record])
        self.sem.release()
        return True

    def checkSchema(self, record, data):
        """Build a per-column change encoding for an update.

        Compares `data` against the previous version's columns: digit '1'
        means the column is unchanged (or no new value was supplied, i.e.
        data[i] is None), digit '2' means it changed.  The digit string is
        returned as an int (so a leading digit is never lost — every digit
        is 1 or 2, never 0).
        """
        schema = ''
        for i in range(self.num_columns):
            # print(len(data))
            if record.columns[i] == data[i] or data[i] == None:
                schema += '1'
            else:
                schema += '2'
        # print('243. schema', schema)
        return int(schema)

    def addtps(self, key, num):
        """Propagate the base record's update counter to its newest tail
        record as that tail's TPS (tail-page sequence) marker.

        NOTE(review): the `num` parameter is unused — the value written is
        always base_record.update_num.  Confirm callers before removing it.
        """
        base_record = self.page_directory.get(key)
        tail_record = self.page_directory.get(base_record.indirect)
        tail_record.tps = base_record.update_num

    # if key does not exist then return false
    # To Do: update record to index
    def update_record(self, key, brid, *data, delete):
        """Append a new tail record for base record `brid`.

        `data` holds one value per user column (None = keep previous value);
        `delete` (keyword-only) marks the update as a logical delete.

        Returns:
            False       on lock conflict or missing base/previous record,
            cur_record  (the new tail Record) when delete is True,
            True        otherwise.

        Side effects: flushes the merge buffer first if non-empty, bumps the
        base record's update counter, rewrites its indirection/schema meta
        columns in place, appends every column value to the tail pages, and
        queues the base rid for merging.  `self.sem` is a re-entrant lock
        (RLock), so the nested acquire below is safe.
        """
        # print(data)
        if len(self.buffer.buffer_bin) != 0:
            self.merge_start()
            # self.merge_times += 1
        schema = None
        if key == None:
            print('empty key')
        data = list(data)
        self.sem.acquire()
        base_record = self.page_directory.get(brid)
        # NOTE(review): Lock().check is called twice with the same args;
        # if it has side effects this double-checks the lock — verify.
        if Lock().check(LOCK_MUTEX, [base_record]) == False:
            self.sem.release()
            return False
        elif Lock().check(LOCK_MUTEX, [base_record]) == 'pass':
            pass
        else:
            Lock().addLock(LOCK_MUTEX, [base_record])
        base_record.update_num += 1
        rid = self.next_free_rid(1)
        ##print("update type = ", type(base_record))
        # if base_record.indirect == None:
        #     print('got none indirect', key)
        #     return
        # NOTE(review): this None check runs after base_record.update_num
        # was already incremented; a missing record would have raised above.
        if base_record == None:
            print('record doesnt exist')
            # Lock().releaseLock(LOCK_MUTEX, [base_record])
            self.sem.release()
            return False
        # get current prange position
        cur_prange_pos = base_record.prange_pos
        # Previous version = record the base currently points at.
        prev_record = self.page_directory.get(base_record.indirect)
        if prev_record.rid == None:
            print('None prev_record')
            # Lock().releaseLock(LOCK_MUTEX, [base_record])
            self.sem.release()
            return False
        if delete == True:
            # A delete marks every column as "changed".
            schema = int('1' * self.num_columns)
        else:
            schema = self.checkSchema(prev_record, data)
        data[0] = key
        cur_record = Record(rid, prev_record.indirect, key, data)
        cur_record.schema = schema
        meta_cols = cur_record.get_meta()
        # construct linked list: base now points at the new tail record.
        base_record.indirect = cur_record.rid
        base_record.schema = schema
        brecord_meta = base_record.get_meta()
        # Re-entrant acquire (self.sem is an RLock) around the in-place
        # rewrite of the base record's indirection and schema meta columns.
        self.sem.acquire()
        self.buffer.get_(self.num_columns + INDIRECTION_COLUMN, base_record.prange_pos,
                         'up')[0].b_page[base_record.page_pos].updateRecord(
            base_record.offset, brecord_meta[INDIRECTION_COLUMN])
        self.buffer.get_(self.num_columns + SCHEMA_ENCODING_COLUMN, base_record.prange_pos,
                         'up')[0].b_page[base_record.page_pos].updateRecord(
            base_record.offset, schema)
        self.sem.release()
        cur_record.indirect = prev_record.rid
        self.page_directory.update({cur_record.rid: cur_record})
        # Write every column (user data + meta) of the new tail record.
        for i in range(self.num_columns + META_DATA_COL_NUM):
            prev_data = None
            if i < self.num_columns:
                prev_data = self.get_data(prev_record.rid, i, prev_record.prange_pos,
                                          prev_record.page_pos, prev_record.offset)
                if prev_data == '/' or prev_data == None:
                    # '/' is the tombstone for a value with no prior content.
                    if data[i] != None and self.index.indices[i] is not None:
                        self.index.update(i, None, data[i], base_record.rid)
                    prev_data = None
                else:
                    # print(i, prev_data, data[i])
                    if self.index.indices[i] is not None:
                        self.index.update(i, prev_data, data[i],
                                          base_record.rid)
                # print('prev_data', prev_data)
                # self.index.update(i, prev_data, data[i], base_record.rid)
                # NOTE(review): the data_ block below is indented inside the
                # `i < self.num_columns` branch — data[i] is only defined for
                # user columns.  Confirm against the original file.
                data_ = data[i]
                # handle the case when the data is empty, we will emerge data
                # from previous record
                if data[i] == None:
                    data_ = prev_data
                # handle the case when the data is always empty, we use '/' to
                # represent the final value
                if delete == True or data_ == '/':
                    data_ = None
            # print('176',prev_record.rid, prev_record.prange_pos,
            # prev_record.page_pos, data_)
            prange_ = self.buffer.get_(i, cur_prange_pos, 'up')
            if prange_[0].t_page[-1].has_capacity():
                if i < self.num_columns:
                    # print(i)
                    # prange = self.prange_directory.get(i)[cur_prange_pos]
                    # print(data_)
                    prange_[0].dirty = True
                    cur_record.columns[i] = data_
                    cur_record.offset = prange_[0].t_page[-1].writeRecord(
                        data_)
                    cur_record.page_pos = len(prange_[0].t_page) - 1
                    cur_record.prange_pos = cur_prange_pos
                    # if i == 0:
                    #     self.index.update(i, prev_data, data_, base_record.rid)
                else:
                    # print('writing meta data', i , cur_prange_pos, cur_record.page_pos)
                    # prange = self.prange_directory.get(i)[cur_prange_pos]
                    prange_[0].t_page[cur_record.page_pos].writeRecord(
                        meta_cols[i - self.num_columns])
            else:
                # Newest tail page is full — grow the range, then write.
                if i < self.num_columns:
                    prange_[0].dirty = True
                    self.update_page_to(prange_)
                    # prange = self.prange_directory.get(i)[cur_prange_pos]
                    cur_record.columns[i] = data_
                    cur_record.offset = prange_[0].t_page[-1].writeRecord(
                        data_)
                    cur_record.page_pos = len(prange_[0].t_page) - 1
                    cur_record.prange_pos = cur_prange_pos
                    # if i == 0:
                    #     self.index.update(i, prev_data, data_, base_record.rid)
                else:
                    self.update_page_to(prange_)
                    # print('writing meta data')
                    # prange = self.prange_directory.get(i)[cur_prange_pos]
                    prange_[0].t_page[cur_record.page_pos].writeRecord(
                        meta_cols[i - self.num_columns])
        # Queue this base record for a later background merge.
        self.merge_waiting_set.add(base_record.rid)
        self.addtps(brid, base_record.update_num)
        if delete == True:
            # self.rid_list.remove()
            # Lock().releaseLock(LOCK_MUTEX, [base_record])
            self.sem.release()
            return cur_record
        # Lock().releaseLock(LOCK_MUTEX, [base_record])
        self.sem.release()
        return True

    def insert_page_to(self, ith_column):
        """Append a new base page (mode 0) to the newest page range of
        `ith_column`.  Returns -1 on failure, otherwise None."""
        # prange = self.prange_directory.get(ith_column)[-1]
        prange = self.buffer.get_(ith_column, self.prange_num, 'in')
        if prange[0].append_page(0) == -1:
            return -1

    def get_rid_list(self):
        """Return the table's rid list (attribute defined outside this chunk)."""
        return self.rid_list

    def update_page_to(self, prange):
        """Append a new tail page (mode 1) to the given page range."""
        # prange = self.prange_directory.get(ith_column)[ith_prange]
        # prange = self.buffer.get_(ith_column, ith_prange, 'up')
        prange[0].append_page(1)

    def get_data(self, rid, ith_col, prange_pos, page_pos, offset):
        """Read one column value for a record.

        A rid starting with 'b' is a base record (read from b_page);
        anything else is a tail record (read from t_page).  The touched
        page is pinned so the bufferpool won't evict it mid-read.
        """
        if rid[0] == 'b':
            # page =
            # self.prange_directory.get(ith_col)[prange_pos].b_page[page_pos]
            page = self.buffer.get_(ith_col, prange_pos,
                                    'up')[0].b_page[page_pos]
            page.pin = True
            return page.readRecord(offset)
        page = self.buffer.get_(ith_col, prange_pos, 'up')[0].t_page[page_pos]
        page.pin = True
        # page = self.prange_directory.get(ith_col)[prange_pos].t_page[page_pos]
        # print(page.readRecord(offset))
        return page.readRecord(offset)

    def read_record(self, key):
        """Return the latest version of the record stored under `key` as a
        list of all column values (user columns + meta columns), by following
        the base record's indirection pointer to its newest tail record.

        Returns False if a shared lock cannot be taken.

        NOTE(review): when `key` is missing, this only prints a message and
        falls through — `record.indirect` below will then raise
        AttributeError on None.  An early `return False` looks intended.
        """
        record = self.page_directory.get(key)
        if record == None:
            print('The key doesnt exist or empty key')
        if Lock().check(LOCK_SHARED, [record]) == False:
            return False
        else:
            Lock().addLock(LOCK_SHARED, [record])
        # Follow indirection to the newest version of this record.
        rid = record.indirect
        # print(rid)
        record = self.page_directory.get(rid)
        offset = record.offset
        page_pos = record.page_pos
        prange_pos = record.prange_pos
        # print('record info', page_pos, prange_pos, offset)
        res = []
        self.sem.acquire()
        for i in range(self.num_columns + META_DATA_COL_NUM):
            if rid[0] == 'b':
                # 'b'-prefixed rid: value still lives in the base page.
                # page =
                # self.prange_directory.get(i)[prange_pos].
                page = self.buffer.get_(i, prange_pos,
                                        're')[0].b_page[page_pos]
                data = page.readRecord(offset)
                # print(data)
                res.append(data)
            else:
                # print(offset)
                # page =
                # self.prange_directory.get(i)[prange_pos].t_page[page_pos]
                page = self.buffer.get_(i, prange_pos,
                                        're')[0].t_page[page_pos]
                data = page.readRecord(offset)
                # print(data)
                res.append(data)
        # Lock().releaseLock(LOCK_SHARED, [record])
        self.sem.release()
        return res

    def merge(self):
        """Drain the merge queue: fold each waiting base record's newest tail
        values back into its base pages and advance its TPS marker.

        Pages are copy-on-write merged (copy.copy of the base page, update,
        swap back in) so concurrent readers see either the old or the new
        page, never a half-written one.

        NOTE(review): the column loop starts at 1 — column 0 (presumably the
        key column, which never changes) is skipped; confirm.
        """
        # print('merge starting')
        # find lastest tail page
        # print(self.merge_waiting_set)
        while len(self.merge_waiting_set) != 0:
            rid = self.merge_waiting_set.pop()
            cpy_base_record = self.page_directory.get(rid)
            # findTrash returns -1 when the range's pages are not available.
            prange_list = self.buffer.findTrash(cpy_base_record.prange_pos)
            # print(cpy_base_record.prange_pos,'\n')
            if prange_list != -1:
                #print("type = ", type(base_record))
                tail_record = self.page_directory.get(
                    cpy_base_record.indirect)
                # merge data: adopt the tail record's consolidated columns.
                cpy_base_record.columns_ = tail_record.columns_
                cpy_base_record.column = tail_record.column
                for i in range(1, self.num_columns):
                    # Shallow-copy the base page, apply the merged value,
                    # then swap the updated copy back in.
                    cpy_page = copy.copy(
                        prange_list[i][0].b_page[cpy_base_record.page_pos])
                    cpy_page.updateRecord(cpy_base_record.offset,
                                          cpy_base_record.columns_[i])
                    prange_list[i][0].b_page[
                        cpy_base_record.page_pos] = cpy_page
                # update_tps
                cpy_base_record.tps = tail_record.tps
                self.page_directory.update({rid: cpy_base_record})
                self.merge_times += 1
        print('done')