def __init__(self, name, num_columns, key):
    self.name = name
    self.key = key
    self.num_columns = num_columns  # TODO: invalid input -> columns > MAX_COLUMNS
    self.index = Index(self)
    self.num_updates = 0
    self.num_records = 0
    self.merge_pid = None
    self.merged_record = {}
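# The TODO above asks for validation of num_columns. A minimal sketch of that
# guard, assuming MAX_COLUMNS is defined in the project's config (the constant
# name comes from the TODO itself; the helper below is hypothetical):
def _validate_num_columns(num_columns):
    # Reject non-positive or oversized column counts before building the table.
    if not isinstance(num_columns, int) or num_columns <= 0:
        raise ValueError("num_columns must be a positive integer")
    if num_columns > MAX_COLUMNS:
        raise ValueError("num_columns exceeds MAX_COLUMNS (%d)" % MAX_COLUMNS)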
# requires: import os, pickle
def create_table(self, name, num_columns, key):
    # NOTE: the pickled snapshot loaded here is never used below.
    expanded = os.path.expanduser(self.filename)
    with open(expanded, "rb") as db_file:
        db = pickle.load(db_file)
    self.tables.append(
        Table(name, num_columns, key, self.bp, Index(num_columns)))
    return self.tables[-1]
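# A short usage sketch for create_table. `db` stands for an instance of
# whatever class owns the method; the table name, column count, and key index
# are illustrative values, not taken from the source:
grades = db.create_table('Grades', 5, 0)  # five integer columns, keyed on column 0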
# requires: import queue, threading; from pathlib import Path; from threading import Lock
def __init__(self, name, num_columns, key, path, next_rid=START_RID):
    self.name = name
    self.key = key
    self.num_columns = num_columns
    self.index = Index(self)
    self._next_rid = next_rid
    self.path = Path(path)
    self.page_directory = PageDirectory()
    self._latch = Lock()
    self._page_ranges = []
    self.merge_queue = queue.Queue()
    self.ranges_inside_queue = set()
    # Background merge worker; daemon=True so it dies with the process.
    self._merge_thread = threading.Thread(target=self._merge, daemon=True)
    self._merge_thread.start()
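# The constructor above starts a daemon thread running self._merge, which the
# source does not show. A plausible sketch of the worker loop it implies,
# draining merge_queue; everything inside is an assumption, including the
# page_range.merge() call:
def _merge(self):
    while True:
        page_range = self.merge_queue.get()  # blocks until a range is queued
        try:
            # Hypothetical: consolidate tail-page updates into fresh base pages.
            page_range.merge()
        finally:
            with self._latch:
                self.ranges_inside_queue.discard(page_range)
            self.merge_queue.task_done()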
def __init__(self, table):
    self.table = table
    if self.table.index is None:
        # self.table.index = Index(self.table.num_columns, table.bp)
        self.table.index = Index(self.table.num_columns)
# requires: import json, os, threading; Config, Index, LockManager in scope
def __init__(self, name, key, num_columns, bufferpool, latest_range_index,
             base_current_rid, tail_current_rid, tail_tracker, merge_tracker,
             base_tracker, method='create', verbose=False):
    self.name = name
    self.key = key
    self.num_columns = num_columns
    # dictionary that maps rid to (range #, page_set #, offset)
    self.page_directory = {}
    # dictionary that maps key to (range #, page_set #, offset)
    self.key_directory = {}
    self.index = Index(self)
    self.latest_range_index = latest_range_index
    self.bufferpool = bufferpool
    self.base_current_rid = base_current_rid
    self.tail_current_rid = tail_current_rid
    self.tail_tracker = tail_tracker
    self.merge_tracker = merge_tracker
    self.base_tracker = base_tracker
    self.pd_lock = threading.Lock()
    self.verbose = verbose
    self.lock = LockManager()
    self.lm_lock = threading.Lock()
    self.tt_lock = threading.Lock()
    self.base_rid_lock = threading.Lock()
    self.tail_rid_lock = threading.Lock()

    if method == 'create':
        if not os.path.exists(os.getcwd() + "/" + name):
            os.makedirs(name)
        os.chdir(name)

    if method == 'get':
        # Restore the page directory, key directory, and primary-key index
        # from their JSON files, creating empty files on first use.
        pgdir_file = os.getcwd() + "/pgdir.json"
        if not os.path.exists(pgdir_file):
            open(pgdir_file, "w+").close()
        else:
            with open(pgdir_file, "rb") as fp:
                pgdir_data = json.loads(fp.read())
                self.page_directory = {int(k): v for k, v in pgdir_data.items()}

        keydir_file = os.getcwd() + "/keydir.json"
        if not os.path.exists(keydir_file):
            open(keydir_file, "w+").close()
        else:
            with open(keydir_file, "rb") as fp:
                key_data = json.loads(fp.read())
                self.key_directory = {int(k): v for k, v in key_data.items()}

        pri_key_file = os.getcwd() + '/pri_index.json'
        if not os.path.exists(pri_key_file):
            open(pri_key_file, 'w+').close()
        else:
            with open(pri_key_file, 'rb') as fp:
                pri_key_data = json.loads(fp.read())
                self.index.indexes[Config.NUM_META_COLS + self.key] = {
                    int(k): v for k, v in pri_key_data.items()
                }

    # Background merge thread
    self.interval = Config.MERGE_INTERVAL
    self.thread = threading.Thread(target=self.__merge, args=())
    self.thread.daemon = True
    self.thread.start()
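# The 'get' branch above restores state from pgdir.json and keydir.json, which
# implies a matching flush on shutdown that the source does not show. A sketch
# of that counterpart (the method name `close` is an assumption; json.dump
# stringifies the integer keys, and the loader above converts them back):
def close(self):
    with self.pd_lock:
        with open(os.getcwd() + "/pgdir.json", "w") as fp:
            json.dump(self.page_directory, fp)
        with open(os.getcwd() + "/keydir.json", "w") as fp:
            json.dump(self.key_directory, fp)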
import threading
from datetime import datetime
# Project-level names assumed in scope: Index, compare_cols, lstore.config.


class Query:
    # Creates a Query object that can perform different queries on the specified table
    def __init__(self, table):
        self.table = table
        self.index = Index(self.table)

    # Delete the record with the specified primary key
    # Returns True upon successful deletion
    # Returns False if record doesn't exist or is locked due to 2PL
    def delete(self, key):
        rid = self.index.locate(key, self.table.key)[0]
        self.table.__delete__(rid)
        self.index.drop_index(key)
        return True, self.table, rid

    # Insert a record with specified columns
    # Returns True upon successful insertion
    # Returns False if insert fails for whatever reason
    def insert(self, *columns):
        base_rid = 0
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        indirection_index = 0
        key_index = self.table.key
        rid = self.table.base_RID
        columns = [indirection_index, rid, timestamp, base_rid] + list(columns)
        self.table.__insert__(columns)  # table insert
        self.index.add_index(rid, columns[lstore.config.Offset:])
        self.table.base_RID += 1  # Insert is not being tested so might not need this statement
        return True, self.table, base_rid

    # Read a record with specified key
    # Returns a list of Record objects upon success
    # Returns False if record is locked by 2PL
    # Assume that select will never be called on a key that doesn't exist
    def select(self, key, column, query_columns):
        thread_lock = threading.RLock()
        thread_lock.acquire()
        entries = self.index.locate(key, column)
        thread_lock.release()
        if len(entries) == 0:
            # An empty locate() result means the key does not exist.
            print("select returned false because it couldn't locate the key value")
            # Return False to the transaction class if the rid is not found.
            return False, self.table, None
        rids = []
        thread_lock.acquire()
        for rid in entries:
            # 2PL: acquire shared locks. acquire_read fails when another
            # thread already holds the write lock on this rid.
            if self.table.acquire_read(rid) == False:
                print("select returned false because of locking error: tid is " +
                      str(threading.get_ident()) + ", outstanding write rid is " + str(rid))
                thread_lock.release()
                # Abort: return False to the transaction class because of locks.
                return False, self.table, rid
            rids.append(rid)
        thread_lock.release()
        result = []
        for rid in rids:
            thread_lock.acquire()
            result.append(self.table.__read__(rid, query_columns))
            thread_lock.release()
        # TODO: inspect this later, might be a faulty way of returning the last value
        return result, self.table, None

    # Update a record with specified key and columns
    # Returns True if update is successful
    # Returns False if no records exist with given key or if the target record
    # cannot be accessed due to 2PL locking
    def update(self, key, *columns):
        thread_lock = threading.RLock()
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        indirection_index = 0
        thread_lock.acquire()
        rid = self.table.tail_RID
        self.table.tail_RID -= 1
        thread_lock.release()
        # Get every column and compare to the new ones: cumulative update.
        old_columns = self.select(key, self.table.key,
                                  [1] * self.table.num_columns)[0][0].columns
        new_columns = list(columns)
        # Get the IndexEntry of the old key value.
        old_rid = self.index.locate(key, self.table.key)[0]
        # 2PL: acquire exclusive locks.
        thread_lock.acquire()
        if (self.table.acquire_write(rid) == False
                or self.table.acquire_write(old_rid) == False):
            thread_lock.release()
            # Return False to the transaction class: rid not found, or abort.
            return False, self.table, old_rid
        thread_lock.release()
        compared_cols = compare_cols(old_columns, new_columns)
        columns = [indirection_index, rid, timestamp, old_rid] + compared_cols
        self.table.__update__(columns, old_rid)  # add record to tail pages
        # Base record: do not update the index, only insert.
        old_indirection = self.table.__return_base_indirection__(old_rid)
        thread_lock.acquire()
        # Tail record gets the base record's indirection index.
        self.table.__update_indirection__(rid, old_indirection)
        # Base record's indirection column gets the latest update RID.
        self.table.__update_indirection__(old_rid, rid)
        self.index.update_index(old_rid, compared_cols)
        thread_lock.release()
        return True, self.table, old_rid

    """
    :param start_range: int               # Start of the key range to aggregate
    :param end_range: int                 # End of the key range to aggregate
    :param aggregate_column_index: int    # Index of desired column to aggregate
    """

    # Returns the summation of the given range upon success
    # Returns False if no record exists in the given range
    def sum(self, start_range, end_range, aggregate_column_index):
        result = 0
        for key in range(start_range, end_range + 1):
            temp_record = self.select(key, self.table.key,
                                      [1] * self.table.num_columns)[0]
            if temp_record == False:
                return False, self.table
            if temp_record == -1 or len(temp_record) == 0:
                continue
            result += temp_record[0].columns[aggregate_column_index]
        return result, self.table

    """
    Increments one column of the record.
    This implementation should work if your select and update queries already work.
    :param key: the primary key of the record to increment
    :param column: the column to increment
    # Returns True if increment is successful
    # Returns False if no record matches key or if target record is locked by 2PL.
    """

    def increment(self, key, column):
        r, _, _ = self.select(key, self.table.key, [1] * self.table.num_columns)
        if r is not False and r[0].rid is not False:
            updated_columns = [None] * self.table.num_columns
            updated_columns[column] = r[0].columns[column] + 1
            u, table, rid = self.update(key, *updated_columns)
            return u, table, rid
        return False, self.table, None  # TODO: check this!!!!!!
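# A short usage sketch for the Query class above, given an existing Table
# instance `table` with five user columns keyed on column 0. The key and
# column values are illustrative; passing None for columns left unchanged in
# update() is the convention assumed by compare_cols:
q = Query(table)
q.insert(92106429, 10, 20, 30, 40)
records, _, _ = q.select(92106429, 0, [1, 1, 1, 1, 1])
q.update(92106429, None, 11, None, None, None)
total, _ = q.sum(92106420, 92106429, 1)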
import datetime
# Project-level names assumed in scope: Index, PageRange, Record, Address,
# RANGESIZE, NUM_METADATA_COLUMNS, INSERT, SELECT, UPDATE, DELETE, lstore.globals.


class Table:
    """
    :param name: string         # Table name
    :param num_columns: int     # Number of Columns: all columns are integer
    :param key: int             # Index of table key in columns
    """

    def __init__(self, name, key=None, num_columns=None):
        self.name = name
        self.key = key
        self.num_columns = num_columns
        self.pageranges = {}
        self.index = Index(self)
        if key is not None and num_columns is not None:
            # create table from scratch
            self.index.set_width(self.num_columns)
            self.current_Rid_base = 1
            self.current_Rid_tail = 2**64 - 2
            self.current_Prid = 0
            self.total_base_phys_pages = num_columns + NUM_METADATA_COLUMNS
            self.total_tail_phys_pages = num_columns + NUM_METADATA_COLUMNS
            # create the first empty page range
            self.pageranges[0] = PageRange(self.name, 0, self.num_columns, True)
        else:
            # table will have to be initialized manually
            self.current_Rid_base = None
            self.current_Rid_tail = None
            self.current_Prid = None
            self.total_base_phys_pages = None
            self.total_tail_phys_pages = None

    def set_table_metadata(self, primary_key, num_user_columns,
                           current_base_rid, current_tail_rid, current_prid):
        self.key = primary_key
        self.num_columns = num_user_columns
        self.index.set_width(self.num_columns)
        self.current_Rid_base = current_base_rid
        self.current_Rid_tail = current_tail_rid
        self.current_Prid = current_prid
        self.total_base_phys_pages = self.num_columns + NUM_METADATA_COLUMNS
        self.total_tail_phys_pages = self.num_columns + NUM_METADATA_COLUMNS

    """
    :param prange_metadata: (bOffset, tOffset, cur_tid, mOffset, merge_f)
    """

    def add_page_range(self, prid, prange_metadata):
        is_new_range = False
        self.pageranges[prid] = PageRange(self.name, prid, self.num_columns,
                                          is_new_range, prange_metadata)

    def add_pagedir_entry(self, rid, prid):
        # delete function
        pass

    def get_page_range(self, prid):
        return self.pageranges[prid]

    def get_timestamp(self):
        stamp = datetime.datetime.now()
        data = bytearray(8)
        data[0:2] = stamp.year.to_bytes(2, byteorder="big")  # year occupies bytes 0-1
        data[2] = stamp.month
        data[3] = stamp.day
        data[4] = stamp.hour
        data[5] = stamp.minute
        data[6] = stamp.second
        return data

    """
    # Handle creation of page ranges and partition record into page ranges
    # update rid -> page range id index
    """

    def insert(self, *columns):
        lstore.globals.control.acquire()
        record = Record(self.current_Rid_base, self.key, columns)
        # Handles page range indexing and allocating page ranges.
        prid = (self.current_Rid_base - 1) // RANGESIZE
        # If page range id is higher than current max prid -> make new page range.
        if prid > self.current_Prid:
            self.current_Prid = prid
            self.pageranges[prid] = PageRange(self.name, prid, self.num_columns, True)
        # Insert record into the page range with rid and current time.
        self.pageranges[prid].insert(record, self.current_Rid_base, self.get_timestamp())
        # update rid -> page range id index
        self.current_Rid_base = self.current_Rid_base + 1
        lstore.globals.control.release()

    def insert_lock(self):
        prid = (self.current_Rid_base - 1) // RANGESIZE
        # If page range id > current max prid -> take the lock for the
        # conceptual address of the new page range / record.
        if prid > self.current_Prid:
            # Hardcoded address of where the new record is supposed to be inserted.
            lock_address = Address(prid, 0, 0, 1)
            is_locked = lstore.globals.lockManager.add_lock(INSERT, self.name, lock_address)
            return is_locked
        else:
            return self.pageranges[prid].insert_acquire_lock(self.current_Rid_base)

    """
    Converts the schema bit string to a schema bit array
    :param schema: integer bit string
    :return: schema bit array
    """

    def getOffset(self, schema, col_num):
        if col_num < 1:
            return []
        offset = [0] * col_num
        bit = 2**(col_num - 1)
        itr = 0
        while bit > 0:
            if (schema - bit) >= 0:
                offset[itr] = 1
                schema = schema - bit
            itr = itr + 1
            bit = bit // 2
        return offset

    def return_record(self, rid, key, col_wanted):
        prid = (rid - 1) // RANGESIZE
        return Record(rid, key, self.pageranges[prid].return_record(rid, col_wanted))

    def select(self, key, column, col_wanted):
        lstore.globals.control.acquire()
        self.index.create_index(column)
        rids = self.index.locate(column, key)
        record_set = []
        for item in rids:
            record_set.append(self.return_record(item, key, col_wanted))
        lstore.globals.control.release()
        return record_set

    def select_lock(self, key, column):
        self.index.create_index(column)
        ridList = self.index.locate(column, key)
        for rid in ridList:
            prid = (rid - 1) // RANGESIZE
            got_lock = self.pageranges[prid].acquire_lock(rid, SELECT)
            if not got_lock:
                return False
        return True

    def update(self, key, tail_schema, *columns):
        lstore.globals.control.latch()
        self.index.create_index(self.key)
        rid = self.index.locate(self.key, key)[0]
        self.index.update(rid,
                          self.return_record(rid, key, [1, 1, 1, 1, 1]).columns,
                          *columns)
        record = Record(0, self.key, columns)
        prid = (rid - 1) // RANGESIZE
        self.pageranges[prid].update(rid, tail_schema, record,
                                     self.current_Rid_tail, self.get_timestamp())
        self.current_Rid_tail = self.current_Rid_tail - 1
        lstore.globals.control.unlatch()

    def update_lock(self, key):
        self.index.create_index(self.key)
        rid = self.index.locate(self.key, key)[0]
        prid = (rid - 1) // RANGESIZE
        return self.pageranges[prid].acquire_lock(rid, UPDATE)

    def delete(self, key):
        lstore.globals.control.acquire()
        rid = self.index.locate(self.key, key)[0]
        columns_wanted = [1] * self.num_columns
        record = self.return_record(rid, key, columns_wanted)
        for col_number in range(self.num_columns):
            self.index.delete(record.columns[col_number], rid, col_number)
        prid = (rid - 1) // RANGESIZE
        self.pageranges[prid].delete(rid)
        lstore.globals.control.release()

    def delete_lock(self, key):
        rid = self.index.locate(self.key, key)[0]
        prid = (rid - 1) // RANGESIZE
        return self.pageranges[prid].acquire_lock(rid, DELETE)

    def close(self):
        overall_page_directory = {}
        page_range_metadata = {}
        for prid in self.pageranges:
            pagedir_dict = self.pageranges[prid].get_pagedir_dict()
            overall_page_directory.update(pagedir_dict)
            page_range_metadata[prid] = (self.pageranges[prid].bOffSet,
                                         self.pageranges[prid].tOffSet,
                                         self.pageranges[prid].cur_tid,
                                         self.pageranges[prid].mOffSet,
                                         self.pageranges[prid].merge_f)
        # Flush table and page range metadata into the table index file.
        lstore.globals.diskManager.flush_table_metadata(
            self.name, self.current_Rid_base, self.current_Rid_tail, self.current_Prid)
        lstore.globals.diskManager.flush_pagerange_metadata(self.name, page_range_metadata)
        lstore.globals.diskManager.flush_index(
            self.name, self.current_Rid_base, self.current_Rid_tail, self.current_Prid)
        # Flush page ranges' page directories into the page directory file.
        lstore.globals.diskManager.flush_page_directory(self.name, overall_page_directory)
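# A quick worked example of getOffset above: the schema-encoding integer
# 0b1010 over four columns expands to a per-column update bitmap, scanning
# from the most significant bit down:
#
#   table.getOffset(0b1010, 4)  ->  [1, 0, 1, 0]
#
# i.e. columns 0 and 2 have been updated, columns 1 and 3 have not.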