def load(self):
    """
    Load the table's rows from its CSV file into memory.

    Marks the table as not derived, reads every line of the file as a
    dictionary, takes the column headers from the first row when none are
    set yet, and stores each row under a fresh auto-incremented row ID.

    :return: None
    :raises DataTableExceptions.DataTableException: code -1 when the primary
        key check fails right after the headers were taken from the file;
        code -2 when the file cannot be read.
    """
    self.derived = False  # Reading a file means table is not derived.

    # Resolve the file name before the try block so that the error handler
    # below can always reference it.
    fn = self.__get_file_name()

    try:
        # newline="" is what the csv module asks for so that it can deal with
        # line endings (including newlines embedded in quoted fields) itself.
        with open(fn, "r", newline="") as csvfile:
            # "," separates columns and anything in between " ... " is parsed
            # as a single string, even if it contains a ",".
            reader = csv.DictReader(csvfile, delimiter=",", quotechar='"')

            # Loop through each line (well, dictionary) in the input file.
            for r in reader:
                if self.headers is None:
                    # The keys of any row are the column headers.
                    self.headers = r.keys()
                    if not self.__primary_keys_valid():
                        # The columns in the file do not contain the named keys.
                        raise DataTableExceptions.DataTableException(
                            -1,
                            "Mismatch between primary key fields and columns in the file."
                        )

                # A failed load leaves rows as None (see the handler below);
                # make sure a later load can still store rows.
                if self.rows is None:
                    self.rows = {}

                # Auto-increment the row ID and add the row to the dictionary.
                self.next_row_id += 1
                self.rows[self.next_row_id] = r
    except IOError as e:
        print("Got an I/O error = ", e)
        # In case reading had started, reset the incomplete information.
        self.rows = None
        self.headers = None
        raise DataTableExceptions.DataTableException(
            -2, "Could not read file = " + str(fn)) from e
def find_by_template(self, t, fields=None, limit=None, offset=None):
    """
    Select the rows matching a template and project them onto the requested fields.

    Every row is selected when the template is None, and every column is kept
    when fields is None.

    :param t: The template representing a select predicate.
    :param fields: The list of fields to keep (project fields).
    :param limit: Max to return. Not implemented; must be None.
    :param offset: Offset into the result. Not implemented; must be None.
    :return: A new, derived table holding the result of the select and
        project, whatever the index lookup returns when an index applies,
        or None when this table has no rows container.
    """
    paging_requested = not (limit is None and offset is None)
    if paging_requested:
        raise DataTableExceptions.DataTableException(
            -6, "Limit/offset not supported for CSVTable")

    # Without a rows container there is nothing to select from.
    if self.rows is None:
        return None

    bad_columns = self.__col_list_valid(t)
    if bad_columns != True and len(bad_columns) > 0:
        raise DataTableExceptions.DataTableException(
            -7, "Invalid columns in template.")

    # Prefer index access whenever an index covers the template.
    usable_index = self.__get_index_name(t)
    if usable_index is not None:
        return self.__get_by_index(t, usable_index, fields)

    # Fall back to a scan. Derived tables have no name of their own, so the
    # name is SELECTED_ plus the base table name; no keys or columns are given.
    selected = CSVTable('SELECTED_' + self.table_name, None, None)
    selected.derived = False  # Temporarily not derived, so inserts are allowed.

    for candidate in self.rows.values():
        if self.matches_template(candidate, t):
            selected.insert(candidate)

    # Apply the project step (project decides what to do when fields is None).
    selected = selected.project(fields)

    # When there are result rows but no headers yet, the keys of the first
    # row define the columns.
    if selected.rows and selected.headers is None:
        first_row = next(iter(selected.rows.values()))
        selected.headers = list(first_row.keys())

    selected.derived = True
    return selected
def insert(self, r):
    """
    Inserts a row into the table.

    The first row inserted into a table without headers defines the table's
    columns. Later rows are checked for invalid columns and, when the table
    has a primary key, for null key values and for an existing row with the
    same key.

    :param r: A row (dictionary of column name -> value) to insert.
    :return: None
    :raises DataTableExceptions.DataTableException: code -309 for a derived
        table, -301 for invalid columns, -401 for a null primary key column,
        -402 for a duplicate primary key.
    """
    # Cannot insert into derived tables.
    if self.derived:
        raise DataTableExceptions.DataTableException(
            -309, "Cannot modify a derived table.")

    if self.rows is None:
        self.rows = {}

    if self.headers is None:
        # There are no defined columns: the first insert defines them.
        self.headers = r.keys()
    else:
        # Are there any invalid columns?
        invc = self.__col_list_valid(r)
        if invc != True and len(invc) > 0:
            raise DataTableExceptions.DataTableException(
                -301, "Invalid columns " + str(invc))

        pk = self.get_key(r)  # Form a template for the primary key.
        if pk is not None:
            if any(value is None for value in pk.values()):
                raise DataTableExceptions.DataTableException(
                    -401, "Null primary key column")

            # The key is valid. Is there already an entry with this key?
            # A missing result or a result without rows means "no match";
            # previously a result whose rows was None dropped the row silently.
            existing = self.find_by_template(pk)
            if existing is not None and existing.rows:
                raise DataTableExceptions.DataTableException(
                    -402, "Duplicate primary key")

    # Add to the dictionary using the auto-increment ID.
    self.next_row_id += 1
    self.rows[self.next_row_id] = r
def project(self, rows, fields):
    """
    Perform the project on a list of rows.

    :param rows: The rows to project; each row is indexed by column name.
    :param fields: A list of column names to keep, or None to keep everything.
    :return: rows itself (not a copy) when fields is None; otherwise a new
        list with one new dictionary {column_header: row_val} per input row,
        holding only the requested columns.
    :raises DataTableExceptions.DataTableException: code -2 when a requested
        field is not present in a row.
    """
    if fields is None:
        # No project clause: hand back the input unchanged.
        return rows

    try:
        return [{name: row[name] for name in fields} for row in rows]
    except KeyError as ke:
        # Happens if a requested field is not in the rows.
        raise DataTableExceptions.DataTableException(
            -2, "Invalid field in project") from ke
def project(self, fields):
    """
    Perform the project. Returns a table with only the requested columns.

    :param fields: A list of column names, or None for "all columns".
    :return: This table itself when fields is None; otherwise a new derived
        table named PROJECT_<table name> whose rows hold only the requested
        columns.
    :raises DataTableExceptions.DataTableException: code -2 when a requested
        field is not present in a row.
    """
    if fields is None:
        # No project clause: return the base table (not a copy).
        return self

    # Derived tables do not have names unless you alias/rename them.
    # Just generate a name for the new table.
    result = CSVTable("PROJECT_" + self.table_name, None, None)
    # insert() refuses to modify derived tables, so the result may only be
    # flagged as derived after it has been populated.
    result.derived = False
    result.headers = fields

    try:
        # A table without a rows container projects to an empty table.
        for r in (self.rows or {}).values():
            # Make a new row with just the requested columns/fields.
            result.insert({name: r[name] for name in fields})
    except KeyError as ke:
        # Happens if a requested field is not in the rows.
        raise DataTableExceptions.DataTableException(
            -2, "Invalid field in project") from ke

    result.derived = True
    return result
def __find_by_template_scan__(self, t, fields=None, limit=None, offset=None):
    """
    Select rows by scanning every stored row, then project the matches.

    All rows are selected when the template is None, and all columns are
    kept when fields is None.

    :param t: The template representing a select predicate.
    :param fields: The list of fields to keep (project fields).
    :param limit: Max to return. Not implemented; must be None.
    :param offset: Offset into the result. Not implemented; must be None.
    :return: The projected matching rows, or None when the table has no
        rows container.
    """
    if not (limit is None and offset is None):
        raise DataTableExceptions.DataTableException(
            -101, "Limit/offset not supported for CSVTable")

    table_rows = self.__rows__
    if table_rows is None:
        return None

    # Keep the rows that match the template, then apply the project.
    selected = [row for row in table_rows if self.matches_template(row, t)]
    return self.project(selected, fields)
def create_table(self, table_name, file_name, column_definitions=None, primary_key_columns=None):
    """
    Register a new table, and optionally its columns, in the catalog tables.

    :param table_name: Name of the table to define; must not already be
        present in table_definitions.
    :param file_name: Path of the CSV file backing the table. It is opened
        only when column definitions are supplied, to check them against the
        file's header line.
    :param column_definitions: Optional list of objects exposing column_name,
        column_type and not_null.
    :param primary_key_columns: Accepted but not used by this method.
    :return: A TableDefinition for the newly registered table.
    :raises DataTableExceptions.DataTableException: code -101 when the table
        name already exists; code -100 when a defined column does not appear
        in the CSV header (including when the file is empty).
    """
    # NOTE(review): every statement below is built by string concatenation, so
    # a value containing a quote breaks (or alters) the statement. Whether
    # run_q can take bound parameters is not visible from here - confirm and
    # switch to parameters if it can.
    r = self.run_q("select * from table_definitions where name='" + table_name + "'")
    if r:
        message = 'Table name ' + table_name + ' is duplicate'
        raise DataTableExceptions.DataTableException(code=-101, message=message)

    if column_definitions:
        # Only the header line is needed. An empty file yields no header, so
        # every column is then reported as invalid instead of leaking a bare
        # StopIteration.
        with open(file_name, 'r', newline='') as csvfile:
            headers = next(csv.reader(csvfile), [])

        # Validate all columns before anything is written to the catalog.
        for c in column_definitions:
            if c.column_name not in headers:
                message = 'Column ' + c.column_name + ' definition is invalid'
                raise DataTableExceptions.DataTableException(
                    code=-100, message=message)

        for c in column_definitions:
            q = "insert into column_definitions values('" \
                + c.column_name + "','" \
                + c.column_type + "','" \
                + str(c.not_null) + "','" \
                + table_name + "')"
            self.run_q(q)
    else:
        column_definitions = []

    q = "insert into table_definitions values('" + table_name + "','" + file_name + "')"
    self.run_q(q)

    return TableDefinition(t_name=table_name,
                           csv_f=file_name,
                           column_definitions=column_definitions,
                           cnx=self.cnx)
def create_index(self, columns):
    """
    Creates a new index for the table. The indexed columns do not need to be
    unique.

    The index is stored in self.indexes under the column names joined by "_"
    and has the form {index key value: {row id: row}}.

    :param columns: List of column names that form the index.
    :return: None. Creates the index on the table.
    :raises DataTableExceptions.DataTableException: code -501 when the column
        check reports invalid columns; code -502 when an index on the same
        columns already exists.
    """
    # Same validity test as the other callers of the column check: anything
    # other than True with at least one entry means invalid columns.
    invalid = self.__col_list_valid(columns)
    if invalid != True and len(invalid) > 0:
        raise DataTableExceptions.DataTableException(
            -501, "Invalid columns in index definition = " + str(invalid))

    if self.indexes is None:
        self.indexes = {}

    # Index name is columns separated by "_". Would be bad if there were "_"
    # in column names.
    index_name = "_".join(columns)
    if self.indexes.get(index_name, None) is not None:
        raise DataTableExceptions.DataTableException(
            -502, "Duplicate index definition: " + index_name)

    # Create the place to hold the index information.
    index = {}
    self.indexes[index_name] = index

    # Put every row in the index. A table without a rows container simply
    # gets an empty index.
    for k, r in (self.rows or {}).items():
        # Get the index value from the current row.
        key = self.__get_index_values(r, index_name)
        # The "bucket" maps the IDs of the rows sharing this index value to
        # the rows themselves.
        index.setdefault(key, {})[k] = r
def delete(self, t):
    """
    Deletes all rows that match a template.

    :param t: The template; rows for which matches_template(row, t) is true
        are removed.
    :return: None
    :raises DataTableExceptions.DataTableException: code -20 for a derived
        table; code -31 when matching the rows fails.
    """
    if self.derived:
        raise DataTableExceptions.DataTableException(
            -20, "Cannot modify a derived table.")

    # Nothing stored means nothing to delete.
    if self.rows is None:
        return

    try:
        # Build the surviving rows first and swap them in afterwards, so that
        # a failure part-way through leaves the table untouched.
        self.rows = {
            k: v for k, v in self.rows.items()
            if not self.matches_template(v, t)
        }
    except Exception as e:
        raise DataTableExceptions.DataTableException(
            -31, "Deleted failed. Original exception = " + str(e)) from e
def save(self):
    """
    Writes the data back to the file.

    Writes a header line built from self.headers followed by one line per
    stored row. The generated row IDs are not written.

    :return: None
    :raises DataTableExceptions.DataTableException: code -3 when the data
        cannot be written.
    """
    fn = self.__get_file_name()
    try:
        # newline='' is what the csv module asks for on files it writes to, so
        # that its own line terminators are not translated a second time.
        with open(fn, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=list(self.headers))
            writer.writeheader()
            # Only the row dictionaries are written, not the generated IDs.
            # A table without a rows container writes just the header.
            writer.writerows((self.rows or {}).values())
    except Exception as e:
        raise DataTableExceptions.DataTableException(
            -3, "Could not write data. Original exception was " + str(e)) from e
def find_by_template(self, t, fields=None, limit=None, offset=None):
    """
    Return the rows matching a template, projected onto the requested fields.

    1. Validate the template keys against the defined columns.
    2. Use __find_by_template_index__ when an access path (index) applies.
    3. Otherwise fall back to __find_by_template_scan__.

    An empty or None template selects every row.

    :param t: The template representing a select predicate.
    :param fields: The list of fields to keep, or None for all columns.
    :param limit: Not used by this method.
    :param offset: Not used by this method.
    :return: The projected matching rows. For an empty template with fields
        None this is the table's own row container (not a copy); it is None
        when the table has no row container.
    :raises DataTableExceptions.DataTableException: code -102 when the
        template names a column that is not defined.
    """
    if not t:
        # No predicate: every row qualifies, but the requested projection
        # still has to be applied.
        all_rows = self.__rows__
        if all_rows is None:
            return None
        return self.project(all_rows, fields)

    df_columns = self.__get_column_names__()
    for k in t:
        if k not in df_columns:
            raise DataTableExceptions.DataTableException(
                code=-102,
                message="template values not relative to defined columns")

    access_path = self.__get_access_path__(t)
    if access_path:
        return self.__find_by_template_index__(t, access_path, fields)
    return self.__find_by_template_scan__(t, fields)
def define_primary_key(self, columns):
    """
    Define the primary key for this table definition.

    Inserts one index_definitions row per key column, commits, and stores
    the new definition in self.index_definitions under 'PRIMARY'.

    NOTE(review): no existing 'PRIMARY' rows are deleted from
    index_definitions here, so "replace" only holds for the in-memory entry -
    confirm whether callers remove the old rows first.

    :param columns: List of key column names, in order.
    :return: None
    :raises DataTableExceptions.DataTableException: code -1000 when a key
        column is not one of the table's defined columns.
    """
    column_names = [c.column_name for c in self.column_definitions]
    if any(col not in column_names for col in columns):
        raise DataTableExceptions.DataTableException(
            code=-1000, message='Invalid key columns')

    # One cursor is enough for all inserts; close it when done so that it is
    # not left open.
    cursor = self.cnx.cursor()
    try:
        for col in columns:
            # NOTE(review): the statement is built by string concatenation; a
            # quote in a column or table name would break it. Bound parameters
            # would be safer, but the driver's parameter style is not visible
            # from here.
            q = "insert into index_definitions values('PRIMARY','PRIMARY','" \
                + col + "','" \
                + self.t_name + "')"
            cursor.execute(q)
    finally:
        cursor.close()

    self.cnx.commit()
    self.index_definitions['PRIMARY'] = IndexDefinition(
        "PRIMARY", columns, "PRIMARY").dic
def matches_template(self, row, t):
    """
    Decide whether a row satisfies a template. Shared by delete and the
    finds, which both need to compare rows to templates.

    :param row: A single dictionary representing a row in the table.
    :param t: A template (column name -> required value), or None.
    :return: True if the row matches the template. A None template (no where
        clause) and an empty template match every row.
    :raises DataTableExceptions.DataTableException: code -4 when the
        comparison fails, e.g. the template names a column the row lacks.
    """
    # Basically, this means there is no where clause. A row always matches
    # the None where clause.
    if t is None:
        return True

    try:
        # The row matches unless some template column differs from the row.
        return not any(row[name] != t[name] for name in t.keys())
    except Exception as e:
        raise DataTableExceptions.DataTableException(
            -4, "Some kind of problem with keys/column names") from e
def get_key(self, r):
    """
    Build the primary-key template for a row.

    :param r: A row (dictionary of column name -> value).
    :return: A dictionary holding the row's value for every key column, or
        None when the table has no key columns defined.
    :raises DataTableExceptions.DataTableException: code -201 when the row
        lacks one of the key columns.
    :raises ValueError: when a key column holds the empty string.
    """
    key_columns = self.key_columns
    if key_columns is None:
        return None

    template = {}
    for column in key_columns:
        # Copy the row's value for this key column into the template.
        try:
            value = r[column]
        except KeyError as ke:
            raise DataTableExceptions.DataTableException(
                -201, "Key is missing attribute " + str(ke))
        template[column] = value

        # This is technically not correct but is in the code to handle
        # possible empty fields for columns from the Lahman 2017 tables.
        if value == "":
            raise ValueError("Key field " + column + " is empty.")

    return template
def __load__(self):
    """
    Load the rows of the table's CSV file into memory.

    Every line of the file is read as a dictionary, reduced to the columns
    defined for this table in the metadata, and handed to __add_row__.

    :return: None
    :raises DataTableExceptions.DataTableException: with the invalid_file
        code when the file cannot be read.
    """
    # Resolve the file name before the try block so that the error handler
    # below can always reference it.
    fn = self.__get_file_name__()

    try:
        # newline="" is what the csv module asks for so that it can deal with
        # line endings (including newlines embedded in quoted fields) itself.
        with open(fn, "r", newline="") as csvfile:
            # "," separates columns and anything in between " ... " is parsed
            # as a single string, even if it contains a ",".
            reader = csv.DictReader(csvfile, delimiter=",", quotechar='"')

            # Get the names of the columns defined for this table from the metadata.
            column_names = self.__get_column_names__()

            # Loop through each line (well, dictionary) in the input file.
            for r in reader:
                # Only add the defined columns into the in-memory table. The
                # CSV file may contain columns that are not relevant to the
                # definition.
                projected_r = self.project([r], column_names)[0]
                self.__add_row__(projected_r)
    except IOError as e:
        raise DataTableExceptions.DataTableException(
            code=DataTableExceptions.DataTableException.invalid_file,
            message="Could not read file = " + str(fn)) from e
def delete(self, t):
    """
    Deletes all rows that match a template.

    Failures while matching are reported on standard output and leave the
    table unchanged; they are not raised to the caller.

    :param t: The template; rows for which matches_template(row, t) is true
        are removed.
    :return: None
    :raises DataTableExceptions.DataTableException: code -309 for a derived
        table.
    """
    if self.derived:
        raise DataTableExceptions.DataTableException(
            -309, "Cannot modify a derived table.")

    try:
        # Build the list of surviving rows first and swap it in afterwards,
        # rather than deleting from the list while iterating over it.
        self.rows = [r for r in self.rows if not self.matches_template(r, t)]
    except Exception as e:
        # NOTE(review): best-effort behaviour kept from the original - the
        # error is only printed. Consider raising a DataTableException.
        print("Exception = " + str(e))
def update(self, t, change_values):
    """
    Placeholder: updating rows is not supported by this table.

    :param t: Template selecting the rows to update (unused).
    :param change_values: The new column values (unused).
    :raises DataTableExceptions.DataTableException: always, with the
        not_implemented code.
    """
    error_cls = DataTableExceptions.DataTableException
    raise error_cls(code=error_cls.not_implemented,
                    message="Updated not implemented")
def delete(self, t):
    """
    Placeholder: deleting rows is not supported by this table.

    :param t: Template selecting the rows to delete (unused).
    :raises DataTableExceptions.DataTableException: always, with the
        not_implemented code.
    """
    error_cls = DataTableExceptions.DataTableException
    raise error_cls(code=error_cls.not_implemented,
                    message="Delete not implemented")
def insert(self, r):
    """
    Placeholder: inserting rows is not supported by this table.

    :param r: The row to insert (unused).
    :raises DataTableExceptions.DataTableException: always, with the
        not_implemented code.
    """
    error_cls = DataTableExceptions.DataTableException
    raise error_cls(code=error_cls.not_implemented,
                    message="Insert not implemented")
def insert(self, r):
    """
    Inserts a row into the table.

    The first row inserted into a table without headers defines the table's
    columns. Later rows are checked for invalid columns and, when the table
    has a primary key, for null key values and for an existing row with the
    same key. Indexes are not updated here; the user must explicitly call
    create index.

    :param r: A row (dictionary of column name -> value) to insert.
    :return: None
    :raises DataTableExceptions.DataTableException: code -10 for a derived
        table, -11 for invalid columns, -12 for a null or duplicate primary
        key, and -501 for any other, unexpected failure.
    """
    try:
        # Cannot insert into derived tables.
        if self.derived:
            raise DataTableExceptions.DataTableException(
                -10, "Cannot modify a derived table.")

        if self.rows is None:
            self.rows = {}

        if self.headers is None:
            # There are no defined columns: the first insert defines them.
            self.headers = r.keys()
        else:
            # Are there any invalid columns?
            invc = self.__col_list_valid(r)
            if invc != True and len(invc) > 0:
                raise DataTableExceptions.DataTableException(
                    -11, "Invalid columns " + str(invc))

            pk = self.get_key(r)  # Form a template for the primary key.
            if pk is not None:
                if any(value is None for value in pk.values()):
                    raise DataTableExceptions.DataTableException(
                        -12, "Null primary key column")

                # The key is valid. Is there already an entry with this key?
                # A missing result or a result without rows means "no match";
                # previously a result whose rows was None dropped the row
                # silently.
                existing = self.find_by_template(pk)
                if existing is not None and existing.rows:
                    raise DataTableExceptions.DataTableException(
                        -12, "Duplicate primary key")

        # Add to the dictionary using the auto-increment ID.
        self.next_row_id += 1
        self.rows[self.next_row_id] = r
    except DataTableExceptions.DataTableException:
        # Errors raised on purpose above keep their own code and message.
        raise
    except Exception as e:
        raise DataTableExceptions.DataTableException(
            -501, "Unknown error in insert(). Original e = " + str(e)) from e