Пример #1
0
    def load(self):
        """
        Load the table's CSV file into memory.

        Marks the table as not derived, reads every line of the file as a dictionary and stores it
        in self.rows under an auto-incremented row ID. If the headers are not yet set, they are
        taken from the first row and the primary key check is run against them.

        :return: None
        :raises DataTableExceptions.DataTableException: Code -1 if the primary key check fails for
            the columns found in the file; code -2 if the file cannot be read.
        """
        fn = None  # Bound up front so the I/O handler below can always report it.
        try:
            self.derived = False  # Reading a file means table is not derived.
            fn = self.__get_file_name()

            # A previous failed load resets rows to None; start from an empty dictionary again.
            if self.rows is None:
                self.rows = {}

            # newline="" is what the csv module asks for so that newlines inside quoted fields survive.
            with open(fn, "r", newline="") as csvfile:
                # "," separates columns and anything in between " ... " parses as a single string,
                # even if it has things like "," in it.
                reader = csv.DictReader(csvfile, delimiter=",", quotechar='"')

                # Loop through each line (well dictionary) in the input file.
                for r in reader:
                    if self.headers is None:  # Just sets the header if not set.
                        self.headers = r.keys()  # The keys for any row contain column headers.
                        if not self.__primary_keys_valid():
                            # The columns in the file do not contain the named keys.
                            raise DataTableExceptions.DataTableException(
                                -1,
                                "Mismatch between primary key fields and columns in the file."
                            )

                    # Auto-increment the row ID and add the loaded dict to the dict of rows.
                    self.next_row_id += 1
                    self.rows[self.next_row_id] = r

        except IOError as e:
            print("Got an I/O error = ", e)
            # In case I started to read, reset incomplete information.
            self.rows = None
            self.headers = None
            # Raise the exception class (not the module) with a single, fully formed message.
            raise DataTableExceptions.DataTableException(
                -2, "Could not read file = " + str(fn)) from e
Пример #2
0
    def find_by_template(self, t, fields=None, limit=None, offset=None):
        """
        Select the rows matching a template and project them onto the requested fields.

        The lookup goes through an index when one applies to the template; otherwise every row is
        scanned and the matches are copied into a new table named SELECTED_<table name>.

        :param t: The template representing a select predicate.
        :param fields: The list of fields to project, or None for all columns.
        :param limit: Max to return. Not implemented; must be None.
        :param offset: Offset into the result. Not implemented; must be None.
        :return: New derived table holding the result of the select and project, or None when
            this table has no rows container (self.rows is None).
        :raises DataTableExceptions.DataTableException: Code -6 if limit or offset is given;
            code -7 if the template names invalid columns.
        """
        if not (limit is None and offset is None):
            raise DataTableExceptions.DataTableException(
                -6, "Limit/offset not supported for CSVTable")

        # Nothing loaded or inserted yet: there is nothing to select from.
        if self.rows is None:
            return None

        bad_columns = self.__col_list_valid(t)
        if bad_columns != True and len(bad_columns) > 0:
            raise DataTableExceptions.DataTableException(
                -7, "Invalid columns in template.")

        # Prefer an index when one covers the template.
        usable_index = self.__get_index_name(t)
        if usable_index is not None:
            return self.__get_by_index(t, usable_index, fields)

        # Scan based selection. The new table has no keys or columns of its own and stays
        # modifiable (derived = False) only while it is being filled.
        selected = CSVTable('SELECTED_' + self.table_name, None, None)
        selected.derived = False
        for candidate in self.rows.values():
            if self.matches_template(candidate, t):
                selected.insert(candidate)

        # Apply project if there are project fields.
        projected = selected.project(fields)

        # When there are result rows but no headers, the first row's keys define the columns.
        if projected.rows and projected.headers is None and len(projected.rows) > 0:
            first_id = next(iter(projected.rows))
            projected.headers = list(projected.rows[first_id].keys())

        projected.derived = True
        return projected
Пример #3
0
    def insert(self, r):
        """
        Inserts a row into the table.

        The first row inserted into a table without headers defines the columns. Later rows are
        checked for invalid columns and, when get_key() yields a primary key template, for null
        and duplicate key values before being stored under the next auto-incremented row ID.

        :param r: A row (dictionary keyed by column name) to insert into the table.
        :return: None
        :raises DataTableExceptions.DataTableException: Code -309 for a derived table, -301 for
            invalid columns, -401 for a null primary key column, -402 for a duplicate primary key.
        """

        # Cannot insert into derived tables.
        if self.derived:
            raise DataTableExceptions.DataTableException(
                -309, "Cannot modify a derived table.")

        if self.rows is None:
            self.rows = {}

        if self.headers is None:
            # If there are no defined columns, the first insert defines the columns. Keep a list
            # snapshot rather than a live view that would follow later changes to r.
            self.headers = list(r.keys())
        else:
            # Are there any invalid columns?
            invc = self.__col_list_valid(r)
            if invc != True and len(invc) > 0:
                raise DataTableExceptions.DataTableException(
                    -301, "Invalid columns " + str(invc))

            pk = self.get_key(r)  # Form a template for the primary key.
            if pk is not None:
                if any(value is None for value in pk.values()):
                    raise DataTableExceptions.DataTableException(
                        -401, "Null primary key column")

                # The key is valid. Now determine if there is an entry with this key.
                t = self.find_by_template(pk)

                # Any non-empty result is a duplicate. An empty result, whether it is reported
                # as None or as an empty container, means the row can be added.
                if t is not None and t.rows:
                    raise DataTableExceptions.DataTableException(
                        -402, "Duplicate primary key")

        # Add to dictionary using auto-increment ID.
        self.next_row_id += 1
        self.rows[self.next_row_id] = r
    def project(self, rows, fields):
        """
        Apply a PROJECT to a list of rows, keeping only the requested columns.

        :param rows: The rows to project; each row is indexed by column name.
        :param fields: A list of column names, or None to skip the projection.
        :return: rows itself when fields is None, otherwise a new list holding one dictionary
            {column_header: row_val} per input row with only the requested columns.
        :raises DataTableExceptions.DataTableException: Code -2 when a requested field is not
            present in a row.
        """
        if fields is None:
            # No project clause: hand the input back as is.
            return rows

        try:
            # One new, narrowed dictionary per input row.
            return [{name: row[name] for name in fields} for row in rows]
        except KeyError:
            # Happens if a requested field is not in the rows.
            raise DataTableExceptions.DataTableException(
                -2, "Invalid field in project")
Пример #5
0
    def project(self, fields):
        """
        Perform the project. Returns a new table with only the requested columns.

        :param fields: A list of column names, or None for no projection.
        :return: This table itself when fields is None; otherwise a new derived table named
            PROJECT_<table name> holding the requested columns of every row.
        :raises DataTableExceptions.DataTableException: Code -2 when a requested field is not
            present in a row.
        """
        if fields is None:  # If there is no project clause, return the base table.
            return self  # Should really return a new, identical table.

        # Derived tables do not have names unless you alias/rename them.
        # Just generate a name for the new table.
        result = CSVTable("PROJECT_" + self.table_name, None, None)
        result.headers = fields

        # insert() refuses to modify derived tables, so the new table must stay modifiable
        # while it is being filled and is only marked as derived afterwards.
        result.derived = False

        try:
            # A table without rows simply projects to an empty table.
            for r in (self.rows or {}).values():
                # Make a new row with just the requested columns/fields.
                result.insert({name: r[name] for name in fields})
        except KeyError as ke:
            # Happens if the requested field is not in the rows.
            raise DataTableExceptions.DataTableException(
                -2, "Invalid field in project") from ke

        result.derived = True
        return result
    def __find_by_template_scan__(self, t, fields=None, limit=None, offset=None):
        """
        Select rows by scanning every row of the table, then project the matches.

        :param t: The template representing a select predicate.
        :param fields: The list of fields (project fields), passed on to project().
        :param limit: Max to return. Not implemented; must be None.
        :param offset: Offset into the result. Not implemented; must be None.
        :return: Whatever project() returns for the list of matching rows, or None when the
            table has no rows container (self.__rows__ is None).
        :raises DataTableExceptions.DataTableException: Code -101 if limit or offset is given.
        """
        if not (limit is None and offset is None):
            raise DataTableExceptions.DataTableException(
                -101, "Limit/offset not supported for CSVTable")

        if self.__rows__ is None:
            return None

        # Keep the rows that match the template, in table order.
        matching = [row for row in self.__rows__ if self.matches_template(row, t)]
        return self.project(matching, fields)
Пример #7
0
    def create_table(self,
                     table_name,
                     file_name,
                     column_definitions=None,
                     primary_key_columns=None):
        """
        Register a new table, and optionally its columns, in the catalog.

        Looks the name up in table_definitions first, validates any column definitions against
        the header row of the CSV file, inserts one column_definitions row per column and then
        inserts the table_definitions row.

        :param table_name: Name of the table to create.
        :param file_name: Path of the CSV file that holds the table's data.
        :param column_definitions: Optional list of objects exposing column_name, column_type
            and not_null.
        :param primary_key_columns: Accepted but not used by this method.
        :return: A TableDefinition built from the name, file, column definitions and self.cnx.
        :raises DataTableExceptions.DataTableException: Code -101 if the table name is already
            registered; code -100 if a defined column is not a header of the CSV file.
        """
        # NOTE(review): the statements below are assembled by string concatenation, so a quote
        # in table_name, file_name or a column attribute changes the SQL. They should become
        # parameterized queries once run_q is confirmed to accept query arguments.
        r = self.run_q("select * from table_definitions where name='" +
                       table_name + "'")
        if r:
            message = 'Table name ' + table_name + ' is duplicate'
            raise DataTableExceptions.DataTableException(code=-101,
                                                         message=message)

        if column_definitions:
            # newline='' is what the csv module asks for when it is handed a file object.
            with open(file_name, 'r', newline='') as csvfile:
                # An empty file has no header row; treating it as "no columns" turns what used
                # to be a bare StopIteration into the invalid-column error below.
                headers = set(next(csv.reader(csvfile), []))

            # Validate every column before anything is written to the catalog.
            for c in column_definitions:
                if c.column_name not in headers:
                    message = 'Column ' + c.column_name + ' definition is invalid'
                    raise DataTableExceptions.DataTableException(
                        code=-100, message=message)

            for c in column_definitions:
                q = "insert into column_definitions values('" \
                    + c.column_name + "','" \
                    + c.column_type + "','" \
                    + str(c.not_null) + "','" \
                    + table_name + "')"
                self.run_q(q)
        else:
            column_definitions = []

        q = "insert into table_definitions values('" + table_name + "','" + file_name + "')"
        self.run_q(q)

        return TableDefinition(t_name=table_name,
                               csv_f=file_name,
                               column_definitions=column_definitions,
                               cnx=self.cnx)
Пример #8
0
    def create_index(self, columns):
        """
        Creates a new index for the table. Columns is a list of column names to form the index.
        The column values do not need to be unique.

        The index is stored in self.indexes under the column names joined by "_". It maps each
        index key value to a bucket, and each bucket maps row ID to row.

        :param columns: List of column names.
        :return: None. Creates the index on the table.
        :raises DataTableExceptions.DataTableException: Code -501 if the column check does not
            return True; code -502 if an index on the same columns already exists.
        """

        # Raise an exception if the column check reports anything other than True.
        invalid = self.__col_list_valid(columns)
        if invalid != True:
            raise DataTableExceptions.DataTableException(
                -501, "Invalid columns in index definition = " + str(invalid))

        if self.indexes is None:
            self.indexes = {}

        # Index name is columns separated by "_". Would be bad if there were "_" in column names.
        index_name = "_".join(columns)
        if self.indexes.get(index_name) is not None:
            # Report the offending index rather than the (always True) validation result.
            raise DataTableExceptions.DataTableException(
                -502, "Duplicate index definition = " + index_name)

        # Create the place to hold the index information.
        index = {}
        self.indexes[index_name] = index

        # Put every row in the index. A table that has no rows yet gets an empty index.
        for row_id, row in (self.rows or {}).items():
            # Get the index value from the current row.
            key = self.__get_index_values(row, index_name)

            # Find (or create) the bucket of rows sharing this index value and file the row
            # under its row ID.
            index.setdefault(key, {})[row_id] = row
Пример #9
0
    def delete(self, t):
        """
        Deletes all rows that match a template.

        :param t: Template passed to matches_template() for every row.
        :return: None
        :raises DataTableExceptions.DataTableException: Code -20 if the table is derived;
            code -31 if anything goes wrong while filtering the rows.
        """

        if self.derived:
            raise DataTableExceptions.DataTableException(
                -20, "Cannot modify a derived table.")

        try:
            # Build a new dictionary with the rows that should not be deleted instead of
            # removing entries while iterating. The table is only updated once the whole scan
            # has succeeded.
            self.rows = {
                row_id: row
                for row_id, row in self.rows.items()
                if not self.matches_template(row, t)
            }
        except Exception as e:
            # Raise the exception class (not the module) and convert the original exception to
            # text before concatenating it.
            raise DataTableExceptions.DataTableException(
                -31, "Deleted failed. Original exception = " + str(e)) from e
Пример #10
0
 def save(self):
     """
     Writes the data back to the file.

     Writes a header line built from self.headers followed by one line per row. The generated
     row IDs are not written.

     :return: None
     :raises DataTableExceptions.DataTableException: Code -3 if the data cannot be written.
     """
     fn = self.__get_file_name()
     try:
         # newline='' lets the csv writer control line endings itself; the with block closes
         # the file, so no explicit close() is needed.
         with open(fn, 'w', newline='') as csvfile:
             writer = csv.DictWriter(csvfile, fieldnames=self.headers)
             writer.writeheader()
             writer.writerows(self.rows.values())
     except Exception as e:
         # The exception takes a code and one message, so fold the original error into it.
         raise DataTableExceptions.DataTableException(
             -3, "Could not write data. Original exception was " + str(e)) from e
Пример #11
0
    def find_by_template(self, t, fields=None, limit=None, offset=None):
        """
        Find the rows that match a template, through an index when one applies.

        1. Validate the template columns relative to the defined columns.
        2. Determine if there is an applicable index, and call __find_by_template_index__ if
           one exists.
        3. Call __find_by_template_scan__ if there is no applicable index.

        :param t: The template representing a select predicate; None or empty selects all rows.
        :param fields: The list of fields (project fields), or None for all columns.
        :param limit: Accepted but not forwarded to the index or scan lookup.
        :param offset: Accepted but not forwarded to the index or scan lookup.
        :return: The result of the index or scan lookup. With an empty template and no fields
            this is the table's own row container.
        :raises DataTableExceptions.DataTableException: Code -102 if the template names a
            column that is not defined.
        """
        if not t:
            if fields is None:
                return self.__rows__
            # No predicate but a projection was requested: let the scan apply the project
            # instead of silently returning every column.
            return self.__find_by_template_scan__(None, fields)

        df_columns = self.__get_column_names__()
        for k in t:
            if k not in df_columns:
                raise DataTableExceptions.DataTableException(
                    code=-102,
                    message=
                    "template values not relative to defined columns")

        access_path = self.__get_access_path__(t)
        if access_path:
            return self.__find_by_template_index__(t, access_path, fields)
        return self.__find_by_template_scan__(t, fields)
Пример #12
0
    def define_primary_key(self, columns):
        """
        Define the primary key of the table.

        Inserts one index_definitions row per key column and records the key under 'PRIMARY' in
        self.index_definitions.
        NOTE(review): existing PRIMARY rows are not removed from index_definitions here, so
        calling this twice does not replace the stored definition - confirm the intended
        behaviour.

        :param columns: List of key column names in order.
        :return: None
        :raises DataTableExceptions.DataTableException: Code -1000 if a key column is not one of
            the table's defined columns.
        """
        column_names = [c.column_name for c in self.column_definitions]
        for col in columns:
            if col not in column_names:
                raise DataTableExceptions.DataTableException(
                    code=-1000, message='Invalid key columns')

        # One cursor and one commit for the whole key: either every key column is recorded or
        # none is, and the cursor is always released.
        cursor = self.cnx.cursor()
        try:
            for col in columns:
                # NOTE(review): built by concatenation; switch to a parameterized query once
                # the driver's parameter style is confirmed.
                q = "insert into index_definitions values('PRIMARY','PRIMARY','" \
                    + col + "','" \
                    + self.t_name + "')"
                cursor.execute(q)
            self.cnx.commit()
        except Exception:
            self.cnx.rollback()
            raise
        finally:
            cursor.close()

        self.index_definitions['PRIMARY'] = IndexDefinition(
            "PRIMARY", columns, "PRIMARY").dic
Пример #13
0
    def matches_template(self, row, t):
        """
        Decide whether a row satisfies a template.

        Delete and find both need to compare rows to templates, so the comparison lives here.

        :param row: A single dictionary representing a row in the table.
        :param t: A template (dictionary of column name -> required value), or None.
        :return: True if the row matches the template. A None template matches every row.
        :raises DataTableExceptions.DataTableException: Code -4 if the comparison fails, for
            example because the row lacks a column named in the template.
        """

        # Basically, this means there is no where clause. A row always matches the None where clause.
        if t is None:
            return True

        try:
            # The row matches unless some template column differs from the row's value.
            return not any(row[n] != t[n] for n in t.keys())
        except Exception as e:
            # Raise the exception class; the module itself is not callable.
            raise DataTableExceptions.DataTableException(
                -4, "Some kind of problem with keys/column names") from e
Пример #14
0
    def get_key(self, r):
        """
        Build the primary key template for a row.

        :param r: A row, indexed by column name.
        :return: Dictionary mapping each key column to the row's value for it, or None when the
            table has no key columns.
        :raises ValueError: If a key column holds the empty string.
        :raises DataTableExceptions.DataTableException: Code -201 if the row lacks a key column.
        """
        key_columns = self.key_columns
        if key_columns is None:
            return None

        key_template = {}
        try:
            # Copy the value of every key column from the input row into the template.
            for column in key_columns:
                value = r[column]
                key_template[column] = value

                # This is technically not correct but is in the code to handle possible
                # empty fields for columns from the Lahman 2017 tables.
                if value == "":
                    raise ValueError("Key field " + column + " is empty.")
        except KeyError as missing:
            raise DataTableExceptions.DataTableException(
                -201, "Key is missing attribute " + str(missing))

        return key_template
    def __load__(self):
        """
        Read the table's CSV file and add its rows to the in-memory table.

        Every line is reduced to the columns named by __get_column_names__() before it is
        handed to __add_row__().

        :return: None
        :raises DataTableExceptions.DataTableException: With the invalid_file code if the file
            cannot be read.
        """
        # Resolved before the try block so the handler below can always name the file.
        fn = self.__get_file_name__()

        try:
            # newline="" is what the csv module asks for so that newlines inside quoted fields
            # are parsed correctly.
            with open(fn, "r", newline="") as csvfile:
                # "," separates columns and anything in between " ... " parses as a single
                # string, even if it has things like "," in it.
                reader = csv.DictReader(csvfile, delimiter=",", quotechar='"')

                # Get the names of the columns defined for this table from the metadata.
                column_names = self.__get_column_names__()

                # Loop through each line (well dictionary) in the input file.
                for r in reader:
                    # Only add the defined columns into the in-memory table. The CSV file may
                    # contain columns that are not relevant to the definition.
                    projected_r = self.project([r], column_names)[0]
                    self.__add_row__(projected_r)

        except IOError as e:
            raise DataTableExceptions.DataTableException(
                code=DataTableExceptions.DataTableException.invalid_file,
                message="Could not read file = " + str(fn)) from e
Пример #16
0
    def delete(self, t):
        """
        Remove every row that matches the template.

        A failure while filtering is reported on standard output and leaves the rows untouched.

        :param t: Template passed to matches_template() for every row.
        :return: None
        :raises DataTableExceptions.DataTableException: Code -309 if the table is derived.
        """
        if self.derived:
            raise DataTableExceptions.DataTableException(
                -309, "Cannot modify a derived table.")

        try:
            # Collect the rows to keep in a fresh list rather than deleting in place; the table
            # is only swapped over once every row has been examined.
            positions = range(0, len(self.rows))
            candidates = (self.rows[pos] for pos in positions)
            survivors = [
                row for row in candidates
                if not self.matches_template(row, t)
            ]
            self.rows = survivors
        except Exception as problem:
            print("Exception = " + str(problem))
 def update(self, t, change_values):
     """
     Update is not supported by this table.

     :param t: Unused.
     :param change_values: Unused.
     :raises DataTableExceptions.DataTableException: Always, with the not_implemented code.
     """
     failure = DataTableExceptions.DataTableException(
         code=DataTableExceptions.DataTableException.not_implemented,
         message="Updated not implemented")
     raise failure
 def delete(self, t):
     """
     Delete is not supported by this table.

     :param t: Unused.
     :raises DataTableExceptions.DataTableException: Always, with the not_implemented code.
     """
     failure = DataTableExceptions.DataTableException(
         code=DataTableExceptions.DataTableException.not_implemented,
         message="Delete not implemented")
     raise failure
 def insert(self, r):
     """
     Insert is not supported by this table.

     :param r: Unused.
     :raises DataTableExceptions.DataTableException: Always, with the not_implemented code.
     """
     failure = DataTableExceptions.DataTableException(
         code=DataTableExceptions.DataTableException.not_implemented,
         message="Insert not implemented")
     raise failure
Пример #20
0
    def insert(self, r):
        """
        Inserts a row into the table.

        The first row inserted into a table without headers defines the columns. Later rows are
        checked for invalid columns and, when get_key() yields a primary key template, for null
        and duplicate key values before being stored under the next auto-incremented row ID.
        Indexes are not updated here; the user must explicitly call create index.

        :param r: A row (dictionary keyed by column name) to insert into the table.
        :return: None
        :raises DataTableExceptions.DataTableException: Code -10 for a derived table, -11 for
            invalid columns, -12 for a null or duplicate primary key, -501 for any other error.
        """

        try:
            # Cannot insert into derived tables.
            if self.derived:
                raise DataTableExceptions.DataTableException(
                    -10, "Cannot modify a derived table.")

            if self.rows is None:
                self.rows = {}

            if self.headers is None:
                # If there are no defined columns, the first insert defines the columns.
                self.headers = list(r.keys())
            else:
                # Are there any invalid columns?
                invc = self.__col_list_valid(r)
                if invc != True and len(invc) > 0:
                    raise DataTableExceptions.DataTableException(
                        -11, "Invalid columns " + str(invc))

                pk = self.get_key(r)  # Form a template for the primary key.
                if pk is not None:
                    if any(value is None for value in pk.values()):
                        raise DataTableExceptions.DataTableException(
                            -12, "Null primary key column")

                    # The key is valid. Now determine if there is an entry with this key.
                    t = self.find_by_template(pk)

                    # Any non-empty result is a duplicate. An empty result, whether it is
                    # reported as None or as an empty container, means the row can be added.
                    if t is not None and t.rows:
                        raise DataTableExceptions.DataTableException(
                            -12, "Duplicate primary key")

            # Add to dictionary using auto-increment ID.
            self.next_row_id += 1
            self.rows[self.next_row_id] = r

        except DataTableExceptions.DataTableException:
            # Keep the specific error codes raised above instead of rewriting them to -501.
            raise
        except Exception as e:
            raise DataTableExceptions.DataTableException(
                -501, "Unknown error in insert(). Original e = " + str(e)) from e