def _verify_file_format(self): """ Verifies that the Excel document has the correct tabs and column headers. :return: True if the workbook has the correct worksheets and columns in each. :rtype: bool """ is_valid = True # hope for the best. sheet_names = self.workbook.sheet_names() for required_sheet in XLSReader.required_sheets: if required_sheet not in sheet_names: eprint("Error: missing sheet %s!" % required_sheet) is_valid = False else: sheet = self.workbook.sheet_by_name(required_sheet) header_row = sheet.row_values(rowx=0, start_colx=0) for required_column in XLSReader.required_columns[ required_sheet ]: if required_column not in header_row: eprint( "Error: missing column %s in sheet %s!" % (required_column, required_sheet) ) is_valid = False return is_valid
def _read_relationships_from_workbook(self): """ Reads the foreign keys for the tables from Excel. """ # "Relationships": ["Name", "Database", "Schema", "From Table", "To Table", "Conditions"] rel_sheet = self.workbook.sheet_by_name("Relationships") indices = self.indices["Relationships"] for row_count in range(1, rel_sheet.nrows): row = rel_sheet.row_values(rowx=row_count, start_colx=0) database_name = row[indices["Database"]] database = self.databases.get(database_name, None) if database is None: eprint( "ERROR: Database %s from the Relationships tab is not known." % database_name ) else: table_name = row[indices["From Table"]] table = database.get_table(table_name) if table is None: eprint( "ERROR: Table %s from the Relationships tab is not known." % table_name ) table.add_relationship( to_table=row[indices["To Table"]], conditions=row[indices["Conditions"]], )
def _read_columns_from_workbook(self): """ Reads the columns for the tables from Excel. """ # "Columns": ["Database", "Schema", "Table", "Column", "Name", "Type"], column_sheet = self.workbook.sheet_by_name("Columns") indices = self.indices["Columns"] for row_count in range(1, column_sheet.nrows): row = column_sheet.row_values(rowx=row_count, start_colx=0) database_name = row[indices["Database"]] database = self.databases.get(database_name, None) if database is None: eprint( "ERROR: Database %s from the Columns tab is not known." % database_name ) else: table_name = row[indices["Table"]] table = database.get_table(table_name) if table is None: eprint( "ERROR: Table %s from the Columns tab is not known." % table_name ) else: table.add_column( Column( column_name=row[indices["Name"]], column_type=row[indices["Type"]], ) )
def _read_tables_from_workbook(self): """ Reads the databases and tables from Excel. These are used to populate from the remaining sheets. """ # "Tables": ["Database", "Schema", "Table", "Updated", "Update Type", "# Rows", "# Columns", # "Primary Key", "Shard Key", "# Shards", "RLS Column"], table_sheet = self.workbook.sheet_by_name("Tables") indices = self.indices["Tables"] for row_count in range(1, table_sheet.nrows): row = table_sheet.row_values(rowx=row_count, start_colx=0) database_name = row[indices["Database"]] database = self.databases.get(database_name, None) if database is None: database = Database(database_name=database_name) self.databases[database_name] = database pk = row[indices["Primary Key"]].strip() if pk == "": pk = None else: pk = [x.strip() for x in pk.split(",")] sk_name = row[indices["Shard Key"]].strip() sk_nbr_shards = row[indices["# Shards"]] if (sk_name == "" and sk_nbr_shards != "") or ( sk_name != "" and sk_nbr_shards == "" ): eprint( "ERROR: %s need to provide both a shard key name and number of shards." % row[indices["Table"]] ) if sk_name == "": sk = None else: sk = [x.strip() for x in sk_name.split(",")] shard_key = None if sk_name != "" and sk_nbr_shards != "": shard_key = ShardKey( shard_keys=sk, number_shards=sk_nbr_shards ) table = Table( table_name=row[indices["Table"]], schema_name=row[indices["Schema"]], primary_key=pk, shard_key=None, ) database.add_table(table)
def valid_args(args): """ Checks to see if the arguments make sense. :param args: The command line arguments. :return: True if valid, false otherwise. """ # make sure there is a to_ flag since data has to come from somewhere unless this is just creating blank Excel. if not args.from_ddl and not args.from_excel and not args.to_excel: eprint("--from_ddl or --from_excel must be provided as arguments.") return False if args.from_ddl and not args.database: eprint("--from_ddl requires the --database option.") return False return True
def valid_args(args): """ Checks to see if the arguments make sense. :param args: The command line arguments. :return: True if valid, false otherwise. """ # TODO rethink what is and isn't valid and update. # make sure there is a to_ flag since data has to come from somewhere. if args.from_ddl is False and args.from_excel is False: eprint("--from_ddl or --from_excel must be provided as arguments.") return False # if args.to_excel is False and args.to_tql is False: # eprint("--to_tql or --to_excel must be provided as arguments.") # return False # if args.database is None: # eprint("a database name must be provided with --database") return True
def read_excel(args): """ Reads the database description from XLS and returns a database model. Note that XLSReader can read multiple databases at a time, but convert only supports one. If there are more than one database, the first will be returned and an error message written. :param args: The command line arguments. :returns: The database read from Excel. :rtype: Database """ reader = XLSReader() databases = reader.read_xls(filepath=args.from_excel) if len(databases) == 0: eprint("ERROR: No databases read.") return None if len(databases) > 1: eprint("WARNING: multiple databases read. Only using %s" % databases.values()[0].database_name) return databases.values()[0]
def main(): """Main function for the script.""" args = parse_args() database = None if valid_args(args): print(args) if args.from_ddl: print("Reading DDL ...") database = read_ddl(args) if args.from_excel: print("Reading Excel") database = read_excel(args) if args.validate: print("Validating database") vr = database.validate() if not vr.is_valid: for issue in vr.issues: eprint(issue) else: print("Database is valid.") if args.to_tql: print("Writing TQL ...") write_tql(args=args, database=database) if args.to_excel: print("Writing Excel ...") write_excel(args=args, database=database) if args.to_tsload: print("Writing tsload ...") write_tsload(args=args, database=database)
def _read_foreign_keys_from_workbook(self): """ Reads the foreign keys for the tables from Excel. """ # "Foreign Keys": ["Name", "Database", "Schema", "From Table", "Columns", "To Table", "Columns"], fk_sheet = self.workbook.sheet_by_name("Foreign Keys") indices = self.indices["Foreign Keys"] for row_count in range(1, fk_sheet.nrows): row = fk_sheet.row_values(rowx=row_count, start_colx=0) database_name = row[indices["Database"]] database = self.databases.get(database_name, None) if database is None: eprint( "ERROR: Database %s from the Foreign Keys tab is not known." % database_name ) else: table_name = row[indices["From Table"]] table = database.get_table(table_name) if table is None: eprint( "ERROR: Table %s from the Foreign Keys tab is not known." % table_name ) else: key_name = row[indices["Name"]] if key_name is None: eprint( "ERROR: Table %s from the Foreign Keys tab is missing a FK name." % table_name ) from_keys = row[indices["From Columns"]] from_keys = [x.strip() for x in from_keys.split(",")] to_keys = row[indices["To Columns"]] to_keys = [x.strip() for x in to_keys.split(",")] table.add_foreign_key( name=key_name, from_keys=from_keys, to_table=row[indices["To Table"]], to_keys=to_keys, )
def valid_args(args): """ Checks to see if the arguments make sense. :param args: The command line arguments. :return: True if valid, False otherwise. """ ret_value = True # make sure there is a to_ flag since data has to come from somewhere unless this is just creating blank Excel. if not args.ddl1 or not args.ddl2: eprint("--ddl1 and --ddl2 must be provided") ret_value = False else: if not os.path.exists(args.ddl1): eprint("file %s doesn't exist" % args.ddl1) ret_value = False if not os.path.exists(args.ddl2): eprint("file %s doesn't exist" % args.ddl2) ret_value = False return ret_value