Пример #1
0
 def CreateSQLFiles(self, proc, dest):
     '''
     Routine to create sql files to use to create tables in RedShift
     '''
     try:
         for table in proc["tables"]:
             fname = self.commonParams["sqlFolder"] + "Create_" + table[
                 "name"] + ".sql"
             self.logger.info(fname)
             outfile = open(fname, "w")
             outLine = "DROP TABLE IF EXISTS {}.{};".format(
                 dest, table["name"])
             outLine = FileUtilities.PutLine(outLine, outfile)
             outLine = "CREATE TABLE {}.{} (".format(dest, table["name"])
             outLine = FileUtilities.PutLine(outLine, outfile)
             ndx = 0
             for fld in table["fields"]:
                 if ndx > 0:
                     outLine = ','
                 ndx = ndx + 1
                 outLine = outLine + fld["name"] + " " + fld["type"]
                 if fld["type"] == "VARCHAR":
                     outLine = outLine + "(" + fld["size"] + ")  ENCODE LZO"
                 outLine = FileUtilities.PutLine(outLine, outfile)
             outfile.write(");")
             outfile.close()
     except:
         self.logger.exception(self.moduleName +
                               "- we had an error in CreateSQLFiles ")
         raise
Пример #2
0
    def SqlToLoadDataFromAthena(logger, tableSettings, fileLoc,
                                partitionValue):  # pylint: disable=too-many-branches
        '''
        Create SQL Script to load data from Athena into the redshift table
        '''
        fname = AthenaUtilities.ComposeInsertIntoSqlFilename(
            tableSettings, fileLoc)
        outfile = open(fname, "w")

        athenaSchemaName = AthenaUtilities.ComposeAthenaSchemaName(
            tableSettings["schemaName"])
        schemaDotTableName = athenaSchemaName + '.' + tableSettings["table"]
        try:
            # Build the INSERT statement
            outLine = "INSERT INTO " + tableSettings[
                "schemaName"] + '.' + tableSettings["table"] + "\n    SELECT"

            # If we have a date field we need to handle as a special case since Athena/Parquet does not support Date
            if AthenaUtilities.ContainsSpecialFields(tableSettings):
                outLine = outLine + AthenaUtilities.HandleSpecialFields(
                    tableSettings)
            else:
                outLine = outLine + " * "  # Perform a straight copy

            outLine = outLine + "    FROM " + schemaDotTableName

            # Add where clause if a partition value is specified.  Should only be the case with paritioned tables
            if AthenaUtilities.IsTablePartitioned(tableSettings):
                if partitionValue:
                    # Copy data smartly rather than copying everything
                    partitionKey = AthenaUtilities.GetPartitionKey(
                        tableSettings)
                    outLine = outLine + "\n    WHERE " + partitionKey + " = '" + partitionValue + "'"

            outLine = outLine + ";"
            outLine = FileUtilities.PutLine(outLine, outfile)
        except:  # pylint: disable=bare-except
            logger.exception("problem creating table SQL for " +
                             schemaDotTableName)
        finally:
            outfile.close()
        return fname
Пример #3
0
def SqlToCompareTables(logger, tableSettings, fname, schema1, table1,
                       validation1, schema2, table2, validation2, whereClause,
                       columnsToExclude):
    '''
    Create SQL Script to load data from Athena into the redshift table
    '''
    sqlScript = "SELECT COUNT(*) FROM ("
    sqlScript = sqlScript + BuildSqlSubStatement(
        tableSettings, schema1, table1, validation1, whereClause,
        columnsToExclude)
    sqlScript = sqlScript + "\nEXCEPT"
    sqlScript = sqlScript + BuildSqlSubStatement(
        tableSettings, schema2, table2, validation2, whereClause,
        columnsToExclude)
    sqlScript = sqlScript + "\n);"

    if fname is not None:
        outfile = open(fname, "w")
        FileUtilities.PutLine(sqlScript, outfile)
        outfile.close()

    return sqlScript