def LoadFileIntoRedshift(rsConnect, s3, logger, fileUtilities, localFilepath, destinationSchema,
                         redshiftDestTable, fileFormat, dateFormat, delimiter, isManifest='N'):
    '''
    Load a file from the local drive into a Redshift table, staging it in S3.

    Steps:
      1. Unless isManifest == 'Y', gzip localFilepath to localFilepath + ".gz".
         When isManifest == 'Y' the file is uploaded as-is.
      2. Upload the resulting file to a temporary S3 key.
      3. Build the job definition and hand it to RedshiftUtilities.LoadDataFromS3.
      4. Delete the temporary S3 key, whether or not the load succeeded.

    Parameters:
      rsConnect         -- Redshift connection, passed through to LoadDataFromS3
      s3                -- S3 handle used for the upload, the load and the delete
      logger            -- logger passed through to LoadDataFromS3
      fileUtilities     -- object providing GzipFile(sourcePath, targetPath)
      localFilepath     -- path of the local file to load
      destinationSchema -- stored in the job as "destinationSchema"
      redshiftDestTable -- stored in the job as "tableName"
      fileFormat        -- stored in the job as "fileFormat"
      dateFormat        -- stored in the job as "dateFormat"
      delimiter         -- stored in the job as "delimiter"
      isManifest        -- 'Y' to skip the gzip step; also forwarded to LoadDataFromS3

    Any exception raised by the helpers propagates to the caller.
    The local ".gz" file created in step 1 is not removed by this function.
    '''
    if isManifest == 'Y':
        zipLocalFilepath = localFilepath
    else:
        # Zip the file
        zipLocalFilepath = localFilepath + ".gz"
        fileUtilities.GzipFile(localFilepath, zipLocalFilepath)

    bucketName, s3TempKey = S3Utilities.UploadFileToS3Temp(s3, zipLocalFilepath)

    # From here on a temporary object exists in S3.  Remove it in a finally
    # block so that a failed load does not leave it behind in the bucket.
    try:
        # Build the job definition file
        job = {}
        job["destinationSchema"] = destinationSchema
        job["tableName"] = redshiftDestTable
        job["s3Filename"] = S3Utilities.GetS3FileName(bucketName, s3TempKey)
        job["fileFormat"] = fileFormat
        job["dateFormat"] = dateFormat
        job["delimiter"] = delimiter

        RedshiftUtilities.LoadDataFromS3(rsConnect, s3, job, logger, isManifest)
    finally:
        S3Utilities.DeleteFile(s3, bucketName, s3TempKey)
def testGetS3FileName(self):
    # Upload a small file to the temporary S3 area, then check that the name
    # built by S3Utilities.GetS3FileName has both the bucket name and the
    # original file name among its "/"-separated segments.
    fileNameTested = "testGetS3FileName.txt"
    localTestFile = self.createTestingFile(
        fileNameTested,
        "Testing GetS3FileName from S3Utilities...",
    )

    uploadResult = S3Utilities.UploadFileToS3Temp(self.awsParams.s3, localTestFile)
    bucketName, s3TempKey = uploadResult

    nameSegments = S3Utilities.GetS3FileName(bucketName, s3TempKey).split("/")

    self.assertIn(
        bucketName,
        nameSegments,
        "s3 File Name does not contain the bucketName.",
    )
    self.assertIn(
        fileNameTested,
        nameSegments,
        "s3 File Name does not contain a valid s3TempKey.",
    )
def ProcessLiquidBalanceFile(self):
    '''
    Extract every configured sheet and load it into Redshift.

    For each entry of self.job["sheetsToExtract"]:
      - call self.ExtractSheet on it,
      - build the S3 key "<s3SrcDirectory>/<outputName>.<sheetsOutputFormat>.gz",
      - load that S3 object into the sheet's "tempTableName" table through
        RedshiftUtilities.LoadDataFromS3,
      - delete the S3 object once the load has returned.

    Any error is logged with its traceback and re-raised.
    '''
    try:
        redshiftSettings = self.awsParams.redshift
        database = redshiftSettings['Database']
        hostname = redshiftSettings['Hostname']
        port = redshiftSettings['Port']
        credential = self.awsParams.redshiftCredential
        username = credential['Username']
        password = credential['Password']

        # NOTE(review): this connection is not closed in this method --
        # confirm who owns its lifetime.
        rsConnect = RedshiftUtilities.Connect(dbname=database,
                                              host=hostname,
                                              port=port,
                                              user=username,
                                              password=password)

        for sheetConfig in self.job["sheetsToExtract"]:
            self.ExtractSheet(sheetConfig)

            # presumably ExtractSheet leaves the gzipped output under this key -- verify
            s3key = (self.job["s3SrcDirectory"] + "/" + sheetConfig["outputName"]
                     + "." + self.job["sheetsOutputFormat"] + ".gz")

            self.logger.info(self.moduleName
                             + " Uploading information to redshift for worksheet: "
                             + sheetConfig["name"])

            job = {
                "destinationSchema": self.job["destinationSchema"],
                "tableName": sheetConfig["tempTableName"],
                "s3Filename": S3Utilities.GetS3FileName(self.job["bucketName"], s3key),
                "fileFormat": self.job["fileFormat"],
                "dateFormat": self.job["dateFormat"],
                "delimiter": sheetConfig["delimiter"],
            }

            RedshiftUtilities.LoadDataFromS3(rsConnect, self.awsParams.s3, job, self.logger)
            S3Utilities.DeleteFile(self.awsParams.s3, self.job["bucketName"], s3key)
    except:
        # Log with traceback, then let the caller see the original error.
        self.logger.exception(
            self.moduleName
            + " [ProcessLiquidBalanceFile] - We had an error in LiquidsBalance during processBlock")
        raise