def Start(self, logger, moduleName, filelocs):
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.fileUtilities.EmptyFolderContents(
            self.localTempDirectory + "/QC/")  # delete and recreate the folder
        hqc = HindsightQC(self.logger, self.fileUtilities,
                          self.bcpUtilities, self.localTempDirectory)
        hqc.Get_sql_server_rowcounts("pre")  # get pre-ETL rowcounts
        # Execute the pre-ETL queries
        for sqlFile in self.job["bcpParameters"].get("preETLQueries"):
            RedshiftUtilities.PSqlExecute(
                self.fileUtilities.GetApplicationDirectory("Hindsight") +
                sqlFile, logger)
        for subJob in self.job["bcpParameters"]["subJobs"]:
            if subJob.get("destinationSchema") is None:
                subJob["destinationSchema"] = self.job["bcpParameters"][
                    "destinationSchema"]
            self.ProcessSubJob(subJob)
        # Get SQL Server rowcounts
        hqc.Get_sql_server_rowcounts("post")
        # Execute the post-ETL queries to prepare the data prior to loading into the production tables
        for sqlFile in self.job["bcpParameters"].get("postETLQueries"):
            RedshiftUtilities.PSqlExecute(
                self.fileUtilities.GetApplicationDirectory("Hindsight") +
                sqlFile, logger)
        # Get Redshift rowcounts
        hqc.Get_redshift_rowcounts("post")
        # Execute the post-ETL QC queries
        status = hqc.ValidateETL()
        # Check whether the ETL passed QC:
        #   Check 1: inter-version counts. Are the differences beyond a particular threshold?
        #   Check 2: pre-SQL vs. post-Redshift. Are the differences beyond a particular threshold?
        # If the ETL does not pass QC, do not update/insert the prod tables.
        # If the ETL passed QC, insert into the production tables (data, attributes, history).
        if status:
            self.logger.info("ETL good to go")
            for sqlFile in self.job["bcpParameters"].get("FinalLoadQueries"):
                #===============================================================
                # add a process to backup data/attributes history tables
                # Download to S3
                #===============================================================
                RedshiftUtilities.PSqlExecute(
                    self.fileUtilities.GetApplicationDirectory("Hindsight") +
                    sqlFile, logger)
        else:
            self.logger.warning("Bad ETL. No go!")
        print hqc.TimeElaspsed()
    except:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Start of routine '''
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(
            filelocs["tblEtl"]["table"], self.moduleName)
        ###
        #  set up to run create folder
        ###
        # self.fileUtilities.moduleName = self.moduleName
        # self.fileUtilities.localBaseDirectory = self.localTempDirectory
        # self.fileUtilities.CreateFolders(self.job["folders"])
        ###
        self.ProcessCategories()
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    ''' Start of routine '''
    ApplicationBase.Start(self, logger, moduleName, filelocs)
    # At some point this will be part of Start
    ApplicationBase.ProcessInput(self, logger, moduleName, filelocs)
def Start(self, logger, moduleName, filelocs):
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(
            filelocs["tblEtl"]["table"], self.moduleName)
        self.rawFolder = self.localTempDirectory + "/" + "Raw"
        self.csvFolder = self.localTempDirectory + "/" + "CSV"
        self.CheckWorkingFolders()
        self.BulkDownload()
        self.ProcessFiles()
        self.BulkUploadToS3()
        self.LoadAllFromS3()
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    ''' main routine '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.info(self.moduleName + " - Processing: ")
        outputCSVfileName = self.localTempDirectory + '/PheonixDocuments.csv'
        self.logger.info(self.moduleName + " - Pull documents from Phoenix: ")
        jsonDocuments = self.PullDataFromPhoenix()
        self.logger.info(self.moduleName + " - save contents to CSV file from Phoenix: ")
        self.ExportToCSV(outputCSVfileName, jsonDocuments)
        self.logger.info(self.moduleName + " - push documents csv file to S3: ")
        bucketName, s3TempKey = S3Utilities.UploadFileToS3Temp(
            self.awsParams.s3, outputCSVfileName)
        self.logger.info(self.moduleName + " - Create document table: ")
        psConnect = self.GetPSConnection()
        self.CreatePostgresTables(psConnect)
        self.logger.info(self.moduleName + " - pull document s3 to database server temp: ")
        postgresTempFile = self.DownloadFromS3ToPSTempDir(
            psConnect, bucketName, s3TempKey)
        self.logger.info(self.moduleName + " - load documents csv file: ")
        self.LoadDataFromPostgresTempDir(psConnect, postgresTempFile)
        self.logger.info(self.moduleName + " - clean up temp file: ")
        S3Utilities.DeleteFile(self.awsParams.s3, bucketName, s3TempKey)
    except:
        logger.exception(moduleName + " - Exception in start!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Start of routine '''
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(
            filelocs["tblEtl"]["table"], self.moduleName)
        for table in self.job["tables"]:
            self.ProcessTable(table)
            self.LoadDataFromAthenaIntoRedShiftLocalScripts(table)
            # self.LoadDataFromAthenaIntoRedShiftS3Scripts(table)  # Test: load all data from Athena
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(
            filelocs["tblEtl"]["table"], self.moduleName)
        ###
        #  set up to run create folder
        ###
        self.fileUtilities.moduleName = self.moduleName
        self.fileUtilities.localBaseDirectory = self.localTempDirectory
        self.fileUtilities.CreateFolders(self.job["folders"])
        ###
        self.fromDate = self.GetFromDate()
        for databaseSettings in self.job["Databases"]:
            if databaseSettings["execute"] == 'Y':
                self.ProcessDatabase(databaseSettings)
            else:
                self.logger.debug(self.moduleName + " -- skip database " +
                                  databaseSettings["common"]["name"])
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.csvFile = self.localTempDirectory + "/" + self.job["fileNameOut"]
        self.csvFileHistory = self.localTempDirectory + "/" + self.job["fileNameOutHistory"]
        self.GetAndTransform()
        self.UploadToS3()
        self.LoadAllFromS3(
            self.job["s3ToDirectory"] + '/' + self.job["fileNameOut"] + '.gz',
            self.job["tableName"])
        self.LoadAllFromS3(
            self.job["s3ToDirectory"] + '/' + self.job["fileNameOutHistory"] + '.gz',
            self.job["tableName"] + '_history')
        self.LoadAllFromS3(
            self.job["xReference"]["s3DataDirectory"],
            self.job["tableName"] + self.job["xReference"]["tableNameSfx"])
        self.ExecutePostETL()
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    ''' main routine '''
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(
            filelocs["tblEtl"]["table"], self.moduleName)
        ###
        #  set up to run create folder
        ###
        self.fileUtilities.moduleName = self.moduleName
        self.fileUtilities.localBaseDirectory = self.localTempDirectory
        self.fileUtilities.CreateFolders(self.job["folders"])
        self.fromDate = self.GetFromDate()
        ###
        localFilepath = self.GetMostRecentFile(self.job["foldertoscan"])
        # localFilepath = r'C:\tmp\IHS Markit Outlook for Global Oil Market Fundamentals - September 2017.xlsx'
        for tables in self.job["tables"]:
            fname = self.fileUtilities.CreateTableSql(
                tables, self.fileUtilities.sqlFolder)
            RedshiftUtilities.PSqlExecute(fname, self.logger)
            outPutFileName = (self.fileUtilities.csvFolder + self.fromDate +
                              "_" + tables["table"] + '.csv')
            outputGZ = (self.fileUtilities.gzipFolder + self.fromDate +
                        "_" + tables["table"] + '.csv.gz')
            tableJson = tables
            xl = ExcelUtilities(logger)
            if sys.version[0] == '3':
                csvfile = open(outPutFileName, 'w', newline='')
            elif sys.version[0] == '2':
                csvfile = open(outPutFileName, 'wb')
            csvWriter = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
            if localFilepath is not None:
                self.ProcessFile(xl, localFilepath, csvWriter)
            csvfile.close()
            self.fileUtilities.GzipFile(outPutFileName, outputGZ)
            self.BulkUploadToS3()
            self.LoadData(tableJson)
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    ''' main routine '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        s3Key = self.job["s3SrcDirectory"] + "/" + self.job["fileToLoad"]
        self.logger.info(self.moduleName + " - Processing file: " + s3Key)
        localFilepath = self.localTempDirectory + "/" + ntpath.basename(s3Key)
        S3Utilities.DownloadFileFromS3(self.awsParams.s3, self.job["bucketName"],
                                       s3Key, localFilepath)
        df = pd.read_excel(localFilepath, "Major Variables", index_col=None,
                           na_values=['NaN'], skiprows=1, parse_cols="C:E,G:I",
                           header=None)
        # Save the data as CSV
        outputCSVfileName = self.localTempDirectory + '/SampleData.csv'
        df.to_csv(outputCSVfileName, sep=str(self.job["delimiter"]),
                  encoding='utf-8', index=False)
        # Upload the CSV file to a temporary S3 location.  Postgres will download
        # it from there to its local directory.
        bucketName, s3TempKey = S3Utilities.UploadFileToS3Temp(
            self.awsParams.s3, outputCSVfileName)
        psConnect = self.GetPSConnection()
        # Postgres tables are created using a connection (rather than psql)
        self.CreatePostgresTables(psConnect)
        postgresTempFile = self.DownloadFromS3ToPSTempDir(
            psConnect, bucketName, s3TempKey)
        self.LoadDataFromPostgresTempDir(psConnect, postgresTempFile)
        S3Utilities.DeleteFile(self.awsParams.s3, bucketName, s3TempKey)
        self.LoadBaseAttributes(psConnect)
        self.LoadBaseData(psConnect, '1000', 'glm_value')
        self.LoadBaseData(psConnect, '2000', 'arima_value')
        self.LoadBaseData(psConnect, '3000', 'lasso_value')
        # self.LoadBaseData(psConnect, '4000', 'nn_value')
        # self.LoadBaseData(psConnect, '5000', 'spectre_value')
        psConnect.close()
        self.logger.debug(" SampleData CSV loaded to RedShift")
    except:
        logger.exception(moduleName + " - Exception in start!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Start of routine '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        for srcFileParameter in self.job["srcFileParameters"]:
            self.ProcessS3File(srcFileParameter)
    except:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Starting point of this Project '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.GetFileFromS3()
        Form1(logger, self.fileUtilities, self.localTempDirectory,
              self.job, self.awsParams)  # invoke the Form1 handler
    except:
        self.logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Application starting point '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.DownloadFilesFromS3()
        ISO(logger, self.fileUtilities, self.localTempDirectory,
            self.job, self.awsParams)  # invoke the ISO handler
    except Exception as ex:
        self.logger.exception(moduleName + " - Exception!")
        self.logger.exception("{}".format(str(ex)))
        raise
def Start(self, logger, moduleName, filelocs):
    ''' start it '''
    ApplicationBase.Start(self, logger, moduleName, filelocs)
    seedVal = 100
    logger.info("Starting count with seed %s in Counter" % (seedVal))
    for i in range(seedVal):
        val = i + seedVal
        # logger.info("val value in Counter %s" % (str(val)))
    logger.info(
        "this is the starting seed %s and the end value was %s in Counter"
        % (seedVal, val))
def Start(self, logger, moduleName, filelocs):
    ''' main routine '''
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(
            filelocs["tblEtl"]["table"], self.moduleName)
        ###
        #  establish connection to Access database
        ###
        conn = self.EstablishConnection()
        cur = conn.cursor()
        sqlline = self.FixSQLStatement()
        cur.execute(sqlline)
        outputfileName = self.localTempDirectory + '/ENPdata.csv'
        self.ConvertToCSV(cur, outputfileName)
        ###
        #  load the CSV to RedShift
        ###
        self.logger.debug(self.moduleName + " - ENP load CSV to RedShift")
        rsConnect = self.etlUtilities.GetAWSConnection(self.awsParams)
        RedshiftUtilities.LoadFileIntoRedshift(
            rsConnect, self.awsParams.s3, self.logger, self.fileUtilities,
            outputfileName, self.job["destinationSchema"],
            self.job["tableName"], self.job["fileFormat"],
            self.job["dateFormat"], self.job["delimiter"])
        self.logger.debug(self.moduleName + " - ENP CSV loaded to RedShift")
        # Cleanup
        rsConnect.close()
        cur.close()
        conn.close()
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(
            filelocs["tblEtl"]["table"], self.moduleName)
        paramsList = self.GetParamsList(filelocs["tblEtl"]["table"])
        ###
        #  set up to run create folder
        ###
        self.fileUtilities.moduleName = self.moduleName
        self.fileUtilities.localBaseDirectory = self.localTempDirectory
        self.fileUtilities.CreateFolders(self.job["folders"])
        ###
        for tblJson in self.job["tables"]:
            fname = self.fileUtilities.CreateTableSql(
                tblJson, self.fileUtilities.sqlFolder)
            RedshiftUtilities.PSqlExecute(fname, self.logger)
        sqlPullDataScript, fromDate = self.CreatePullScript(paramsList)
        outputCSV = self.fileUtilities.csvFolder + fromDate + self.moduleName + ".CSV"
        outputGZ = self.fileUtilities.gzipFolder + fromDate + self.moduleName + '.csv.gz'
        self.BulkExtract(sqlPullDataScript, outputCSV)
        self.fileUtilities.GzipFile(outputCSV, outputGZ)
        self.BulkUploadToS3()
        for tblJson in self.job["tables"]:
            if "s3subfolder" in tblJson:
                self.LoadData(tblJson["s3subfolder"], tblJson)
                maxDate = self.GetMaxUpdateDate(tblJson)
                sMaxDate = maxDate["lastrun"].strftime('%m/%d/%Y')
                if self.etlUtilities.SetInstanceParameters(
                        filelocs["tblEtl"]["table"], currProcId,
                        json.dumps({"lastrun": sMaxDate})) is not True:
                    self.logger.info(self.moduleName + " - we could not set the instance.")
        self.UpdateTable(filelocs["tblEtl"]["schemaName"],
                         filelocs["tblEtl"]["table"],
                         self.job["tables"], currProcId)
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    ''' Start of routine '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.CreatePackedFolder()
        fileList = self.DownloadFiles()
        self.ProcessFiles(fileList)
        self.UploadPackedToS3()
        self.LoadTables()
    except Exception:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Start of routine '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        for sheet in self.job["ExcelSheets"]:
            self.DownloadAllFiles(sheet)
            self.ConvertExcel2Csv(sheet)
        self.SkipPackAndLoad(self.job["ExcelSheets"])
        if "postETLQueries" in self.job:
            ApplicationBase.CreateTables(self, self.job["postETLQueries"])
    except Exception:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' main routine starts here '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.moduleName = moduleName
        self.CreateTables(self.job["tempTablesScript"])
        self.SetLastLiquidsBalanceFileInfo()
        self.ProcessLiquidBalanceFile()
        self.CreateTables(self.job["unpivotScript"])
        self.CreateTables(self.job["cleanTempTablesScript"])
    except Exception as err:
        self.logger.error(self.moduleName + " - Exception in start.")
        raise err
def Start(self, logger, moduleName, filelocs):
    ''' Starting point of this Project '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.dbfUtilities = DBFUtilities(logger)
        self.CreateFolders()
        self.Process()
        self.UploadPackedToS3()
        self.LoadFilesIntoRedshift()
        self.EmptyPackedFolder()
        self.PostLoadETL()
    except:
        self.logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Start of routine '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        for srcFileParameter in self.job["srcFileParameters"]:
            self.DownloadAllFiles(srcFileParameter)
            self.UnzipExcel(srcFileParameter)
        self.SkipPackAndLoad(self.job["srcFileParameters"])
        if "postETLQueries" in self.job:
            ApplicationBase.CreateTables(self, self.job["postETLQueries"])
    except Exception:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Start of routine '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.fileUtilities.EmptyFolderContents(
            self.localTempDirectory)  # delete and recreate the folder
        self.fileUtilities.EmptyFolderContents(
            self.localTempDirectory + "/cleaned/")  # delete and recreate the folder
        self.DownloadFiles()
        self.ProcessFiles()
    except Exception:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Main starting routine '''
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(
            filelocs["tblEtl"]["table"], self.moduleName)
        ###
        #  set up to run create folder
        ###
        self.fileUtilities.moduleName = self.moduleName
        self.fileUtilities.localBaseDirectory = self.localTempDirectory
        self.fileUtilities.CreateFolders(self.job["folders"])
        ###
        for tblJson in self.job["tables"]:
            fname = self.fileUtilities.CreateTableSql(
                tblJson, self.fileUtilities.sqlFolder)
            RedshiftUtilities.PSqlExecute(fname, self.logger)
            if "s3subfolder" in tblJson:
                self.s3subFolder = tblJson["s3subfolder"]
        outputFileName = self.ProcessRequest()
        outputCSV = outputFileName
        outputGZ = self.fileUtilities.gzipFolder + self.moduleName + '.csv.gz'
        self.fileUtilities.GzipFile(outputCSV, outputGZ)
        self.BulkUploadToS3(self.s3subFolder)
        for tblJson in self.job["tables"]:
            if "s3subfolder" in tblJson:
                self.LoadData(tblJson["s3subfolder"], tblJson)
        self.UpdateTable(filelocs["tblEtl"]["schemaName"],
                         filelocs["tblEtl"]["table"],
                         self.job["tables"], currProcId)
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    ''' Start of routine '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.packedFolder = self.localTempDirectory + "/packed/"
        self.CreateFolders()
        lastModifiedDatetime = self.GetLastModifiedDatetime(filelocs)
        maxModifiedDatetime = self.ProcessFiles(lastModifiedDatetime)
        self.UploadPackedToS3()
        self.LoadErcotTables()
        self.SetLastModifiedDatetime(
            filelocs, DatetimeUtilities.ConvertToSTR(maxModifiedDatetime))
        self.EmptyPackedFolder()
        self.PostLoadETL()
    except Exception:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Start of routine '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.tempFolder = self.localTempDirectory + "/Temp"
        self.packedFolder = self.localTempDirectory + "/Packed"
        self.rawDataFolder = self.localTempDirectory + "/RawData"
        self.CleanWorkingFolders()
        self.SynchronizeSourceFolder()
        self.CleanUpAndPack()
        self.UploadPackedToS3()
        self.LoadAirMarketsTables()
    except:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Start of routine '''
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        currProcId = self.etlUtilities.GetRunID(
            filelocs["tblEtl"]["table"], self.moduleName)
        self.ProcessRequest()
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.packedFolder = self.localTempDirectory + "/Packed"
        self.rawFolder = self.localTempDirectory + "/Raw"
        self.fileUtilities.RemoveFolder(self.packedFolder)
        self.fileUtilities.RemoveFolder(self.rawFolder)
        self.fileUtilities.CreateFolder(self.packedFolder)
        self.fileUtilities.CreateFolder(self.rawFolder)
        self.BulkExtractAll()
        self.TransformAndPackAll()
        self.BulkUploadToS3()
        self.LoadAllFromS3()
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    ''' start it '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        logger.exception(moduleName + "- starting play area")
        # self.TestZipIt()
        # self.MoveFolderToS3()
        # self.TestConnectionSQLServer()
        ####
        #  nugget is wrong since latest changes
        ####
        # self.TestGEForecastHistory()
        # self.ExcelTest()
        # self.TestDates(logger, moduleName)
        self.TestShooju(logger, moduleName)
        logger.exception(moduleName + "- ending play area")
    except Exception as ex:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Start of routine '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.excelUtilities = ExcelUtilities(logger)
        self.fileUtilities.EmptyFolderContents(
            self.localTempDirectory)  # delete and recreate the folder
        self.fileUtilities.EmptyFolderContents(
            self.localTempDirectory + "/CSVS/")  # delete and recreate the folder
        self.BulkDownload()
        self.UnzipExcel()
        for sheetParams in self.job["ExcelSheets"]:
            self.Excel2CsvSkipped(sheetParams)
        self.PostLoadETL()
        self.fileUtilities.EmptyFolderContents(
            self.localTempDirectory)  # clear contents of the folder
    except Exception:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    ''' Application starting point '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.pandasUtilities = PandasUtilities(self.logger)
        self.CreateFolders()
        #===================================================================
        # delete files from S3 bucket
        #===================================================================
        for folder in [flder for flder in list(self.job["folderPath"].keys())
                       if flder != "raw"]:
            self.DeleteFilesFromAWS(self.job["folderPath"][folder].replace("/", ""))
        #===================================================================
        # Process the files
        #===================================================================
        self.ProcessQuarterlyFilings()
        #===================================================================
        # Load into Redshift
        #===================================================================
        for folder in [flder for flder in list(self.job["folderPath"].keys())
                       if flder != "raw"]:
            self.LoadQFTables(self.job["folderPath"][folder].replace("/", ""))
        #===================================================================
        # do post load ETL & view creation
        #===================================================================
        self.PostLoadETL()
    except Exception as ex:
        self.logger.exception(moduleName + " - Exception!")
        self.logger.exception(str(ex))
        raise