Exemplo n.º 1
0
 def GetNewFiles(self, lastModifiedDatetime):
     '''
     Get the list of new files on S3 together with the newest modification
     time seen.

     lastModifiedDatetime: watermark from the previous run, or None.
         When None, every file listed under the job's source directory is
         returned.  Otherwise only files whose modification time is
         strictly greater than this value are returned.  It must be
         comparable (with ">") to what DatetimeUtilities.ConvertToDT
         returns.

     Returns a tuple (files, maxModifiedDatetime):
         files               - list of the "fileName" values selected
         maxModifiedDatetime - the largest modification time among the
                               selected files; falls back to
                               lastModifiedDatetime when nothing was
                               selected (None if that was None too).
     '''
     bucketName = self.job["bucketName"]
     srcDirectory = self.job["s3SrcDirectory"]
     isIncremental = lastModifiedDatetime is not None

     if isIncremental:
         newFiles = S3Utilities.GetFilesSinceGivenDatetime(
             bucketName, srcDirectory, lastModifiedDatetime)
     else:
         newFiles = S3Utilities.GetFilesNModifiedDatetimeFromS3(
             bucketName, srcDirectory)

     files = []
     maxModifiedDatetime = lastModifiedDatetime
     for newFile in newFiles:
         dt = DatetimeUtilities.ConvertToDT(newFile["datetime"])
         # Compare against the fixed watermark, NOT the running maximum.
         # Comparing against the running maximum silently dropped any file
         # that was not strictly newer than every file listed before it
         # (e.g. two files modified in the same second, or a listing that
         # is not sorted by modification time).
         if isIncremental and not (dt > lastModifiedDatetime):
             continue
         files.append(newFile["fileName"])
         if maxModifiedDatetime is None or dt > maxModifiedDatetime:
             maxModifiedDatetime = dt
     return (files, maxModifiedDatetime)
Exemplo n.º 2
0
 def GetLastModifiedDatetime(self, filelocs):
     '''
     Look up the "lastModifiedDatetime" watermark saved by the last good
     run of this module, for use in the incremental load.

     Side effect: stores the id returned by etlUtilities.GetRunID in
     self.currProcId.

     filelocs: mapping; the ETL log table name is read from
         filelocs["tblEtl"]["table"].

     The last good run record is expected to carry a "params" entry that
     holds a JSON string such as
         {"lastModifiedDatetime": "2017-06-07 19:51:06"}

     Returns whatever DatetimeUtilities.ConvertToDT yields for the stored
     value.  When there is no previous run, no "params", or no
     "lastModifiedDatetime" key, None is handed to ConvertToDT.
     NOTE(review): the original docstring states the result is in UTC;
     that is not verifiable from this method - confirm in ConvertToDT.
     '''
     etlTable = filelocs["tblEtl"]["table"]
     self.currProcId = self.etlUtilities.GetRunID(etlTable, self.moduleName)
     previousRun = self.etlUtilities.GetLastGoodRun(etlTable,
                                                    self.moduleName)

     # Fall back to an empty mapping so the lookup below yields None
     storedParams = {}
     if previousRun is not None:
         rawParams = previousRun.get("params")
         if rawParams is not None:
             storedParams = json.loads(rawParams)

     watermark = storedParams.get("lastModifiedDatetime")
     return DatetimeUtilities.ConvertToDT(watermark)