Exemplo n.º 1
0
def splitOnUCLC(token):
    """Split *token* two ways and keep the split with more dictionary words.

    Scores the splitOnChange result against the splitPenultimate result by
    counting pieces that Oracle.isWord accepts (case-folded). Ties favour
    the splitOnChange result.
    """
    candidate_a = splitOnChange(token)
    score_a = sum(1 for piece in candidate_a if Oracle.isWord(piece.lower()))

    candidate_b = splitPenultimate(token)
    score_b = sum(1 for piece in candidate_b if Oracle.isWord(piece.lower()))

    return candidate_b if score_b > score_a else candidate_a
Exemplo n.º 2
0
Arquivo: dea.py Projeto: tnraman/ddy
    def __init__(self, configFile, mktName, processingDate, debugFlag):
        """
        Purpose: Constructor

        :param self:           class object itself
        :param configFile:     configuration file to use
        :param mktName:        market name for this run
        :param processingDate: processing date passed to the logger
        :param debugFlag:      flag enabling debug behaviour
        """
        # Set up logging first so any later failure can be reported.
        self.m_logger = Logger(logging.INFO, configFile, processingDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        # Remember the constructor arguments for later use.
        self.configFile = configFile
        self.mktName = mktName
        self.processingDate = processingDate
        self.debugFlag = debugFlag

        try:
            # Load the configuration file into a dictionary.
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            # Create the Oracle database helper (opens the connection).
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)
        except Exception as exp:
            # Initialisation failed: log, echo to stdout, and abort.
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)
Exemplo n.º 3
0
def splitOnUCLC(token):
	"""Return whichever of two candidate splits of ``token`` scores better.

	Each candidate (splitOnChange vs splitPenultimate) is scored by the
	number of fragments that Oracle.isWord accepts after lower-casing.
	The penultimate split wins only on a strictly higher score.
	"""
	first = splitOnChange(token)
	second = splitPenultimate(token)
	firstScore = len([w for w in first if Oracle.isWord(w.lower())])
	secondScore = len([w for w in second if Oracle.isWord(w.lower())])
	if secondScore > firstScore:
		return second
	return first
Exemplo n.º 4
0
def prepareData(mcd, mcfFile, featModel, moves, filename, wordsLimit):
    """Replay the oracle over the corpus and dump (input, output) vector pairs.

    Reads sentences from *mcfFile* (stopping once *wordsLimit* words were
    consumed), collects one (movement, feature-vector) pair per oracle step,
    then writes the vectorised training data to *filename*.

    NOTE(review): relies on module-level globals `dicos`, `verbose`,
    `inputSize` and `outputSize` defined elsewhere in this file — confirm.
    """
    c = Config(mcfFile, mcd, dicos)
    numSent = 0
    numWords = 0
    listMvt = []
    listFeatVect = []
    while c.getBuffer().readNextSentence() and numWords < wordsLimit:
        numWords += c.getBuffer().getLength()
        numSent += 1
        prepareWordBufferForTrain(c.getBuffer())
        while True:
            # Gold movement for the current configuration.
            mvt = Oracle.oracle(c)
            listMvt.append(mvt)
            featVect = c.extractFeatVec(featModel)
            listFeatVect.append(featVect)

            if verbose is True:
                print("------------------------------------------")
                c.affiche()
                print('oracle says', mvt[0], mvt[1])
                print(mvt, featVect)

            res = c.applyMvt(mvt)
            if res is False: print("cannot apply movement")
            if c.isFinal():
                break

    try:
        dataFile = open(filename, 'w', encoding='utf-8')
    except IOError:
        print('cannot open', filename)
        exit(1)
    # `with` guarantees the handle is closed (the original leaked it).
    with dataFile:
        dataFile.write(str(inputSize))
        dataFile.write("\n")
        dataFile.write(str(outputSize))
        dataFile.write("\n")
        for i in range(len(listFeatVect)):
            featVect = listFeatVect[i]
            mvt = listMvt[i]
            inputVector = featModel.buildInputVector(featVect, dicos)
            # BUG FIX: the original wrote the *last* outputVector computed in
            # the parsing loop for every sample; rebuild it from this sample's
            # movement instead.
            outputVector = moves.buildOutputVector(mvt)
            np.savetxt(dataFile,
                       inputVector,
                       fmt="%s",
                       delimiter='  ',
                       newline=' ')
            dataFile.write('\n')
            np.savetxt(dataFile,
                       outputVector,
                       fmt="%s",
                       delimiter='  ',
                       newline=' ')
            dataFile.write('\n')
Exemplo n.º 5
0
def Newton_F(Oracle, x0, iter_max=100, gradient_step=1, threshold=0.000001):
    """Minimise a criterion with Newton's method.

    :param Oracle: callable ``Oracle(x, 7) -> (criterion, gradient, hessian)``
    :param x0: starting point
    :param iter_max: maximum number of iterations (must be >= 1; default 100)
    :param gradient_step: step length applied to the Newton direction
    :param threshold: stop once the gradient norm drops below this value
    :return: (optimal criterion, optimal gradient, optimal x)

    The hyperparameters were hard-coded in the original; they are now
    keyword arguments with the same defaults, so existing callers are
    unaffected.
    """

    ##### Variable initialisation

    gradient_norm_list = []
    gradient_step_list = []
    critere_list = []

    time_start = process_time()

    x = x0

    ##### Iteration loop

    for k in range(iter_max):

        # Criterion, gradient and Hessian at the current point.
        critere, gradient, hessien = Oracle(x, 7)

        # Convergence test on the gradient norm.
        gradient_norm = norm(gradient)
        if gradient_norm <= threshold:
            break

        # Newton descent direction: -H^{-1} g
        D = -dot(inv(hessien), gradient)
        print(gradient_norm)

        # Update the iterate.
        x = x + (gradient_step * D)

        # Track gradient norm, step and criterion per iteration.
        gradient_norm_list.append(gradient_norm)
        gradient_step_list.append(gradient_step)
        critere_list.append(critere)

    ##### Optimisation results

    critere_opt = critere
    gradient_opt = gradient
    x_opt = x
    time_cpu = process_time() - time_start

    print()
    print('Iteration :', k)
    print('Temps CPU :', time_cpu)
    print('Critere optimal :', critere_opt)
    print('Norme du gradient :', norm(gradient_opt))

    # Convergence visualisation (project helper defined elsewhere).
    Visualg(gradient_norm_list, gradient_step_list, critere_list)

    return critere_opt, gradient_opt, x_opt
Exemplo n.º 6
0
def greedy(start, t):
    """Greedily peel the longest Oracle-recognised prefix of t[start:].

    Tries ever-shorter prefixes until Oracle.isWord accepts one, then
    recurses on the remainder. If no prefix is a word, the whole tail is
    returned as a single fragment.
    """
    end = len(t)
    if start == end:
        return []

    while end > start:
        candidate = t[start:end]
        if Oracle.isWord(candidate.lower()):
            return [candidate] + greedy(end, t)
        end -= 1

    return [t[start:]]
Exemplo n.º 7
0
def splitOnUCLC(token, debug=False):
    """Choose between two candidate splits of *token* by oracle score.

    Each candidate is scored by how many of its pieces Oracle.oracle
    accepts (lower-cased). With ``debug`` set, both candidates and their
    scores are printed. The penultimate split is returned only when it
    strictly beats the case-change split.
    """
    def _score(parts):
        hits = 0
        for part in parts:
            if Oracle.oracle(part.lower()):
                hits += 1
        return hits

    refinedA = splitOnChange(token)
    hitsA = _score(refinedA)

    refinedB = splitPenultimate(token)
    hitsB = _score(refinedB)

    if debug:
        print(refinedA, hitsA)
        print(refinedB, hitsB)

    if hitsB > hitsA:
        return refinedB
    return refinedA
Exemplo n.º 8
0
def splitOnUCLC(token, debug=False):
	"""Pick the better of two token splits, judged by Oracle.oracle hits.

	Scores the splitOnChange candidate and the splitPenultimate candidate
	by counting pieces the oracle accepts after lower-casing; optionally
	prints both candidates with their scores when ``debug`` is true.
	"""
	refined1 = splitOnChange(token)
	hits1 = sum(1 for piece in refined1 if Oracle.oracle(piece.lower()))

	refined2 = splitPenultimate(token)
	hits2 = sum(1 for piece in refined2 if Oracle.oracle(piece.lower()))

	if debug:
		print (refined1, hits1)
		print (refined2, hits2)

	return refined2 if hits2 > hits1 else refined1
Exemplo n.º 9
0
def greedy(start, t):
	"""Iteratively carve t[start:] into the longest oracle-accepted chunks.

	Equivalent to the recursive formulation: repeatedly take the longest
	prefix that Oracle.isWord recognises; if none of the prefixes of the
	remaining tail is a word, the tail is emitted as one piece.
	"""
	pieces = []
	while True:
		end = len(t)
		if start == end:
			return pieces
		while not Oracle.isWord(t[start:end].lower()):
			end -= 1
			if start == end:
				pieces.append(t[start:])
				return pieces
		pieces.append(t[start:end])
		start = end
Exemplo n.º 10
0
 def __init__(self, connectString):
     """Open a database connection described by *connectString*.

     The string is parsed by _parseConnectString, which also decides
     whether the `Oracle` module or `DCOracle` is used and whether
     verbose output is enabled.
     """
     self.connectString = connectString
     realConn, self.useOracleMod, self.verbose = self._parseConnectString(
         connectString)
     self.realConn = realConn
     # Pick the driver selected by the connect string.
     if self.useOracleMod:
         self.conn = Oracle.getConnection(realConn)
     else:
         self.conn = DCOracle.Connect(realConn)
     self.bvcount = 0
     # NOTE(review): oraConns looks like a module-level registry of open
     # connections — presumably for cleanup elsewhere; confirm.
     oraConns.append(self.conn)
     self.bindVariables = 1
     if self.verbose:
         print 'CONNECTED'
Exemplo n.º 11
0
 def __init__(self, connectString):
     """Create a connection for *connectString* (duplicate of the variant above).

     _parseConnectString determines the real connect string, the driver
     (`Oracle` vs `DCOracle`) and the verbosity flag.
     """
     self.connectString = connectString
     realConn, self.useOracleMod, self.verbose = self._parseConnectString(
         connectString)
     self.realConn = realConn
     # Driver choice comes from the parsed connect string.
     if self.useOracleMod:
         self.conn = Oracle.getConnection(realConn)
     else:
         self.conn = DCOracle.Connect(realConn)
     self.bvcount = 0
     # NOTE(review): oraConns appears to be a module-level list tracking
     # open connections; verify its consumer before relying on it.
     oraConns.append(self.conn)
     self.bindVariables = 1
     if self.verbose:
         print 'CONNECTED'
Exemplo n.º 12
0
def get_xy(file_conllu, file_features, file_embedding=None):
    """Build the (X, Y) training matrices from a CoNLL-U corpus.

    :param file_conllu: path to the CoNLL-U corpus
    :param file_features: path to the feature description file
    :param file_embedding: optional embedding file used when vectorising
    :return: X, Y, the label encoders for X and Y, and the list of trees

    Cleanup: the original initialised a counter ``i = 0`` that was never
    incremented or used (leftover from a commented-out condition); it has
    been removed.
    """
    mcd = get_mcd()

    print("Chargement des arbres")
    obj_generateAlltree = ConstructAllTree(file_conllu, mcd, True)

    # Projectivised trees only — the oracle assumes projective structures.
    all_tree = obj_generateAlltree.get_allTreeProjectiviser()
    print("Arbres charger : ", len(all_tree))

    print("Création du dataset")
    features = Features(file_features)
    for tree in all_tree:
        # Replay the oracle on each tree, accumulating samples in `features`.
        A = Oracle(tree, features)
        A.run()

    print("Convertion du dataset")
    print("file_embedding : ", file_embedding)
    X, Y = features.get_Data_Set(file_embedding)

    labels_encoderX = features.get_label_encoderX()
    labels_encoderY = features.get_label_encoderY()

    print("X_train_shape", X.shape)
    print("Y_train_shape", Y.shape)

    return X, Y, labels_encoderX, labels_encoderY, all_tree
Exemplo n.º 13
0
class Loader():
    """Loads market data files to S3 and records progress in the database.

    Wraps logging, configuration lookup and Oracle access for the load
    pipeline (race checks, active-load throttling, S3 upload, done-file
    creation). The class body continues below.
    """
    #class variables
    m_logger = ""       # placeholder; replaced with a Logger instance in __init__

    #database objects
    m_oracle_db = ""    # Oracle handle, set in __init__
    m_netezza_db = ""   # Netezza handle; not initialised in the visible code

    def __init__(self, configFile, tradeDate):
        """
        Purpose: Constructor
        :param self:        class object itself
        :param configFile:  Configuration file to use
        :param tradeDate:   Trade date for this load run
        """

        # Initialize m_logger object from class Logger and add Header to the log, using addGenericInfo function
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        self.tradeDate = tradeDate

        #log.addFileHandler(logging.INFO)

        try:
            # Get configuration to a dictionary
            self.m_configDict = configuration(configFile, True).m_dictionary

            #Initialize Oracle instance along with connection
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)

        except Exception as exp:
            # An exception occurred: log it, echo to stdout, and abort.
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the global dictionary m_mktConfigDict for reference
        :param mktConfigFile: market-specific configuration file to parse
        :return:
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as exp:
            # The market config is required downstream: log and abort.
            failureMessage = "Unable to initialize the configuration for logger " + str(exp)
            self.m_logger.error(failureMessage)
            print("ERROR: " + failureMessage)
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active Active loads happening at a given point
        :param None: None at this point
        :return: 0 when the load may proceed, 1 on timeout/failure
        """
        try:
            if self.m_mktConfigDict["ACTIVE_LOAD"]["ACTIVE_LOAD"[0:0] + "active_load_check_flag"] == 'Y' if False else self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y':
                # BUG FIX: convert config values to int (they are read as
                # strings), matching the sibling chkRaceStatus; the original
                # compared strings lexicographically and passed a string to
                # time.sleep.
                localActiveLoadMax = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])
                localActiveLoadWaitTime = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"])
                localActiveLoadMaxWaitTime = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"])

                mySql = self.m_configDict["SQL"]["get_active_loads"]
                myParams = ""

                activeFlag = 1
                totalActiveWaitTime = 0
                while activeFlag:
                    # returnStr[0] is the status code, returnStr[1] the value/error.
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
                    if returnStr[0] != 0:
                        self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    # Proceed as soon as the active-load count is within limit.
                    if int(returnStr[1]) <= localActiveLoadMax:
                        activeFlag = 0
                        return 0

                    # Wait before polling again.
                    time.sleep(localActiveLoadWaitTime)
                    totalActiveWaitTime += localActiveLoadWaitTime

                    # BUG FIX: the original referenced the undefined name
                    # `localActiveMaxWaitTime` here, raising NameError on the
                    # timeout path instead of returning 1.
                    if totalActiveWaitTime > localActiveLoadMaxWaitTime:
                        self.m_logger.error("In Fn chkActiveLoads.  Total Actual Wait Time exceeds the configured value active_load_max_wait_time.  Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveLoadMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1

        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset
        :param None: None at this point
        :return: 0 when no competing load is running, 1 on timeout
        """
        try:
            if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y':
                # Polling interval and overall timeout (seconds) from config.
                localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"])
                localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"])

                mySql = ""
                myParams = {"datasetName":self.datasetName}
                mySql = self.m_configDict["SQL"]["get_race_status"]
    
                raceFlag=1
                totalRaceStatusWaitTime=0
                while raceFlag:
                    # returnStr[0] is the status code, returnStr[1] the value/error text.
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
                    if returnStr[0] != 0:
                        self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                    # NOTE(review): a count of 1 presumably represents this
                    # process itself, so <= 1 means no competitor — confirm.
                    if int(returnStr[1].strip()) <= 1:
                        raceFlag=0
                        return 0

                    #Sleep for time defined by configured value for "race_status_wait_time"
                    time.sleep(localRaceStatusWaitTime)
                    totalRaceStatusWaitTime += localRaceStatusWaitTime
                    # Give up once the accumulated wait exceeds the configured maximum.
                    if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                        self.m_logger.error("In Fn chkRaceStatusLoads.  Total Actual Wait Time exceeds the configured value race_status_max_wait_time.  Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def loadData(self,localDataFile, localFileID, localFileIDQueue, localDBFlag):
        """
        Purpose - To load the given datafile to the S3 bucket specified in the global mktConfigFile
        :param localDataFile: Source datafile to be uploaded to S3
        :param localFileID: Internal File ID assigned to the source datafile
        :param localFileIDQueue: Queue in which, results of the operation is stored
        :param localDBFlag: When true, record progress/results in the database
        :return: loadReturnValue (0 success / non-zero failure) when
                 localDBFlag is false; results go on the queue otherwise
        """
        try:
            if localDBFlag:
                # Abort if another load of this dataset is already running.
                raceStatusReturnValue = self.chkRaceStatus()
                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1

                # Abort if too many loads are active right now.
                activeLoadsReturnValue = self.chkActiveLoads()
                if activeLoadsReturnValue:
                    # BUG FIX: the original logged "chkRaceStatus" and enqueued
                    # raceStatusReturnValue (always 0 on this path) instead of
                    # the active-loads result.
                    self.m_logger.error("Failure value returned by chkActiveLoads fn.  Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, activeLoadsReturnValue))
                    return 1

                processID = os.getpid()
                hostName = socket.gethostname()

                # Record "load started" ('P') in the process-status table.
                mySql = self.m_configDict["SQL"]["put_process_status"]
                pStatus = 'P'
                pComment = 'Load started'
                myParams = {"datasetName":self.datasetName, "runID":self.runID, "fileID":localFileID, "fileName":localDataFile, "tDate":self.tradeDate, "processID":processID, "hostName":hostName, "status":pStatus , "lcomment":pComment, "tidalRunID":self.tidalRunID}

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
                if returnStr[0] != 0:
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            sourceFileWthPath = localDataFile

            # S3 target details come from the market-specific config
            # (self.s3object is expected to be set up before this runs).
            targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(sourceFileWthPath))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            loadReturnValue = 0

            # Upload the file to S3 in a single part.
            loadReturnValue = self.s3object.loadDataSinglePart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )

            if loadReturnValue == 0:
                pStatus = 'S'
                pComment = 'Load completed'
            else:
                pStatus = 'F'
                pComment = 'Load failed'

            if localDBFlag:
                # Record the final status ('S'/'F') in the database.
                mySql = self.m_configDict["SQL"]["put_process_status"]
                myParams = {"datasetName":self.datasetName, "runID":self.runID, "fileID":localFileID, "fileName":localDataFile, "tDate":self.tradeDate, "processID":processID, "hostName":hostName, "status":pStatus , "lcomment":pComment, "tidalRunID":self.tidalRunID}

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)

                if returnStr[0] != 0:
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                localFileIDQueue.put((localFileID,loadReturnValue))
            else:
                return loadReturnValue

        except Exception as exp:
            self.m_logger.error("Failure in loadData process for file with the error " + str(exp))
            if localDBFlag:
                # BUG FIX: the original called put(localFileID, 1), which
                # passes 1 as Queue.put's `block` argument and enqueues a bare
                # ID instead of the (id, status) tuple used everywhere else.
                localFileIDQueue.put((localFileID, 1))
            else:
                return 1

    def createFinraManifestFile(self, manifestFile):
        """
        Purpose - To build the FINRA ".done" file from the manifest, the
        defaults file and the computed load totals.
        :param manifestFile: pipe-delimited manifest (id|path|size|rows per line)
        :return: 0 on success, 1 on failure
        """
        try:
            # Read Manifest file to get info like total rows, total size & other details to populate the done file for FINRA
            with open(manifestFile,"r") as fh:
                self.totalRows = 0
                self.totalSize = 0
                self.fileCount = 0
                self.fileDict = {}
                for data in fh:
                    # BUG FIX: the original called data.rstrip("\n") and
                    # discarded the result; strip the newline for real.
                    data = data.rstrip("\n")
                    self.fileCount += 1
                    mylist = data.split("|")
                    self.fileDict[self.fileCount] = [mylist[0],os.path.basename(mylist[1]),int(mylist[2]),int(mylist[3])]
                    self.totalRows += int(mylist[3])
                    self.totalSize += int(mylist[2])
        except Exception as exp:
            self.m_logger.error("Failed while processing readManifest with Error = " + str(exp))
            return 1

        try:
            # The defaults file name comes from the database (self.defaultsFile);
            # only its directory comes from the market config.
            self.defaultsFileWthPath = self.m_mktConfigDict["DATASET"]["defaults_dir"] + "/" + self.defaultsFile

            with open(self.defaultsFileWthPath,"r") as fh:
                self.defaultsDict = {}
                self.defaultsCount = 0
                for data in fh:
                    # Keep the trailing newline on purpose: the lines are
                    # written back verbatim below. (The original's
                    # data.rstrip('\n') here was a discarded no-op.)
                    self.defaultsCount += 1
                    self.defaultsDict[self.defaultsCount] = data

        except Exception as exp:
            self.m_logger.error("Failed while processing defaults file " + self.defaultsFileWthPath + " with Error = " + str(exp))
            return 1

        try:
            self.finraManifestFile =  self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" +  os.path.basename(manifestFile) + ".done"
            with open(self.finraManifestFile,"w") as finraMnFH:
                finraMnFH.write("# AWS RunID : {}\n".format(str(self.runID)))
                finraMnFH.write("# Dataset : {0} , TradeDate : {1}\n".format(str(self.datasetName),str(self.tradeDate)))
                finraMnFH.write("total_compressed={}\n".format(self.totalSize))
                finraMnFH.write("total_rows={}\n".format(self.totalRows))
                finraMnFH.write("no of files={}\n".format(self.fileCount))

                # One name/rows pair per manifest entry.
                for key,val in self.fileDict.items():
                    finraMnFH.write("file_{0}={1}\n".format(str(key),val[1]))
                    finraMnFH.write("file_{0}_rows={1}\n".format(str(key),val[3]))

                finraMnFH.write("# Data Attributes\n")
                # Defaults lines already carry their newline (see above).
                for key,val in self.defaultsDict.items():
                    finraMnFH.write("{0}".format(str(val)))
            return 0
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS Done file " + self.finraManifestFile + " with Error = " + str(exp))
            return 1

    def processLoader(self, configFile, manifestFile, datasetName, tidalRunID):
        """
        Purpose - Function responsible for reading the manifest file, get market name, call multiprocess load and other db calls
        :param configFile: Configuration File
        :param manifestFile: Manifest File
        :param tradeDate: Trade Date
        :return:
        """
        try:
            # Read the manifest filename and get the suffix i.e. datasetname
            # Assumption - Manifest file format - manifest.<datasetName>.<tradeDate>.<tidalRunID>
            # Program will break otherwise
            self.datasetName = datasetName
            self.tidalRunID = tidalRunID

            # DB_CALL
            # Make database call sp_ddy_get_market_info(datasetname) and get market info
            mktName = ''

            #print "self.m_configDict = ", self.m_configDict
            #print "self.m_configDict[mkt] = ", self.m_configDict["SQL"]["get_mkt"]
            #print "datasetName = ", self.datasetName
            #print "ManifestFile = ", manifestFile
            self.manifestFile = self.m_configDict["ENV"]["manifestfile_dir"] + "/" + manifestFile
            #print "ManifestFile with Path = ", self.manifestFile
            ##Validata Manifest file
            if not os.path.isfile(self.manifestFile):
                self.m_logger.error("Invalid manifest file " + self.manifestFile)
                sys.exit(1)

            # Enable this one the proc to get mkt name and default file are ready and test it
            mySql = self.m_configDict["SQL"]["get_mkt_defaults_filename"]
            myParams = {"datasetName":self.datasetName}
            #returnVal, returnCode = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
            #print "returnCode = ", returnStr
            if returnStr[0] == 0:
                #print "returnStr[2].strip()", returnStr[2].strip()
                if returnStr[2].strip() != None:
                    mktName = returnStr[2].strip()
                else:
                    self.m_logger.error("Invalid Market Name " + returnStr[2].strip() )
                    sys.exit(1)
                
                if returnStr[3].strip() != None:
                    self.defaultsFile = returnStr[3].strip()
                else:
                    self.m_logger.error("Invalid Defaults File " + returnStr[3].strip() )
                    sys.exit(1)
            else:
                self.m_logger.error("Unable to get market info from the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)

            #print "MktName from DB = ", mktName
          
            #mySql = self.m_configDict["SQL"]["get_mkt"]
            ##print "mySql = ", mySql
            #myParams = {"datasetName":self.datasetName}
            ##print "myParams = ", myParams

            #returnVal, returnCode = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
            ##print "returnVal = ", returnVal
            ##print "returnCode = ", returnCode

            #if returnVal == 0:
                #mktName = returnCode
            #else:
                #self.m_logger.error("Unable to get market info from the database using sql " + mySql + ". Error = " + returnCode)
                #sys.exit(1)

            #print "MktName from DB = ", mktName

            # Temporarily use file lookup call

            ########
            ### Start of temp call

            #lookupFile = "/Users/rnarayan/apps/ddy/ICE/conf/dataset_lookup.txt"

            #mktName = ''
            #with open(lookupFile, "r") as myFile:
                #for line in myFile:
                    #print(datasetName, line)
                    #if datasetName in line:
                        #mktNameArray = line.partition('=')
                        #mktName = mktNameArray[2].strip()
            #if not mktName:
                #self.m_logger.error("Unable to find market manifest for dataset " )
                #sys.exit("ERROR: Unable to find market manifest for dataset " + mktConfigFile)
                #sys.exit(1)

            #print("Final MktName = ", mktName)
            ### End of temp call
            ########

            #Build the string for mktConfigFile based on mktName and configFile info
            self.mktConfigFile = os.path.dirname(configFile) + '/' + os.path.basename(configFile).split('.',1)[0].strip() + '_' + mktName.lower() + '.' + os.path.basename(configFile).split('.',1)[1].strip()

            #print("mktConfigFile = ", self.mktConfigFile)
            #Validata Manifest file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                #print "Inside invalid mktConfigFile" + self.mktConfigFile
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            #May not need the following section, as we send mktConfigFile to other function not the dictionary self.m_mktConfigDict.  Need to remove it after finishing the loadData part fully

            # Read Market specific config file and store it in a specific dictionary
            #m_mktConfigDict=process.readMktConfigFile(mktConfigFile)
            self.readMktConfigFile(self.mktConfigFile)

            #print("m_mktConfigDict=",self.m_mktConfigDict)

            # Read the contents of manifest - dataFileNames into a list - Will validate the datafiles as well

            localManifest = Manifest()
            manifestFileList = localManifest.readManifest(self.manifestFile, self.m_logger)
            #print("manifestFileList = ", manifestFileList)

            # Get RunID
            self.runID = generate_runId()
            #print("RunID = ", self.runID)

            #Call Oracle fn to insert status 'P' into TB_DDY_DATASET_TRANS with RUNID etc
            #DB_CALL
            # Make database call sp_ddy_insert_dataset_trans and insert data that process started

            #print "self.m_configDict = ", self.m_configDict
            #print "self.m_configDict[put_dataset] = ", self.m_configDict["SQL"]["put_dataset"]
            mySql = ""
            myParams = ""
            mySql = self.m_configDict["SQL"]["put_dataset"]
            #print "mySql = ", mySql
            
            pStatus = 'P'
            myParams = {"datasetName":self.datasetName, "runID": self.runID, "tDate":self.tradeDate, "status": pStatus, "tidalRunID":self.tidalRunID}
            #myParams = {"datasetName":self.datasetName, "runID": self.runID, "tDate":self.tradeDate, "status": pStatus}
            #print "myParams = ", myParams

            #returnVal, returnCode = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)

            if returnStr[0] != 0:
                self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)


            # Insert Manifest data in db and call multiprocessing s3 loader process.  Shd we add RUN_ID to manifest table

            #For each datafile, generate fileID and call loadData fn using multiprocess to load data into AWS
            for dataRecord in manifestFileList:
                mySql = ""
                myParams = ""
                mySql = self.m_configDict["SQL"]["put_manifest"]
                myParams = {"datasetName":self.datasetName, "runID": self.runID, "tDate":self.tradeDate, "dataFileName":dataRecord[1], "manifestFileName":manifestFile , "noOfRecords": dataRecord[2], "fileSize":dataRecord[3], "tidalRunID":self.tidalRunID}
                #myParams = {"datasetName":self.datasetName, "runID": self.runID, "tDate":self.tradeDate, "dataFileName":dataRecord[1], "manifestFileName":manifestFile , "noOfRecords": dataRecord[2], "fileSize":dataRecord[3]}
     
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)

                if returnStr[0] != 0:
                    self.m_logger.error("Unable to put manifest info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            self.s3object = S3(self.mktConfigFile, self.m_logger)
            self.s3object.getToken()

            fileID=1
            fileIDQueue = Queue()
            dbFlag=1
            procs = []
            for dataRecord in manifestFileList:
                processHandle = Process(target=Loader.loadData, args=(self, dataRecord[1],fileID, fileIDQueue, dbFlag))
                processHandle.start()
                procs.append(processHandle)
                fileID += 1
                #time.sleep(5)

            for p in procs:
                p.join()

            #Without sleep the queue is unreliable and do not return the expected values
            #time.sleep(2)

            failureFlag=0
            while not fileIDQueue.empty():
                #print("inside while")
                qFileID, qResult = fileIDQueue.get()
                #print("qFileID = ", qFileID, "qResult = ", qResult)
                if qResult:
                    failureFlag=1

            #print "Failure Flag = ", failureFlag

            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'
                """
                #Generate FINRA Manifest file and Push it to AWS 
                """

                # Call Divakar's generate done file function
                returnValue = self.createFinraManifestFile(self.manifestFile)
    
                if returnValue:
                    self.m_logger.error("Unable to generate done file. Please fix the issue the re-run the load")
                    #sys.exit(1)

                dbFlag=0
                fileID=0
                # Call the loader function with the manifest file
                finraManifestLoadStatus=0
                finraManifestLoadStatus=self.loadData(self.finraManifestFile ,fileID, fileIDQueue, dbFlag)

                if finraManifestLoadStatus:
                    pStatus = 'F'
                    self.m_logger.error("Unable to load finra manifest file ")
                    # Do we need to exit here or insert a failure
                    #sys.exit(1)
                
            #Call Oracle fn to insert status 'S' into TB_DDY_DATASET_TRANS with RUNID etc
            #DB_CALL
            # Make database call sp_ddy_insert_dataset_trans and insert data based on Failure or Success

            #print "self.m_configDict = ", self.m_configDict
            mySql = ""
            myParams = ""
            mySql = self.m_configDict["SQL"]["put_dataset"]
            #print "mySql = ", mySql
            
            myParams = {"datasetName":self.datasetName, "runID": self.runID, "tDate":self.tradeDate, "status": pStatus, "tidalRunID":self.tidalRunID}
            #myParams = {"datasetName":self.datasetName, "runID": self.runID, "tDate":self.tradeDate, "status": pStatus}
            #print "myParams = ", myParams

            #returnVal, returnCode = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)

            if returnStr[0] != 0:
                self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)

        except Exception as e:
            self.m_logger.error("ProcessLoader failed with error " + str(e))
            sys.exit(1)
Example #14
0
File: dea.py  Project: tnraman/ddy
class DEAExtractor():
    """
    Extractor that downloads DEA data files from FINRA's S3 storage into a
    local stage directory and tracks per-file process status in an Oracle
    database (see extractData / processDEAExtractor).
    """
    #class variables
    m_logger = ""       # placeholder; replaced with a Logger instance in __init__

    #database objects
    m_oracle_db = ""    # placeholder; replaced with an Oracle wrapper in __init__

    #def __init__(self, configFile, mktName, processingDate, debugFlag, forceFlag):
    def __init__(self, configFile, mktName, processingDate, debugFlag):
        """
        Purpose: Constructor
        :param self:            class object itself
        :param configFile:      Configuration file to use
        :param mktName:         Market name this extractor operates on
        :param processingDate:  Date being processed (also used for log naming)
        :param debugFlag:       When truthy, methods emit verbose diagnostics
        """

        # Build the logger first so that any later failure can be recorded.
        self.m_logger = Logger(logging.INFO, configFile, processingDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        # Remember the constructor arguments for use by the other methods.
        self.configFile = configFile
        self.mktName = mktName
        self.processingDate = processingDate
        self.debugFlag = debugFlag

        try:
            # Parse the main configuration file into a dictionary ...
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            # ... and establish the Oracle connection from it.
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)

        except Exception as exp:
            # Nothing can run without configuration and a DB handle:
            # log, echo to stdout, and abort.
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the global dictionary m_mktConfigDict for reference
        :param mktConfigFile: path to the market-specific configuration file
        :return: None; populates self.m_mktConfigDict
        """
        try:
            parsedConfig = configuration(mktConfigFile, True)
            self.m_mktConfigDict = parsedConfig.m_dictionary
        except Exception as exp:
            # Market configuration is mandatory - report on both channels and stop.
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active Active loads happening at a given point
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
                localActiveLoadMax = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"]
                localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
                localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]

                mySql = ""
                myParams = ""
                mySql = self.m_configDict["SQL"]["get_active_loads"]
    
                activeFlag=1
                totalActiveWaitTime=0
                while activeFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkActiveLoads - Active Loads value = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if actual active loads is <= configured active loads.  If so, return out of the fn
                    if int(returnStr[1].strip()) <= localActiveLoadMax:
                        activeFlag=0
                        return 0

                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localActiveLoadWaitTime)
                    totalActiveWaitTime += localActiveLoadWaitTime

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit
                    if totalActiveWaitTime > localActiveMaxWaitTime:
                        self.m_logger.error("In Fn chkActiveLoads.  Total Actual Wait Time exceeds the configured value active_load_max_wait_time.  Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y':
                localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"])
                localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"])

                mySql = ""

                #myParams = {"datasetName":self.datasetName}
                tempSql = self.m_configDict["SQL"]["get_race_status"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
    
                raceFlag=1
                totalRaceStatusWaitTime=0
                while raceFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "tempSql = ", tempSql
                        print "myParamsDict = ", myParamsDict
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkRaceStatus - ReturnCode = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                    if int(returnStr[1].strip()) <= 1:
                        raceFlag=0
                        return 0

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit


                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localRaceStatusWaitTime)
                    #time.sleep(90)
                    totalRaceStatusWaitTime += localRaceStatusWaitTime
                    if self.debugFlag:
                        print "totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime
                    if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                        self.m_logger.error("In Fn chkRaceStatusLoads.  Total Actual Wait Time exceeds the configured value race_status_max_wait_time.  Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def extractData(self,localDataFile, localFileID, localFileIDQueue, localDBFlag):
        """
        Purpose - To extract the given datafile from the S3 bucket specified in the global mktConfigFile

        Downloads localDataFile from S3 into the local stage directory,
        retrying up to ENV/aws_retries times.  When localDBFlag is set, a
        'P' (started) row and then an 'S'/'F' (success/failure) row are
        written to the process status table via the put_process_status SQL.

        :param localDataFile: Data Filename
        :param localFileID: Internal File ID assigned to the local datafile
        :param localFileIDQueue: Queue in which, results of the operation is stored
                                 as a (localFileID, returnValue) tuple
        :param localDBFlag: Flag indicating if database should be used or not
        :return: the S3 download return value (0 on success), or 1 on error
        """
        try:
            if self.debugFlag:
                print "Inside extractData function"
                print "localDataFile = ", localDataFile

            if localDBFlag:
                """ Not sure if we need Race Status check for Extract
                raceStatusReturnValue=self.chkRaceStatus()
                if self.debugFlag:
                    print "raceStatusReturnValue=", raceStatusReturnValue

                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1
                """

                """  Need to integrate Active loads with tb_dxt_process_status and tb_dxt_process_status ?
                activeLoadsReturnValue=self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1
                """

                # Identify this worker process/host for the status row.
                processID = os.getpid()
                hostName = socket.gethostname()

                #Insert Process status into Oracle db
                #DB_CALL - sp_dxt_insert_process_status(RUNID, FILE_ID, etc)
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                pStatus = 'P'
                pComment = 'Load started'
                # Keep the below vars 0 for now
                localDataFileSize=0
                localDataFileRecordCount=0

                # Plain-text substitution of the parameter names into the SQL
                # template (same re.sub technique used by chkRaceStatus).
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileName":localDataFile, "tDate":str(self.processingDate), "processID":str(processID), "hostName":hostName, "fileSize": str(localDataFileSize), "recordCount" : str(localDataFileRecordCount), "status":pStatus , "lcomment":pComment}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                # NOTE(review): returnStr appears to be a (code, message) pair
                # with code '0' on success - confirm against the Oracle wrapper.
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    return 1

            #Here localFileWthPath is the local stage dir with file name
            localFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + localDataFile

            targetFolder = self.s3object.m_configFile["S3"]["path"]
            targetFileDir = targetFolder + self.processingDate + "/" 
            
            #Here targetFileWthPath is the AWS dir with file name
            targetFileWthPath = os.path.join(targetFileDir, os.path.basename(localDataFile))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])
            if self.debugFlag:
                print("localFileWthPath =", localFileWthPath)
                print("targetFileWthPath =", targetFileWthPath)
                print("targetBucket =", targetBucket)
                print("encryptKeyFlag =", encryptKeyFlag)
                print("localAWSRetries =", localAWSRetries)
            # Retry the single-part download up to localAWSRetries times.
            initCount = 0
            while (initCount < localAWSRetries):
                extractReturnValue = 0

                #Call s3.data download to extract the manifest file (single part load)
                #extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)

                if self.debugFlag:
                    print "extractReturnValue = ", extractReturnValue

                if int(extractReturnValue) == 0:
                    pStatus = 'S'
                    pComment = 'Extract completed'
                    break
                else:
                    pStatus = 'F'
                    pComment = 'Extract failed'
                    initCount += 1

            # Get the size of the file downloaded 
            # NOTE(review): if every retry failed, the local file may not exist
            # and os.stat below will raise; the outer except then reports the
            # failure via the queue.  Confirm this is the intended behavior.
            localFileSize = os.stat(localFileWthPath).st_size

            # Check if the downloaded file size is matching with what is mentioned in manifest file.  If not mark it as failed
            # Following check is commented as we don't have any manifest file to cross check size
#            if localFileSize != localDataFileSize:
#                pStatus = 'F'
#                pComment = 'Actual file size != Manifest file size'

            localRecordCount = 0
 
            if localDBFlag:
                #Call DB to insert 'S' or 'F' in tb_dxt_process_status
                #localFileIDQueue.put((localFileID, extractReturnValue))
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                #myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.processingDate), "processID":str(processID), "hostName":hostName, "fileSize": localFileSize, "recordCount" : localRecordCount, "status":pStatus , "lcomment":pComment}
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileName":localDataFile, "tDate":str(self.processingDate), "processID":str(processID), "hostName":hostName, "fileSize": str(localFileSize), "recordCount" : str(localRecordCount), "status":pStatus , "lcomment":pComment}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    return 1

            # Signal the parent process which file finished and with what result.
            localFileIDQueue.put((localFileID,extractReturnValue))
            return extractReturnValue
      
        except Exception as exp:
            self.m_logger.error("Failure in extractData process for file with the error " + str(exp))
            # When running under the multiprocessing driver, report the failure
            # through the queue; otherwise return the error code directly.
            if localDBFlag:
                localFileIDQueue.put((localFileID, 1))
            else:
                return 1

    def getRecords(self, fileDict, startDateTime, endDateTime):
        """
        Purpose - Function to sort the dictionary based on the key and return a sorted list
        :param fileDict : Dictionary containing Last_modified Date and file name
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            # No pattern to search for
            #patternToSearch =  self.m_configDict["ENV"]["pattern_to_search"]
            
            if self.debugFlag:
                print "fileDict = ", fileDict
                #print "patternToSearch = ", patternToSearch
                print "startDateTime = ", startDateTime
                print "endDateTime = ", endDateTime
            sorted_values = sorted(fileDict.values())
            start = bisect.bisect_left(sorted_values, startDateTime)
            end = bisect.bisect_right(sorted_values, endDateTime)
            if self.debugFlag:
                print "sorted_values = ", sorted_values
                print "start = ", start
                print "end = ", end
            for fileItem in sorted(fileDict.iteritems())[start:end]:
                # No pattern to search for in DEA
                #if patternToSearch in fileItem[0]:
                    #yield fileItem[0]
                yield fileItem[0]
                if self.debugFlag:
                    print "fileItem[0] = ", fileItem[0]

        except Exception as exp:
            self.m_logger.error("Failed while executing getRecords to sort the dictionary content of dictionary  with Error = " + str(exp))
            sys.exit(1)

    def readManifestFile(self, manifestFileName):
        """
        Purpose - To read the content of Finra's manifest file stored in key-value pair into Nested dictionary 
        :param manifestFileName : Finra's manifestFileName containing data filenames, file size & no of rows
        """
        try:
            manifestRecordStartPattern = self.m_configDict["dxt"]["MANIFEST_RECORD_START_PATTERN"]

            if self.debugFlag:
                print "manifestRecordStartPattern =", manifestRecordStartPattern

            with open(manifestFileName) as infile:
                manifestFileDict = {}
                file = None
                line_count = 0
                for line in infile:
                    line = line.strip()
                    if line.startswith(manifestRecordStartPattern):
                        line_count += 1
                        file = line_count
                        manifestFileDict[file] = {}
                    var, val = line.split(':',1)
                    manifestFileDict[file][var.strip()] = val.strip()

            if self.debugFlag:
                print "manifestFileDict = ", manifestFileDict

            return manifestFileDict

        except Exception as exp:
            self.m_logger.error("Failed while executing readManifestFile to get FINRA manifest file into nested dictionary, Error = " + str(exp))
            sys.exit(1)

    def getFileList(self, startDateTime, endDateTime, s3Bucket, s3Path, folderPosition):
        """
        Purpose - Function to sort the dictionary based on the key and return a sorted list
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            if self.debugFlag:
                print "s3Bucket = ", s3Bucket
                print "s3Path = ", s3Path
                print "startDateTime = ", startDateTime
                print "endDateTime = ", endDateTime
                print "folderPosition = ", folderPosition
                
            fileListDict = self.s3object.listBucketWPathByLastModified(s3Bucket, s3Path, folderPosition)
            if self.debugFlag:
                print "fileListDict = ", fileListDict
           
            #endDateTime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            fileList = list(self.getRecords(fileListDict, startDateTime, endDateTime))

            if self.debugFlag:
                print "fileListDict = ", fileListDict
                print "fileList = ", fileList

            return fileList
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS manifest file list with Error = " + str(exp))
            return 1

    def processDEAExtractor(self):
        """
        Purpose - Function responsible for getting the AWS token and reading the last modified date in DB and fetch the list of files from AWS to be processed
        :param : None
        :return:
        """
        try:
            # DB_CALL
            # Make database call sp_dxt_validate_mktName(mktName) to validate mktName

#            tempSql = self.m_configDict["SQL"]["validate_market_name"]
#            myParamsDict = { 'mktName' : self.mktName.upper() }
#            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
#            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
#            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
#
#            if self.debugFlag:
#                print "tempSql = ", tempSql
#                print "myParamsDict = ", myParamsDict
#                print "mySql = ", mySql
#                print "returnStr = ", returnStr
#
#            if returnStr[0] != '0':
#                self.m_logger.error("Invalid market name provided " + mySql + ". Error = " + self.mktName)
#                sys.exit(1)

            if self.debugFlag:
                print "MktName from DB = ", self.mktName
          
            #Build the string for mktConfigFile based on mktName and configFile info
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + os.path.basename(self.configFile).split('.',1)[0].strip() + '_' + self.mktName.lower() + '.' + os.path.basename(self.configFile).split('.',1)[1].strip()

            if self.debugFlag:
                print("mktConfigFile = ", self.mktConfigFile)

            #Validate Market Config file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)

            if self.debugFlag:
                print("m_mktConfigDict=",self.m_mktConfigDict)

            # Get RunID
            self.runID = generate_runId()
            if self.debugFlag:
                print("RunID = ", self.runID)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)

            tokenRetryTimes = int(self.m_configDict["TOKEN"]["token_retry_times"])
            tokenRetryWaitTime = int(self.m_configDict["TOKEN"]["token_retry_wait_time"])

            deaFileWaitTime = int(self.m_configDict["dea"]["DEA_FILE_WAIT_TIME"])
            deaFileSleepTime = int(self.m_configDict["dea"]["DEA_FILE_SLEEP_TIME"])
            s3TimeoutTime = int(self.m_configDict["dea"]["S3_TIMEOUT_TIME"])

            #Not sure what to do.  Keep this for a place holder in the future, when FINRA manifest for zero byte files everyday
            handleNoDatafileFlag = self.m_configDict["dea"]["HANDLE_NO_DATAFILE_FLAG"]

            deaActualTime = 0

            # Download manifest files in the manifest file list to a specific folder from AWS
            localFileDir = self.s3object.m_configFile["ENV"]["stage_dir"]
            targetFolder = self.s3object.m_configFile["S3"]["path"]
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]
            folderPosition =  int(self.s3object.m_configFile["S3"]["folder_position"])
            targetFileDir = targetFolder + self.processingDate + "/" 

            if self.debugFlag:
                print "localFileDir = ", localFileDir
                print "targetFolder = ", targetFolder
                print "targetBucket = ", targetBucket
                print "encryptKeyFlag = ", encryptKeyFlag
                print "self.processingDate = ", self.processingDate
                print "targetFileDir = ", targetFileDir

            startDate = ((datetime.now() - relativedelta(years=1)) + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
            endDate = (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")

            getTokenFlag = 0

            fileExistFlag = 0

            while deaActualTime < deaFileWaitTime:
             
                # Get token only the first time or when the time exceed s3TimeoutTime
                if deaActualTime > s3TimeoutTime or not getTokenFlag:
                    getTokenFlag=1
                    initCount = 0
                    while (initCount < tokenRetryTimes):
                        tokenReturnCode = self.s3object.getToken()
                        if tokenReturnCode:
                            if initCount == tokenRetryTimes:
                                self.m_logger.error("Error: Exceeded the max retries " + tokenRetryTimes + " to get AWS Token from FINRA.  Please re-try after some time or escalate.. ")
                                sys.exit(1)
                            initCount += 1
                            time.sleep(tokenRetryWaitTime)
                        else:
                            break

                self.currentEpochTime = int(time.time())

                # Get the list of files from the AWS folder for the given processing date
                fileList = self.getFileList(startDate, endDate, targetBucket, targetFileDir, folderPosition) 

                if len(fileList):
                    if self.debugFlag:
                        print("fileList = ", fileList)
                    fileExistFlag=1
                    break

                time.sleep(deaFileSleepTime)
                deaActualTime += deaFileSleepTime

                if self.debugFlag:
                    print "deaActualTime =", deaActualTime
                    print "deaFileSleepTime =", deaFileSleepTime
                    print "deaFileWaitTime =", deaFileWaitTime
                self.m_logger.info("INFO : Waiting for file in FINRA's cloud, " + str(deaFileWaitTime - deaActualTime) + " secs remaining...")
                    
            # End of while

            tblName = self.m_mktConfigDict["dea"]["TARGET_TBL_NAME"] + "_" + self.mktName.upper()
            # If no files exists for the given day, create a zero byte data file and a manifest file for the same
            if not fileExistFlag:
                #deaDummyDataFile = self.m_configDict["dea"]["DEA_DUMMY_DATA_FILE"].replace("PDATE", self.processingDate)
                deaDummyDataFile = self.m_configDict["dea"]["DEA_DUMMY_DATA_FILE"].replace("PDATE", str(self.processingDate))
                deaDummyDataFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + deaDummyDataFile
                open(deaDummyDataFileWthPath,'a').close()
                fatlManifestFile = self.m_configDict["ENV"]["stage_dir"] + "/" + tblName + "." + self.processingDate + ".manifest"

                if self.debugFlag:
                    print "fileExistFlag = ", fileExistFlag
                with open(fatlManifestFile,"w") as fh:
                    fileSize = 0
                    if self.debugFlag:
                        print "deaDummyDataFileWthPath = ", deaDummyDataFileWthPath
                        print "fileSize = ", fileSize
                        print "tblName = ", tblName, "file = ", deaDummyDataFile, "fileSize = ", fileSize, "mktName = ", self.mktName
                    fh.write(tblName + "|" + deaDummyDataFile + "|" + str(fileSize) + "|" + "0" + "\n")
                self.m_logger.info("INFO : No File found for processing date " + self.processingDate + ". Creating zero byte data file " + deaDummyDataFileWthPath + " and manifest file " + fatlManifestFile)
                sys.exit(0)
                
            fileIDQueue = Queue()
            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])

            # Insert a record into tb_dxt_dataset_trans with status 'P' for the given datasetName, saying that we start the process for this manifest file

            pStatus = 'P'
            # We decided to use tblName instead of dataset for DEA, as we don't have dataset concept or manifest files
            self.datasetName = tblName
            tempSql = self.m_configDict["SQL"]["put_dataset"]
            myParamsDict = {'datasetName':self.datasetName, 'runID': str(self.runID), 'tDate':str(self.processingDate), 'status': pStatus }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.error("Unable to insert into tb_dxt_dataset_trans table " + mySql + ". Error = " + self.datasetName + " for processing date = " + self.processingDate)
                sys.exit(1)

            fileID = 1
            dbFlag = 1
            fileIDQueue = Queue()
            procs = []
            doneCounter = 0
            sendCounter = 0
            failureFlag = 0

            process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])

            while doneCounter < len(fileList):
                while sendCounter < len(fileList) and sendCounter - doneCounter < process_count:
                    #print "Inside while loop"
                    #print "fileList = ", fileList
                    # Call fn extractData to fetch files from AWS.  Pass manifestFileDict[sendCounter] as it contains the whole record including the filename, filesize & row count
                    processHandle = Process(target=DEAExtractor.extractData, args=(self, fileList[sendCounter],fileID, fileIDQueue, dbFlag))
                    processFlag=1
                    if ((int(time.time()) - self.currentEpochTime) > s3TimeoutTime):
                        self.currentEpochTime = int(time.time())
                        self.m_logger.info("Getting New Token for Batch : {0}, Max batches : {1}".format(batch_count,max_batches))
                        if self.debugFlag:
                            print 'Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
                            print "self.currentEpochTime = ", self.currentEpochTime
                            print "Current Time in Epoch = ", int(time.time())
                        if self.debugFlag:
                            print "Inside get new token - self.currentEpochTime = ", self.currentEpochTime
                        initCount = 0
                        while (initCount < tokenRetryTimes):
                            tokenReturnCode = 0
                            tokenReturnCode = self.s3object.getToken()
                            if tokenReturnCode:
                                if initCount == tokenRetryTimes:
                                    self.m_logger.error("Error: Exceed the max retries " + tokenRetryTimes + " to get AWS Token from FINRA.  Please re-try after some time or escalate.. ")
                                    sys.exit(1)
                                initCount += 1
                                time.sleep(tokenRetryWaitTime)
                            else:
                                break

                    threadDelayTime = int(self.m_configDict["dea"]["THREAD_DELAY_TIME"])
                    time.sleep(threadDelayTime)

                    processHandle.start()
                    procs.append(processHandle)
                    sendCounter += 1
                    fileID += 1
                if processFlag:
                    for p in procs:
                        p.join()
                        procs=[]
                    processFlag=0
                while not fileIDQueue.empty():  # process completed results as they arrive
                    #time.sleep(3)
                    qFileID, qResult = fileIDQueue.get()
                    if self.debugFlag:
                        print("qFileID = ", qFileID, "qResult = ", qResult)
                    doneCounter += 1
                    if qResult:
                        failureFlag = 1
                if self.debugFlag:
                    print "ProcessFlag = ", processFlag, "sendCounter = ", sendCounter, "doneCounter = ", doneCounter
                if failureFlag:
                    break
                            
            if self.debugFlag:
                print "Failure Flag = ", failureFlag
    
            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'
                fatlManifestFile = self.m_configDict["ENV"]["stage_dir"] + "/" + tblName + "." + self.processingDate + ".manifest"
                if self.debugFlag:
                    print "File List = ", fileList
                    print "fatlManifestFile =", fatlManifestFile
                with open(fatlManifestFile,"w") as fh:
                    counter = 0
                    for file in fileList:
                        sourceFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + file
                        fileSize = os.stat(sourceFileWthPath).st_size
                        if self.debugFlag:
                            print "sourceFileWthPath = ", sourceFileWthPath
                            print "fileSize = ", fileSize
                            print "tblName = ", tblName, "file = ", file, "fileSize = ", fileSize, "mktName = ", self.mktName
                        fh.write(tblName + "|" + file + "|" + str(fileSize) + "|" + "0" + "\n")
                        counter += 1

            # insert a record into tb_dxt_dataset_trans table with 'S' or 'F' record
    
            #Call Oracle fn to insert status 'S' into TB_DDY_DATASET_TRANS with RUNID etc
            #DB_CALL
            # Make database call sp_dxt_insert_dataset_trans and insert data based on Failure or Success
    
            mySql = ""
            myParams = ""
            tempSql = self.m_configDict["SQL"]["put_dataset"]
            
            myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.processingDate), "status": pStatus }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
        
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
        
            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr
       
            if returnStr[0] != '0':
                self.m_logger.error("Unable to insert into tb_dxt_dataset_trans table " + mySql + ". Error = " + self.datasetName + " for processing date = " + self.processingDate)
                sys.exit(1)
    
        except Exception as e:
            self.m_logger.error("processDEAExtractor failed with error " + str(e))
            sys.exit(1)
Exemplo n.º 15
0
class Extractor():
    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""

    def __init__(self, configFile, mktName, tradeDate, debugFlag):
        """
        Purpose: Constructor -- set up logging, load the configuration
                 dictionary and open the Oracle connection.
        :param self:        class object itself
        :param configFile:  Configuration file to use
        :param mktName:     Market name this extractor instance works on
        :param tradeDate:   Trade date being processed
        :param debugFlag:   When truthy, methods emit verbose debug output
        """

        # Build the logger first so every later failure can be reported.
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        # Plain bookkeeping attributes used throughout the class.
        self.configFile = configFile
        self.mktName = mktName
        self.tradeDate = tradeDate
        self.debugFlag = debugFlag

        try:
            # Parse the configuration file into a dictionary ...
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            # ... and use it to open the Oracle connection.
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)

        except Exception as exp:
            # Configuration or DB bootstrap failed -- log, echo and abort.
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the global dictionary m_mktConfigDict for reference
        :param mktConfigFile: path to the market-specific configuration file
        :return: None; any failure is logged, echoed and terminates the process
        """
        try:
            parsed = configuration(mktConfigFile, True)
            self.m_mktConfigDict = parsed.m_dictionary
        except Exception as exp:
            # A bad or unreadable market config file is fatal for the run.
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active Active loads happening at a given point
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
                localActiveLoadMax = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"]
                localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
                localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]

                mySql = ""
                myParams = ""
                mySql = self.m_configDict["SQL"]["get_active_loads"]
    
                activeFlag=1
                totalActiveWaitTime=0
                while activeFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkActiveLoads - Active Loads value = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if actual active loads is <= configured active loads.  If so, return out of the fn
                    if int(returnStr[1].strip()) <= localActiveLoadMax:
                        activeFlag=0
                        return 0

                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localActiveLoadWaitTime)
                    totalActiveWaitTime += localActiveLoadWaitTime

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit
                    if totalActiveWaitTime > localActiveMaxWaitTime:
                        self.m_logger.error("In Fn chkActiveLoads.  Total Actual Wait Time exceeds the configured value active_load_max_wait_time.  Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset
                  (a "race" check) and poll until it clears or the configured
                  maximum wait time elapses.
        :param None: None at this point (reads self.datasetName, self.m_mktConfigDict,
                     self.m_configDict, self.m_oracle_db and self.debugFlag)
        :return: 0 when no competing load is running (or checking is disabled),
                 1 when the maximum wait time is exceeded; exits the process on
                 a database error.
        """
        try:
            if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y':
                # Polling interval and overall cap, configured in seconds.
                localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"])
                localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"])

                mySql = ""

                #myParams = {"datasetName":self.datasetName}
                # Substitute the dataset name into the configured SQL template:
                # build one alternation of the dict keys and replace each
                # occurrence with its value.
                tempSql = self.m_configDict["SQL"]["get_race_status"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
    
                raceFlag=1
                totalRaceStatusWaitTime=0
                while raceFlag:
                    # returnStr[0] is a status code ('0' == OK); returnStr[1]
                    # carries the count of running loads for this dataset.
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "tempSql = ", tempSql
                        print "myParamsDict = ", myParamsDict
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkRaceStatus - ReturnCode = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                    # A count of 1 is this very run, so <= 1 means no competitor.
                    if int(returnStr[1].strip()) <= 1:
                        raceFlag=0
                        return 0

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit


                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localRaceStatusWaitTime)
                    #time.sleep(90)
                    totalRaceStatusWaitTime += localRaceStatusWaitTime
                    if self.debugFlag:
                        print "totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime
                    # Give up once the accumulated wait exceeds the configured cap.
                    if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                        self.m_logger.error("In Fn chkRaceStatusLoads.  Total Actual Wait Time exceeds the configured value race_status_max_wait_time.  Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                        return 1
            else:
                # Checking disabled by configuration.
                return 0

            #Return failure
            # NOTE(review): unreachable -- every path inside the 'Y' branch
            # either returns or loops, and the 'N' branch returns above.
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def extractData(self,localDataRecordList, localFileID, localFileIDQueue, localDBFlag):
        """
        Purpose - To load the given datafile to the S3 bucket specified in the global mktConfigFile
        :param localDataRecordList: Datafile related info fetched from FINRA's manifest file including filename, filesize, recordcount
        :param localFileID: Internal File ID assigned to the local datafile
        :param localFileIDQueue: Queue in which, results of the operation is stored
        :param localDBFlag: Flag indicating if database should be used or not
        :return:
        """
        try:
            if self.debugFlag:
                print "Inside extractData function"

            if localDBFlag:
                """ Not sure if we need Race Status check for Extract
                raceStatusReturnValue=self.chkRaceStatus()
                if self.debugFlag:
                    print "raceStatusReturnValue=", raceStatusReturnValue

                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1
                """

                """  Need to integrate Active loads with tb_dxt_process_status and tb_dxt_process_status ?
                activeLoadsReturnValue=self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1
                """

                processID = os.getpid()
                hostName = socket.gethostname()
                # Need to check the order
                localDataFile = localDataRecordList[1]
                dataFileSize = int(localDataRecordList[2])
                dataFileRecordCount = int(localDataRecordList[3])

                #Insert Process status into Oracle db
                #DB_CALL - sp_dxt_insert_process_status(RUNID, FILE_ID, etc)
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                pStatus = 'P'
                pComment = 'Load started'
                #myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": dataFileSize, "recordCount" : dataFileRecordCount, "status":pStatus , "lcomment":pComment}
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": str(dataFileSize), "recordCount" : str(dataFileRecordCount), "status":pStatus , "lcomment":pComment}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            # Get the dataFileName file to be extracted from AWS
            dataFileName = localDataRecordList[1]
            #Here localFileWthPath is the local stage dir with file name
            localFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + dataFileName
            
            #Here targetFileWthPath is the AWS dir with file name
            targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(dataFileName))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])
            if self.debugFlag:
                print("localFileWthPath =", localFileWthPath)
                print("targetFileWthPath =", targetFileWthPath)
                print("targetBucket =", targetBucket)
                print("encryptKeyFlag =", encryptKeyFlag)
                print("localAWSRetries =", localAWSRetries)
            initCount = 0
            while (initCount < localAWSRetries):
                extractReturnValue = 0

                #Call s3.data download to extract the manifest file (single part load)
                #extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)

                if self.debugFlag:
                    print "extractReturnValue = ", extractReturnValue

                if int(extractReturnValue) == 0:
                    pStatus = 'S'
                    pComment = 'Load completed'
                    break
                else:
                    pStatus = 'F'
                    pComment = 'Load failed'
                    initCount += 1


            # Get the size of the file downloaded 
            localFileSize = os.stat(localFileWthPath).st_size

            # Check if the downloaded file size is matching with what is mentioned in manifest file.  If not mark it as failed
            if localFileSize != dataFileSize:
                pStatus = 'F'
                pComment = 'Actual file size != Manifest file size'


            localRecordCount = 0
 
            if localDBFlag:
                #Call DB to insert 'S' or 'F' in tb_dxt_process_status
                #localFileIDQueue.put((localFileID, extractReturnValue))
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                #myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": localFileSize, "recordCount" : localRecordCount, "status":pStatus , "lcomment":pComment}
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": str(localFileSize), "recordCount" : str(localRecordCount), "status":pStatus , "lcomment":pComment}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                localFileIDQueue.put((localFileID,extractReturnValue))
            else:
                return extractReturnValue
      
        except Exception as exp:
            self.m_logger.error("Failure in extractData process for file with the error " + str(exp))
            if localDBFlag:
                localFileIDQueue.put((localFileID, 1))
            else:
                return 1

    def getRecords(self, fileDict, startDateTime, endDateTime):
        """
        Purpose - Function to sort the dictionary based on the key and return a sorted list
        :param fileDict : Dictionary containing Last_modified Date and file name
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            patternToSearch =  self.m_configDict["ENV"]["pattern_to_search"]
            
            if self.debugFlag:
                print "fileDict = ", fileDict
                print "patternToSearch = ", patternToSearch
                print "startDateTime = ", startDateTime
                print "endDateTime = ", endDateTime
            sorted_keys = sorted(fileDict.iterkeys())
            start = bisect.bisect_left(sorted_keys, startDateTime)
            end = bisect.bisect_right(sorted_keys, endDateTime)
            if self.debugFlag:
                print "start = ", start
                print "end = ", end
            for fileItem in sorted(fileDict.iteritems())[start:end]:
                print "For fileItem = ", fileItem
                if patternToSearch in fileItem[1]:
                    yield fileItem[1]

        except Exception as exp:
            self.m_logger.error("Failed while executing getRecords to sort the dictionary content of dictionary  with Error = " + str(exp))
            sys.exit(1)

    def getManifestFileList(self, startDateTime, endDateTime):
        """
        Purpose - List the S3 bucket/path by last-modified date and return the
                  manifest file names falling inside the given window, sorted
                  ascending (filtering is delegated to getRecords).
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        :return: list of matching file names on success; the integer 1 on failure.
                 NOTE(review): callers iterate the result directly, so the
                 int-on-error contract would raise at the call site -- confirm
                 intended error handling.
        """
        try:
            if self.debugFlag:
                print "S3 Bucket = ", self.m_configDict["S3"]["bucket"]
                print "S3 Path = ", self.m_configDict["S3"]["path"]
                print "startDateTime = ", startDateTime
                print "endDateTime = ", endDateTime
            # Dict keyed by last-modified timestamp, valued by file name.
            fileListDict = self.s3object.listBucketWPathByLastModified(self.m_configDict["S3"]["bucket"], self.m_configDict["S3"]["path"])
           
            #endDateTime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # Materialise the generator so callers get a plain list.
            manifestFileList = list(self.getRecords(fileListDict, startDateTime, endDateTime))

            if self.debugFlag:
                print "fileListDict = ", fileListDict
                print "manifestFileList = ", manifestFileList

            return manifestFileList
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS manifest file list with Error = " + str(exp))
            return 1

    def processExtractor(self):
        """
        Purpose - Function responsible for getting the AWS token and reading the last modified date in DB and fetch the list of files from AWS to be processed
        :param : None
        :return:
        """
        try:
            # DB_CALL
            # Make database call sp_dxt_validate_mktName(mktName) to validate mktName

            tempSql = self.m_configDict["SQL"]["validate_market_name"]
            myParamsDict = { 'mktName' : self.mktName }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.error("Invalid market name provided " + mySql + ". Error = " + self.mktName)
                sys.exit(1)

            if self.debugFlag:
                print "MktName from DB = ", self.mktName
          
            #Build the string for mktConfigFile based on mktName and configFile info
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + os.path.basename(self.configFile).split('.',1)[0].strip() + '_' + self.mktName.lower() + '.' + os.path.basename(self.configFile).split('.',1)[1].strip()

            if self.debugFlag:
                print("mktConfigFile = ", self.mktConfigFile)

            #Validate Market Config file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)

            if self.debugFlag:
                print("m_mktConfigDict=",self.m_mktConfigDict)

            # Read the table for the given market and fetch the last modified timestamp for the given manifest file
            #tempSql = self.m_configDict["SQL"]["get_last_modified"]
            #myParamsDict = { 'mktName' : self.mktName }
            #tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            #mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            #returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            #if self.debugFlag:
                #print "tempSql = ", tempSql
                #print "myParamsDict = ", myParamsDict
                #print "mySql = ", mySql
                #print "returnStr = ", returnStr

            #if returnStr[0] == '0':
                #if returnStr[1]:
                    #lastModifiedDate=returnStr[1] 
                #else:
                    #lastModifiedDate="2015-01-01 00:00:00"
            #else:
                #self.m_logger.error("Unable to get last_modified date using the sql " + mySql + ". Error = " + self.mktName)
                #sys.exit(1)

            #Temp call.  Need to enable the previous lines to use DB call

            lastModifiedDate="2015-01-01 00:00:00"

            # Get RunID
            self.runID = generate_runId()
            if self.debugFlag:
                print("RunID = ", self.runID)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)
            self.s3object.getToken()

            # Get list of Manifest files to be processed

            #currentDate = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            #currentDate = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            currentDate = (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
            finraManifestFileList = self.getManifestFileList(lastModifiedDate, currentDate)
            
            if self.debugFlag:
                print("finraManifestFileList = ", finraManifestFileList)

            # Download manifest files in the manifest file list to a specific folder from AWS
            localFileDir = self.s3object.m_configFile["ENV"]["stage_dir"]

            targetFolder = self.s3object.m_configFile["S3"]["path"]
            #targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(localFileWthPath))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]
            #targetFileWthPath="50006/slmm_mnem.007.txt.gz"
            #localFileWthPath="/tmp/slmm_mnem.007.txt.gz"

            # Get an instance of the Manifest class
            localManifest = Manifest()
            fileIDQueue = Queue()
            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])

            for finraManifestFile in finraManifestFileList:
                targetFileWthPath = targetFolder + finraManifestFile
                localFileWthPath = localFileDir + "/" + finraManifestFile
                if self.debugFlag:
                    print "targetFileWthPath = ", targetFileWthPath
                    print "localFileWthPath = ", localFileWthPath
                    print "finraManifestFile = ", finraManifestFile
                
                initCount = 0
                while (initCount < localAWSRetries):
                    extractReturnValue = 0

                    #Call s3.data download to extract the manifest file (single part load)
                    #extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                    extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)

                    if self.debugFlag:
                        print "extractReturnValue = ", extractReturnValue


                    if extractReturnValue:
                        # Try it again
                        initCount += 1
                    else:
                        # Come out of the loop
                        break
                # End of while loop for AWS Retries
                  
                if extractReturnValue:
                    self.m_logger.error("Unable to fetch manifestFile = " + finraManifestFile + "from the path = " + targetFileWthPath + " to the local filesystem = " + localFileWthPath )
                    sys.exit(1)

                    """ Not needed
                    if extractReturnValue == 0:
                        pStatus = 'P'
                        pComment = 'Load completed'
                        break
                    else:
                        pStatus = 'F'
                        pComment = 'Load failed'
                    """
                    initCount += 1

                # get datasetname from the manifest file.  Need check based on FINRA naming

                self.datasetName = os.path.basename(finraManifestFile).split('.',3)[1].strip().upper()
                if self.debugFlag:
                    print "datasetName = ", self.datasetName

                # Need to check DB call, once it is ready

                # Validate the manifest file name to make sure that we are expecting it
                tempSql = self.m_configDict["SQL"]["validate_dataset_name"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                #Check if dataset is there in the tb_dxt_dataset_master, if not, skip it and move to the next file.  For other errors, exit out of the program
                if int(returnStr[0]) < 0:
                    self.m_logger.error("Unable to validate datasetName " + mySql + ". Error = " + self.datasetName)
                    sys.exit(1)
                elif int(returnStr[0]) > 0:
                    self.m_logger.info("Give Dataset is not in the list to process.  Skipping it" + mySql + ". Dataset Name = " + self.datasetName)
                    # Continue to the next file entry in the manifest list
                    continue
                    
                # Insert a record into tb_dxt_dataset_trans with status 'P' for the given datasetName, saying that we start the process for this manifest file

                pStatus = 'P'
                tempSql = self.m_configDict["SQL"]["put_dataset"]
                myParamsDict = {'datasetName':self.datasetName, 'runID': str(self.runID), 'tDate':str(self.tradeDate), 'status': pStatus }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to insert into tb_dxt_dataset_trans table " + mySql + ". Error = " + self.datasetName)
                    sys.exit(1)

                # Read the contents of manifestfile i.e. dataFileNames into a list - Will validate the datafiles as well
                manifestDelim = self.m_configDict["ENV"]["manifest_delim"]
                if self.debugFlag:
                    print "localFileWthPath = ", localFileWthPath
                manifestFileList = localManifest.readManifest(localFileWthPath, self.m_logger, manifestDelim, self.debugFlag)
                if self.debugFlag:
                    print "manifestDelim = ", manifestDelim
                    print "manifestFileList = ", manifestFileList

                process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])

                # Now go into multiprocessing and call extractData function and extract files ones by one 
                fileID=1
                dbFlag=1
                fileIDQueue = Queue()
                procs = []
                doneCounter = 0
                sendCounter = 0
                failureFlag = 0

                while doneCounter < len(manifestFileList):
                    while sendCounter < len(manifestFileList) and sendCounter - doneCounter < process_count:
                        if self.debugFlag:
                            print "manifestFileList[sendCounter][1]", manifestFileList[sendCounter][1], "fileID = ", fileID
                        # Call fn extractData to fetch files from AWS.  Pass manifestFileList[sendCounter] as it contains the whole record including the filename, filesize & row count
                        processHandle = Process(target=Extractor.extractData, args=(self, manifestFileList[sendCounter],fileID, fileIDQueue, dbFlag))
                        processFlag=1
                        processHandle.start()
                        procs.append(processHandle)
                        sendCounter += 1
                        fileID += 1
                    if processFlag:
                        for p in procs:
                            p.join()
                            procs=[]
                        processFlag=0
                    while not fileIDQueue.empty():  # process completed results as they arrive
                        #time.sleep(3)
                        qFileID, qResult = fileIDQueue.get()
                        if self.debugFlag:
                            print("qFileID = ", qFileID, "qResult = ", qResult)
                        doneCounter += 1
                        if qResult:
                            failureFlag = 1
                    if self.debugFlag:
                        print "ProcessFlag = ", processFlag, "sendCounter = ", sendCounter, "doneCounter = ", doneCounter
                    if failureFlag:
                        break
                        
                if self.debugFlag:
                    print "Failure Flag = ", failureFlag
    
                if failureFlag:
                    pStatus = 'F'
                else:
                    pStatus = 'S'

                # Move all the data files to inbox  from the stg location.  No need for this step, as Joejo mentioned there will be another Tidal job doing this step

                # Move the manifest file to inbox from the stg location

                # insert a record into tb_dxt_dataset_trans table with 'S' or 'F' record

                #Call Oracle fn to insert status 'S' into TB_DDY_DATASET_TRANS with RUNID etc
                #DB_CALL
                # Make database call sp_dxt_insert_dataset_trans and insert data based on Failure or Success

                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_dataset"]
            
                myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.tradeDate), "status": pStatus }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
    
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
    
                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                if failureFlag:
                    self.m_logger.error("Extract failed for data files for manifest file " + self.manifestFile)
                    sys.exit(1)

           # End of for loop for finraManifestFiles

        except Exception as e:
            self.m_logger.error("ProcessExtractor failed with error " + str(e))
            sys.exit(1)
Exemplo n.º 16
0
 def getProcedure(self, procName):
     """Look up the stored procedure *procName* on the active connection.

     Delegates to the Oracle module when it is in use; otherwise the
     procedure is resolved as an attribute of the cursor's ``procedures``
     object.
     """
     if not self.useOracleMod:
         return getattr(self.getCursor().procedures, procName)
     return Oracle.getProcedure(self.realConn, procName)
Exemplo n.º 17
0
def BFGS(Oracle, x0):
    """Minimize a function with the BFGS quasi-Newton method.

    Parameters
    ----------
    Oracle : callable
        ``Oracle(x, 4)`` must return the pair ``(criterion, gradient)`` at x.
    x0 : ndarray
        Starting point.

    Returns
    -------
    tuple
        ``(critere_opt, gradient_opt, x_opt)`` — criterion value, gradient
        and point at the last iterate.
    """
    ##### Variable initialisation

    iter_max = 10000
    # gradient_step_ini = 1.  # Primal problem.
    gradient_step_ini = 1000.  # Dual problem.
    threshold = 0.000001

    # Counts Fletcher-Lemarechal line searches that failed to converge.
    error_count = 0

    gradient_norm_list = []
    gradient_step_list = []
    critere_list = []

    time_start = process_time()

    x = x0

    ##### Iteration loop
    for k in range(iter_max):
        # New criterion and gradient values
        critere, gradient = Oracle(x, 4)

        # Convergence test on the gradient norm
        gradient_norm = norm(gradient)
        if gradient_norm <= threshold:
            break

        # Descent direction: identity on the first step, then the BFGS
        # update of the inverse-Hessian approximation W.
        if k == 0:
            W = np.eye(len(gradient))
        else:
            delta_x = x - x_p
            delta_g = gradient - gradient_p
            # Curvature term <delta_g, delta_x>, shared by both rank-one
            # corrections (the original computed it twice).
            curvature = np.vdot(delta_g, delta_x)
            delta_mat_1 = np.outer(delta_x, delta_g) / curvature
            delta_mat_2 = np.outer(delta_x, delta_x) / curvature
            identity = np.eye(len(gradient))
            W = np.dot(np.dot(identity - delta_mat_1, W_p),
                       identity - np.transpose(delta_mat_1)) + delta_mat_2
        direction = np.dot(-W, gradient)

        # Step length from the Wolfe line search
        gradient_step, error_code = Wolfe(gradient_step_ini, x, direction,
                                          Oracle)

        if error_code != 1:
            error_count += 1

        # Save the previous iterate for the next BFGS update, then advance.
        # (The original also stored the previous direction, but that value
        # was never read — dead store removed.)
        x_p = x
        gradient_p = gradient
        W_p = W
        x = x + (gradient_step * direction)

        # Record gradient-norm, step and criterion histories
        gradient_norm_list.append(gradient_norm)
        gradient_step_list.append(gradient_step)
        critere_list.append(critere)

    if error_count > 0:
        print()
        print("Non-convergence de l'algorithme de Fletcher-Lemarechal : {}".
              format(error_count))

    ##### Optimisation results
    critere_opt = critere
    gradient_opt = gradient
    x_opt = x
    time_cpu = process_time() - time_start

    print()
    print('Iteration :', k)
    print('Temps CPU :', time_cpu)
    print('Critere optimal :', critere_opt)
    print('Norme du gradient :', norm(gradient_opt))

    # Convergence visualisation
    Visualg(gradient_norm_list, gradient_step_list, critere_list)

    return critere_opt, gradient_opt, x_opt,
Exemplo n.º 18
0
    data_filename=data_dir,
    batch_size=batch_size,
    sequence_length=sequence_length,
    validation_split=validation_split,
    fake_batch_size=discriminator_pre_training_fake_batch_size,
    seed=seed,
    data_type="val",
    use_word_vectors=use_word_vectors)

# Initialize Models
# Build the "oracle" model over the train/validation data loaders; every
# hyperparameter comes from the surrounding configuration variables.
# NOTE(review): presumably a SeqGAN-style setup where the oracle stands in
# for the true data distribution — confirm against the Oracle class.
oracle = Oracle(train_data_loader=train_dl,
                validation_data_loader=val_dl,
                units=oracle_hidden_units,
                leaky_relu_alpha=oracle_leaky_relu_alpha,
                num_layers=oracle_layers,
                opt=oracle_optimizer,
                dropout_keep_prob=oracle_dropout_keep_prob,
                l2_reg_lambda=oracle_l2_regularization_lambda,
                sequence_length=sequence_length,
                loss=oracle_loss,
                metrics=oracle_metrics)
gen = Generator(train_data_loader=train_dl,
                validation_data_loader=val_dl,
                units=generator_hidden_units,
                leaky_relu_alpha=generator_leaky_relu_alpha,
                num_layers=generator_layers,
                opt=generator_optimizer,
                dropout_keep_prob=generator_dropout_keep_prob,
                l2_reg_lambda=generator_l2_regularization_lambda,
                sequence_length=sequence_length,
                loss=generator_loss,
Exemplo n.º 19
0
class Loader():
    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""
    m_netezza_db = ""

    def __init__(self, configFile, tradeDate, debugFlag):
        """Set up logging, configuration and the Oracle connection.

        :param self:        class object itself
        :param configFile:  configuration file to use
        :param tradeDate:   trade date this run is for
        :param debugFlag:   when truthy, emit verbose diagnostics
        """
        # Create the logger first so every later failure can be reported
        # through it, and stamp the log with this file's header info.
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        self.configFile = configFile
        self.tradeDate = tradeDate
        self.debugFlag = debugFlag

        try:
            # Parse the main configuration into a dictionary, then open the
            # Oracle connection using it.
            self.m_configDict = configuration(self.configFile, True).m_dictionary
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)
        except Exception as exp:
            # Configuration or DB bring-up failed: log, echo, and abort.
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """Load the market-specific config into ``self.m_mktConfigDict``.

        :param mktConfigFile: path of the market configuration file
        :return: None; the process exits on any parse failure
        """
        try:
            parsed = configuration(mktConfigFile, True)
            self.m_mktConfigDict = parsed.m_dictionary
        except Exception as exp:
            # Report through the logger and stdout, then abort the run.
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active Active loads happening at a given point
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
                localActiveLoadMax = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"]
                localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
                localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]

                mySql = ""
                myParams = ""
                mySql = self.m_configDict["SQL"]["get_active_loads"]
    
                activeFlag=1
                totalActiveWaitTime=0
                while activeFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkActiveLoads - Active Loads value = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if actual active loads is <= configured active loads.  If so, return out of the fn
                    if int(returnStr[1].strip()) <= localActiveLoadMax:
                        activeFlag=0
                        return 0

                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localActiveLoadWaitTime)
                    totalActiveWaitTime += localActiveLoadWaitTime

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit
                    if totalActiveWaitTime > localActiveMaxWaitTime:
                        self.m_logger.error("In Fn chkActiveLoads.  Total Actual Wait Time exceeds the configured value active_load_max_wait_time.  Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """Wait until no competing load is running for ``self.datasetName``.

        When ``race_status_check_flag`` is 'Y', polls the ``get_race_status``
        SQL every ``race_status_wait_time`` seconds until at most one load
        (this one) is reported active, giving up after
        ``race_status_max_wait_time`` seconds of accumulated waiting.

        :return: 0 when clear to proceed, 1 on timeout; exits the process on
                 database errors.
        """
        try:
            if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y':
                # Poll interval and total-wait cap come from the market config.
                localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"])
                localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"])

                mySql = ""

                # Substitute the dataset name into the SQL template by regex
                # replacement of the placeholder key.
                #myParams = {"datasetName":self.datasetName}
                tempSql = self.m_configDict["SQL"]["get_race_status"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
    
                raceFlag=1
                totalRaceStatusWaitTime=0
                while raceFlag:
                    # returnStr[0] is a status code ('0' = OK), returnStr[1]
                    # the active-load count for this dataset.
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "tempSql = ", tempSql
                        print "myParamsDict = ", myParamsDict
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkRaceStatus - ReturnCode = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                    if int(returnStr[1].strip()) <= 1:
                        raceFlag=0
                        return 0

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit


                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localRaceStatusWaitTime)
                    #time.sleep(90)
                    totalRaceStatusWaitTime += localRaceStatusWaitTime
                    if self.debugFlag:
                        print "totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime
                    if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                        self.m_logger.error("In Fn chkRaceStatusLoads.  Total Actual Wait Time exceeds the configured value race_status_max_wait_time.  Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                        return 1
            else:
                # Race checking disabled by configuration.
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def loadData(self,localDataFile, localFileID, localFileIDQueue):
        """Upload one datafile to S3 and record its status in the database.

        Checks race/active-load throttles, inserts a 'P' (load started)
        record, uploads the file with bounded retries, then inserts the
        final 'S'/'F' record and reports ``(fileID, returnCode)`` on the
        result queue.

        :param localDataFile: source datafile to upload
        :param localFileID: internal file ID assigned to the source datafile
        :param localFileIDQueue: queue receiving (fileID, returnCode) results
        :return: 1 on early failure (also reported via the queue)
        """
        try:
            localDBFlag=1
            if self.debugFlag:
                print("Inside loadData function")

            if localDBFlag:
                # Make sure no competing load is running for this dataset.
                raceStatusReturnValue=self.chkRaceStatus()
                if self.debugFlag:
                    print("raceStatusReturnValue=", raceStatusReturnValue)

                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1

                # Throttle against the configured maximum number of active
                # loads.  (The original logged this as "chkRaceStatus" and
                # queued the race-status value — both copy-paste bugs.)
                activeLoadsReturnValue=self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkActiveLoads fn.  Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, activeLoadsReturnValue))
                    return 1

                processID = os.getpid()
                hostName = socket.gethostname()

                # Insert a 'P' (load started) row into the process-status
                # table, substituting parameters into the SQL template.
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                pStatus = 'P'
                pComment = 'Load started'
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "status":pStatus , "lcomment":pComment, "tidalRunID":str(self.tidalRunID)}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print("tempSql = ", tempSql)
                    print("myParamsDict = ", myParamsDict)
                    print("mySql = ", mySql)
                    print("returnStr = ", returnStr)

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            # The S3 session/token is established once by the caller and
            # shared through self.s3object.
            sourceFileWthPath = localDataFile

            targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(sourceFileWthPath))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            local_aws_retries = int(self.m_mktConfigDict["ENV"]["aws_retries"])
            if self.debugFlag:
                print("sourceFileWthPath =", sourceFileWthPath)
                print("targetFileWthPath =", targetFileWthPath)
                print("targetBucket =", targetBucket)
                print("encryptKeyFlag =", encryptKeyFlag)
                print("local_aws_retries =", local_aws_retries)

            # Upload with a bounded number of retries (single-part load).
            init_count = 0
            while (init_count < local_aws_retries):
                loadReturnValue = self.s3object.loadDataSinglePart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                if self.debugFlag:
                    print("loadReturnValue = ", loadReturnValue)

                if loadReturnValue == 0:
                    pStatus = 'S'
                    pComment = 'Load completed'
                    break
                else:
                    pStatus = 'F'
                    pComment = 'Load failed'
                    init_count += 1

            if localDBFlag:
                # Record the final 'S'/'F' status in the process-status table.
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "status":pStatus , "lcomment":pComment, "tidalRunID":str(self.tidalRunID)}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if self.debugFlag:
                    print("tempSql = ", tempSql)
                    print("myParamsDict = ", myParamsDict)
                    print("mySql = ", mySql)
                    print("returnStr = ", returnStr)

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                localFileIDQueue.put((localFileID,loadReturnValue))
            else:
                return loadReturnValue

        except Exception as exp:
            self.m_logger.error("Failure in loadData process for file with the error " + str(exp))
            if localDBFlag:
                # Original was put(localFileID, 1): that passes 1 as the
                # queue's `block` flag instead of queueing a (fileID, rc)
                # tuple, so consumers never saw the failure.
                localFileIDQueue.put((localFileID, 1))
            else:
                return 1

    def createFinraManifestFile(self, manifestFile):
        try:
            # Read Manifest file to get info like total rows, total size & other details to populate the done file for FINRA
            if self.debugFlag:
                print "Inside createFinraManifestFile fuction"

            with open(manifestFile,"r") as fh:
                self.totalRows = 0
                self.totalSize = 0
                self.fileCount = 0
                self.fileDict = {}
                for data in fh:
                    data.rstrip("\n")
                    mylist = []
                    self.fileCount +=1
                    mylist = data.split("|")
                    self.fileDict[self.fileCount] = [mylist[0],os.path.basename(mylist[1]),int(mylist[2]),int(mylist[3])]
                    self.totalRows += int(mylist[3])
                    self.totalSize += int(mylist[2])
                if self.debugFlag:
                    print "self.fileDict = ", self.fileDict
        except Exception as exp:
            self.m_logger.error("Failed while processing readManifest with Error = " + str(exp))
            return 1

        try:
            #Use self.defautltsFile which is populated from the db later. No need to get it from config file
            self.defaultsFileWthPath = self.m_mktConfigDict["DATASET"]["defaults_dir"] + "/" + self.defaultsFile
            with open(self.defaultsFileWthPath,"r") as fh:
                self.defaultsDict = {}
                self.defaultsCount  = 0
                for data in fh:
                    data.rstrip('\n')
                    self.defaultsCount +=1
                    self.defaultsDict[self.defaultsCount]=data
                if self.debugFlag:
                    print "After Defaults, self.fileDict = ", self.fileDict

        except Exception as exp:
            self.m_logger.error("Failed while processing defaults file " + self.defaultsFileWthPath + " with Error = " + str(exp))
            return 1

        try:
            self.finraManifestFile =  self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" +  os.path.basename(manifestFile) + ".done"
            with open(self.finraManifestFile,"w") as finraMnFH:
                finraMnFH.write("# AWS RunID : {}\n".format(str(self.runID)))
                finraMnFH.write("# Dataset : {0} , TradeDate : {1}\n".format(str(self.datasetName),str(self.tradeDate)))
                finraMnFH.write("total_compressed={}\n".format(self.totalSize))
                finraMnFH.write("total_rows={}\n".format(self.totalRows))
                finraMnFH.write("no of files={}\n".format(self.fileCount))
   
                for key,val in self.fileDict.items():
                    finraMnFH.write("file_{0}={1}\n".format(str(key),val[1]))
                    finraMnFH.write("file_{0}_rows={1}\n".format(str(key),val[3]))
  
                finraMnFH.write("# Data Attributes\n")
                for key,val in self.defaultsDict.items():
                    finraMnFH.write("{0}".format(str(val)))
            return 0
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS Done file " + self.finraManifestFile + " with Error = " + str(exp))
            return 1

    def processLoader(self, manifestFile, datasetName, tidalRunID):
        """
        Purpose - Function responsible for reading the manifest file, get market name, call multiprocess load and other db calls.

        Orchestrates a full load run: resolves the market config from the
        database, records the run in TB_DDY_DATASET_TRANS, inserts one manifest
        row per data file, uploads the data files to S3 via a process pool,
        then generates and uploads the FINRA done-file on success.

        :param manifestFile: Manifest file basename (resolved against manifestfile_dir from config)
        :param datasetName: Dataset name, used to look up market/defaults info in the database
        :param tidalRunID: Tidal Run ID 
        :return: does not return a useful value; exits the process via sys.exit(1) on any failure
        """
        try:
            # Read the manifest filename and get the suffix i.e. datasetname
            # Assumption - Manifest file format - manifest.<datasetName>.<tradeDate>.<tidalRunID>
            # Program will break otherwise

            self.datasetName = datasetName
            self.tidalRunID = tidalRunID

            # DB_CALL
            # Make database call sp_ddy_get_market_info(datasetname) and get market info
            mktName = ''

            self.manifestFile = self.m_configDict["ENV"]["manifestfile_dir"] + "/" + manifestFile

            ##Validate Manifest file
            if not os.path.isfile(self.manifestFile):
                self.m_logger.error("Invalid manifest file " + self.manifestFile)
                sys.exit(1)

            if self.debugFlag:
                print "Inside processLoader"
                print "DatasetName = ", self.datasetName
                print "ManifestFile = ", manifestFile
                print "Self ManifestFile = ", self.manifestFile
                print "TidalRunID = ", self.tidalRunID
                print "DebugFlag = ", self.debugFlag
                print "confDict = ", self.m_configDict

            # Enable this one the proc to get mkt name and default file are ready and test it
            # Substitute the named placeholders in the SQL template with the
            # literal values via a single regex pass over the parameter names.
            tempSql = self.m_configDict["SQL"]["get_mkt_defaults_filename"]
            myParamsDict = { 'datasetName' : self.datasetName }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            # returnStr layout (from the DB wrapper): [0]=return code as string,
            # [1]=error text, [2]=market name, [3]=defaults file name.
            if returnStr[0] == '0':
                mktName = returnStr[2].strip()
                self.defaultsFile = returnStr[3].strip()
            else:
                self.m_logger.error("Unable to get market info from the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)

            if self.debugFlag:
                print "MktName from DB = ", mktName
                print "Defaults = ", self.defaultsFile
          
            #Build the string for mktConfigFile based on mktName and configFile info
            # e.g. /path/loader.cfg + "nyse" -> /path/loader_nyse.cfg
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + os.path.basename(self.configFile).split('.',1)[0].strip() + '_' + mktName.lower() + '.' + os.path.basename(self.configFile).split('.',1)[1].strip()

            if self.debugFlag:
                print("mktConfigFile = ", self.mktConfigFile)

            #Validate Manifest file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            #May not need the following section, as we send mktConfigFile to other function not the dictionary self.m_mktConfigDict.  Need to remove it after finishing the loadData part fully

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)

            if self.debugFlag:
                print("m_mktConfigDict=",self.m_mktConfigDict)

            # Read the contents of manifest - dataFileNames into a list - Will validate the datafiles as well

            localManifest = Manifest()
            manifestDelim = self.m_configDict["ENV"]["manifest_delim"]
            manifestFileList = localManifest.readManifest(self.manifestFile, self.m_logger, manifestDelim, self.debugFlag)

            # Get RunID
            self.runID = generate_runId()
            if self.debugFlag:
                print("RunID = ", self.runID)
                #print("manifestFileList = ", manifestFileList)

            #Call Oracle fn to insert status 'P' into TB_DDY_DATASET_TRANS with RUNID etc
            #DB_CALL
            # Make database call sp_ddy_insert_dataset_trans and insert data that process started

            mySql = ""
            myParams = ""
            tempSql = self.m_configDict["SQL"]["put_dataset"]
            pStatus = 'P'

            myParamsDict = {'datasetName':self.datasetName, 'runID': str(self.runID), 'tDate':str(self.tradeDate), 'status': pStatus, 'tidalRunID':str(self.tidalRunID)}
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)

            # Insert Manifest data in db and call multiprocessing s3 loader process.  Shd we add RUN_ID to manifest table

            #For each datafile, generate fileID and call loadData fn using multiprocess to load data into AWS
            # manifestFileList record layout (see readManifest): [1]=file name,
            # [2]=record count, [3]=file size.
            for dataRecord in manifestFileList:
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_manifest"]
                myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.tradeDate), "dataFileName":dataRecord[1], "manifestFileName":manifestFile , "noOfRecords": str(dataRecord[2]), "fileSize":str(dataRecord[3]), "tidalRunID":str(self.tidalRunID)}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
     
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                #if self.debugFlag:
                    #print "tempSql = ", tempSql
                    #print "myParamsDict = ", myParamsDict
                    #print "mySql = ", mySql
                    #print "returnStr = ", returnStr

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put manifest info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)
            self.s3object.getToken()

            # Get Active load values from config file
            localActiveLoadCheckFlag = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"]
            process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])
            #localActiveLoadMax = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])
            #localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
            #localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]
            if self.debugFlag:
                print("localActiveLoadMax = ", process_count)
                print("len(manifestFileList) = ", len(manifestFileList))

            # Worker pool; the Manager queue carries (fileID, result) pairs back
            # from the loadData workers.
            pool = multiprocessing.Pool(processes=process_count)
            m = multiprocessing.Manager()
            fileIDQueue = m.Queue()
            #dbFlag=1

            sendCounter = 0
            doneCounter = 0
            fileID=1

            failureFlag=0
            print manifestFileList
            # Windowed dispatch: keep at most process_count uploads in flight,
            # draining completion results from the queue between submissions.
            # NOTE(review): if a worker dies without posting to fileIDQueue,
            # doneCounter never catches up and this loop spins forever - confirm.
            while doneCounter < len(manifestFileList):
                print "Inside while doneCounter = ", doneCounter
                print "doneCounter = ", doneCounter, "sendCounter = ", sendCounter
                while sendCounter < len(manifestFileList) and sendCounter - doneCounter < process_count:
                    tmpDataFileName = manifestFileList[sendCounter][1]
                    print "Inside sendCounter, manifestFileList[sendCounter] = ", manifestFileList[sendCounter], "manifestFileList[sendCounter][1] = ", manifestFileList[sendCounter][1]
                    #finraManifestLoadStatus=self.loadData(self.finraManifestFile ,fileID, fileIDQueue, dbFlag)
#                    #processHandle = Process(target=Loader.loadData, args=(self, dataRecord[1],fileID, fileIDQueue, dbFlag))
                     #def loadData(self,localDataFile, localFileID, localFileIDQueue, localDBFlag):
                    #results = mpPool.apply_async(Loader.loadData, (self, manifestFileList[sendCounter][1], fileID, fileIDQueue, dbFlag) )
                    #results = mpPool.apply_async(self.loadData, (tmpDataFileName, fileID, fileIDQueue, dbFlag))

                    results = pool.apply_async(self.loadData, args=(tmpDataFileName, fileID, fileIDQueue))
                    #results = pool.apply_async(Loader.loadData, (tmpDataFileName, fileID, fileIDQueue))
                    print "After pool apply_async"
                    time.sleep(2)
                    sendCounter += 1
                    fileID += 1
            
                while not fileIDQueue.empty():  # process completed results as they arrive
                    print "Inside Queue"
                    time.sleep(3)
                    qFileID, qResult = fileIDQueue.get()
                    if qResult:
                        failureFlag=1
                    if self.debugFlag:
                        print("qFileID = ", qFileID, "qResult = ", qResult)
                    doneCounter += 1
                if failureFlag:
                    break
                time.sleep(2)
                


#            #for dataRecord in manifestFileList:
#                #if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
#                    #processHandle = Process(target=Loader.loadData, args=(self, dataRecord[1],fileID, fileIDQueue, dbFlag))
#                #processHandle.start()
#                #procs.append(processHandle)
#                #fileID += 1
#
#            #for p in procs:
#                #p.join()
#
#            #Without sleep the queue is unreliable and do not return the expected values.  Fixed with procs.append function.  No need for sleep anymore
#            #time.sleep(2)
#
##            failureFlag=0
#            while not fileIDQueue.empty():
#                qFileID, qResult = fileIDQueue.get()
#                if qResult:
#                    failureFlag=1
#                if self.debugFlag:
#                    print("Inside fileIDQueue while")
#                    print("qFileID = ", qFileID, "qResult = ", qResult)
#                    
#            if self.debugFlag:
#                print "Failure Flag = ", failureFlag

            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'
                """
                #Generate FINRA Manifest file and Push it to AWS 
                """

                # Call Divakar's generate done file function
                returnValue = self.createFinraManifestFile(self.manifestFile)
                if self.debugFlag:
                    print "Post createFinraManifestFile fn - return value= ", returnValue
    
                if returnValue:
                    self.m_logger.error("Unable to generate done file. Please fix the issue the re-run the load")
                    #sys.exit(1)
                    failureFlag=1
                    pStatus = 'F'
                else:

                    # dbFlag=0: the done-file upload itself is not tracked in
                    # the process-status table.
                    dbFlag=0
                    fileID=0
                    # Call the loader function with the manifest file
                    finraManifestLoadStatus=0
                    finraManifestLoadStatus=self.loadData(self.finraManifestFile ,fileID, fileIDQueue, dbFlag)
    
                    if finraManifestLoadStatus:
                        pStatus = 'F'
                        self.m_logger.error("Unable to load finra manifest file ")
                

            #Call Oracle fn to insert status 'S' into TB_DDY_DATASET_TRANS with RUNID etc
            #DB_CALL
            # Make database call sp_ddy_insert_dataset_trans and insert data based on Failure or Success

            mySql = ""
            myParams = ""
            tempSql = self.m_configDict["SQL"]["put_dataset"]
            
            myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.tradeDate), "status": pStatus, "tidalRunID":str(self.tidalRunID)}
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)

            if failureFlag:
                self.m_logger.error("Load failed")
                sys.exit(1)

        except Exception as e:
            self.m_logger.error("ProcessLoader failed with error " + str(e))
            sys.exit(1)
Exemplo n.º 20
0
#      GNU General Public License for more details.
#  
#      You should have received a copy of the GNU General Public License
#      along with this program; if not, write to the Free Software
#      Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
#   
from SkunkWeb import Configuration, LogObj, ServiceRegistry
from requestHandler.requestHandler import CleanupRequest
import Oracle

ServiceRegistry.registerService('oracle')

Configuration.mergeDefaults(
    OracleConnectStrings = {},
    OracleProcedurePackageLists = {}
    )

# Open a connection for every configured Oracle user.
# (renamed loop variable: the original used 'str', shadowing the builtin)
for u, connectString in Configuration.OracleConnectStrings.items():
    LogObj.DEBUG(ServiceRegistry.ORACLE, 'initializing user %s' % u)
    Oracle.initUser(u, connectString)

# BUG FIX: OracleProcedurePackageLists is a dict (see mergeDefaults above),
# so iterate .items(); iterating the dict directly yields only its keys and
# the tuple unpacking fails for any non-empty configuration.
for u, pkglist in Configuration.OracleProcedurePackageLists.items():
    Oracle.loadSignatures(u, pkglist, LogObj.LOG,
                       lambda x: LogObj.DEBUG(ServiceRegistry.ORACLE, x))

def rollbackConnection(*args):
    """Request-cleanup hook: roll back every cached Oracle connection."""
    for conn in Oracle._connections.values():
        conn.rollback()

CleanupRequest.addFunction(rollbackConnection)
Exemplo n.º 21
0
 def getProcedure(self, procName):
     """Return the named stored procedure, via the Oracle module when enabled."""
     if not self.useOracleMod:
         return getattr(self.getCursor().procedures, procName)
     return Oracle.getProcedure(self.realConn, procName)
Exemplo n.º 22
0
def refineUnknown(token):
	"""Leave tokens the Oracle recognizes intact; greedily split the rest."""
	return [token] if Oracle.isWord(token) else greedy(0, token)
Exemplo n.º 23
0
def refineUnknown(token):
    """Return [token] when the Oracle knows the word, else greedy-split it."""
    if Oracle.isWord(token):
        return [token]
    return greedy(0, token)
Exemplo n.º 24
0
class Loader():
    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""
    m_netezza_db = ""

    def __init__(self, configFile, tradeDate, debugFlag, datasetName):
        """
        Purpose: Constructor

        Builds the logger, stashes the run parameters, then loads the config
        file and opens the Oracle connection; exits the process on failure.

        :param self:        class object itself
        :param configFile:  Configuration file to use
        :param tradeDate:   Trade date for this run
        :param debugFlag:   When truthy, enables verbose debug printing
        :param datasetName: Dataset name (lower-cased for the log file)
        """

        # Build the logger first so any later failure can be reported.
        self.m_logger = Logger(logging.INFO, configFile, tradeDate, datasetName.lower())
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        # Stash the run parameters on the instance.
        self.configFile = configFile
        self.tradeDate = tradeDate
        self.debugFlag = debugFlag

        try:
            # Parse the configuration file into a dictionary.
            self.m_configDict = configuration(self.configFile, True).m_dictionary
            # Open the Oracle connection using that configuration.
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)
        except Exception as err:
            # Configuration/connection failure is fatal for the loader.
            self.m_logger.error("Unable to initialize the configuration " + str(err))
            print("ERROR: Unable to initialize the configuration for logger " + str(err))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - Load the market-specific config file into self.m_mktConfigDict.
        :param mktConfigFile: Path to the market configuration file
        :return: None; exits the process if the file cannot be parsed
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as err:
            # A bad market config makes the whole load impossible - bail out.
            self.m_logger.error("Unable to initialize the configuration for logger " + str(err))
            print("ERROR: Unable to initialize the configuration for logger " + str(err))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active Active loads happening at a given point
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
                localActiveLoadMax = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"]
                localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
                localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]

                mySql = ""
                myParams = ""
                mySql = self.m_configDict["SQL"]["get_active_loads"]
    
                activeFlag=1
                totalActiveWaitTime=0
                while activeFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkActiveLoads - Active Loads value = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.info("Retry after delay., Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                        local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                        time.sleep(local_delay_time)
                        returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                        if returnStr[0] != '0':
                            self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                            return 1

                    #Check if actual active loads is <= configured active loads.  If so, return out of the fn
                    if int(returnStr[1].strip()) <= localActiveLoadMax:
                        activeFlag=0
                        return 0

                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localActiveLoadWaitTime)
                    totalActiveWaitTime += localActiveLoadWaitTime

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit
                    if totalActiveWaitTime > localActiveMaxWaitTime:
                        self.m_logger.error("In Fn chkActiveLoads.  Total Actual Wait Time exceeds the configured value active_load_max_wait_time.  Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            return 1

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset.

        Polls the database (get_race_status SQL with the dataset name
        substituted in) until at most one load is active for this dataset,
        sleeping race_status_wait_time seconds between polls and giving up
        once the accumulated wait exceeds race_status_max_wait_time.

        :param None: None at this point
        :return: 0 when no competing load is running (or checking is disabled),
                 1 on failure or time-out
        """
        try:
            if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y':
                # Wait-time config values are strings; cast once up front.
                localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"])
                localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"])

                mySql = ""

                #myParams = {"datasetName":self.datasetName}
                # Substitute the datasetName placeholder into the SQL template.
                tempSql = self.m_configDict["SQL"]["get_race_status"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
    
                raceFlag=1
                totalRaceStatusWaitTime=0
                while raceFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "tempSql = ", tempSql
                        print "myParamsDict = ", myParamsDict
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkRaceStatus - ReturnCode = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        # One retry after a configured delay before giving up.
                        self.m_logger.info("Retry after delay., Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                        local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                        time.sleep(local_delay_time)
                        returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                        if returnStr[0] != '0':
                            self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                            return 1

                    #Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                    # returnStr[1] holds the count of loads active for this
                    # dataset; <= 1 means only our own load is present.
                    if int(returnStr[1].strip()) <= 1:
                        raceFlag=0
                        return 0

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit


                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localRaceStatusWaitTime)
                    #time.sleep(90)
                    totalRaceStatusWaitTime += localRaceStatusWaitTime
                    if self.debugFlag:
                        print "totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime
                    if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                        self.m_logger.error("In Fn chkRaceStatusLoads.  Total Actual Wait Time exceeds the configured value race_status_max_wait_time.  Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                        return 1
            else:
                # Race checking disabled by configuration.
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            return 1

    def loadData(self,localDataFile, localFileID, localFileIDQueue, localDBFlag, dataFileFlag, localRecordCount):
        """
        Purpose - To load the given datafile to the S3 bucket specified in the global mktConfigFile
        :param localDataFile: Source datafile to be uploaded to S3
        :param localFileID: Internal File ID assigned to the source datafile
        :param localFileIDQueue: Queue in which, results of the operation is stored
        :return:
        """
        try:
            if self.debugFlag:
                print "Inside loadData function"

            if localDBFlag:
                raceStatusReturnValue=self.chkRaceStatus()
                if self.debugFlag:
                    print "raceStatusReturnValue=", raceStatusReturnValue

                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1

                activeLoadsReturnValue=self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkActiveLoads fn.  Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, activeLoadsReturnValue))
                    return 1
    
                processID = os.getpid()
                hostName = socket.gethostname()

                #Insert Process status into Oracle db
                #DB_CALL - sp_ddy_insert_process_status(RUNID, FILE_ID, etc)
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                pStatus = 'P'
                pComment = 'Load started'
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "status":pStatus , "lcomment":pComment, "tidalRunID":str(self.tidalRunID)}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.info("Retry after delay., Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                    time.sleep(local_delay_time)
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if returnStr[0] != '0':
                         self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                         localFileIDQueue.put((localFileID, 1))
                         return 1

            #Call s3.gettoken to get the token and establish connection

            sourceFileWthPath = localDataFile
            
            #Commented the following lines to move getToken outside parallel thread
            # Keep it until we test all 93 loads and remove it
            #s3object = S3(self.mktConfigFile, self.m_logger)
            #s3object.getToken()
            ##sourceFileWthPath =  s3object.m_configfile["S3"]["source_file"]

            targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(sourceFileWthPath))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            local_aws_retries = int(self.m_mktConfigDict["ENV"]["aws_retries"])
            if self.debugFlag:
                print("sourceFileWthPath =", sourceFileWthPath)
                print("targetFileWthPath =", targetFileWthPath)
                print("targetBucket =", targetBucket)
                print("encryptKeyFlag =", encryptKeyFlag)
                print("local_aws_retries =", local_aws_retries)

            #Get size of the file
            sourceSize = os.stat(sourceFileWthPath).st_size

            multiPartFlag=False
            GBFACTOR = float(1<<30)
            #Check if the given file is greater than 4.5 GB.  Limit on AWS > 5 GB on single part upload
            if float(sourceSize/GBFACTOR) > 4.5:
                multiPartFlag=True

            init_count = 0
            self.m_logger.info("Started Xfer of Source File " + sourceFileWthPath  + " with size " + str(sourceSize) + " to target " + targetFileWthPath)
            while (init_count < local_aws_retries):
                loadReturnValue = 0

                #Call s3.dataUpload to load the data (single part load)

                if multiPartFlag:
                    if self.debugFlag:
                        print "Inside Multipart load.  File size = ", sourceSize
                    loadReturnValue = self.s3object.loadDataMultiPart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag, self.bytes_per_chunk)
                else:
                    if self.debugFlag:
                        print "Inside Singlepart load.  File size = ", sourceSize
                    loadReturnValue = self.s3object.loadDataSinglePart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                if self.debugFlag:
                    print "loadReturnValue = ", loadReturnValue

                #Check if we are sending data file.  If so, we need to generate a complete file and send it along
                if (dataFileFlag) and (loadReturnValue == 0):
                    completeFile = localDataFile.split(".")[0] + self.compFilePattern
   
                    sourceFileWthPath = self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" +  os.path.basename(completeFile)
                    if self.debugFlag:
                        print("completeFile =", completeFile)
                        print("sourceFileWthPath =", sourceFileWthPath)
                
                    with open(sourceFileWthPath,"w") as finraMnFH:
                        finraMnFH.write("{0},{1}\n".format(str(self.tradeDate),str(localRecordCount)))

                    targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(sourceFileWthPath))
                    sourceSize = os.stat(sourceFileWthPath).st_size
                    self.m_logger.info("Started Xfer of complete file " + sourceFileWthPath  + " with size " + str(sourceSize) + " to target " + targetFileWthPath)
                    loadReturnValueCompleteFile = self.s3object.loadDataSinglePart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )

                    if loadReturnValueCompleteFile:
                        loadReturnValue = 1
                    #End of loadReturnValueCompleteFile If

                if loadReturnValue == 0:
                    pStatus = 'S'
                    pComment = 'Load completed'
                    break
                else:
                    pStatus = 'F'
                    pComment = 'Load failed'
                    init_count += 1


            if localDBFlag:
                #Call DB to insert 'S' or 'F' in tb_ddy_process_status
                #localFileIDQueue.put((localFileID, loadReturnValue))
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "status":pStatus , "lcomment":pComment, "tidalRunID":str(self.tidalRunID)}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "localFileID = ", localFileID
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr

                if returnStr[0] != '0':
                    self.m_logger.info("Retry after delay., Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                    time.sleep(local_delay_time)
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                        localFileIDQueue.put((localFileID, 1))
                        return 1

                if self.debugFlag:
                    print "localFileID = ", localFileID
                    print "loadReturnValue = ", loadReturnValue
                    print 'Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

                localFileIDQueue.put((localFileID,loadReturnValue))
            else:
                return loadReturnValue
      
        except Exception as exp:
            self.m_logger.error("Failure in loadData process for file with the error " + str(exp))
            if localDBFlag:
                localFileIDQueue.put(localFileID, 1)
            else:
                return 1

    def createFinraManifestFile(self, manifestFile):
        """
        Purpose - Build the FINRA "done" (EOD) file for this run from the local
        manifest file plus the dataset's defaults file.

        Three phases, each with its own try/except returning 1 on failure:
          1. Parse the manifest: per-file records, running totals of rows/size
             (start-of-day entries are skipped).
          2. Read the defaults file into an ordered dict of raw lines.
          3. Render the EOD file name from the configured pattern and write the
             done file (totals, per-file rows, then the defaults lines verbatim).

        :param manifestFile: path of the manifest file to summarize
        :return: 0 on success, 1 on any failure (errors are logged, not raised)
        """
        try:
            # Read Manifest file to get info like total rows, total size & other details to populate the done file for FINRA
            if self.debugFlag:
                print "Inside createFinraManifestFile fuction"

            with open(manifestFile,"r") as fh:
                # Accumulators exposed on self so the write phase below (and
                # potentially other callers) can see them.
                self.totalRows = 0
                self.totalSize = 0
                self.fileCount = 0
                self.fileDict = {}
                for data in fh:
                    # NOTE(review): str.rstrip returns a new string; this call
                    # discards its result, so 'data' keeps its trailing newline.
                    # Harmless here because int() tolerates surrounding
                    # whitespace on the last "|" field.
                    data.rstrip("\n")
                    # Exclude any entry with the pattern "start-of-day"
                    if self.sodFilePatternSearch in data:
                        continue
                    mylist = []
                    self.fileCount +=1
                    # Manifest record layout (pipe-delimited), per the indexing
                    # below: [0]=tag?, [1]=file path, [2]=size, [3]=row count
                    # -- TODO confirm field 0's meaning against the writer.
                    mylist = data.split("|")
                    self.fileDict[self.fileCount] = [mylist[0],os.path.basename(mylist[1]),int(mylist[2]),int(mylist[3])]
                    self.totalRows += int(mylist[3])
                    self.totalSize += int(mylist[2])
                if self.debugFlag:
                    print "self.fileDict = ", self.fileDict
        except Exception as exp:
            self.m_logger.error("Failed while processing readManifest with Error = " + str(exp))
            return 1

        try:
            #Use self.defautltsFile which is populated from the db later. No need to get it from config file
            self.defaultsFileWthPath = self.m_mktConfigDict["DATASET"]["defaults_dir"] + "/" + self.defaultsFile
            with open(self.defaultsFileWthPath,"r") as fh:
                self.defaultsDict = {}
                self.defaultsCount  = 0
                for data in fh:
                    # NOTE(review): no-op again (result discarded) -- and here it
                    # is load-bearing: each stored line keeps its "\n", which is
                    # relied on when the lines are written without a newline in
                    # the final write loop below.
                    data.rstrip('\n')
                    self.defaultsCount +=1
                    self.defaultsDict[self.defaultsCount]=data
                if self.debugFlag:
                    print "After Defaults, self.fileDict = ", self.fileDict

        except Exception as exp:
            self.m_logger.error("Failed while processing defaults file " + self.defaultsFileWthPath + " with Error = " + str(exp))
            return 1

        try:
            # Not needed as the naming convention is changed
            #self.finraManifestFile =  self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" +  os.path.basename(manifestFile) + ".done"

            #Changing the EOD naming convention per Finra's requirement
            if self.debugFlag:
                print "self.eodFilePattern = ", self.eodFilePattern

            # Substitute placeholder tokens in the configured EOD file-name
            # pattern: the dict keys are OR-ed into one regex alternation and
            # each match is replaced by its value.
            myParamsDict = {'datasetName':self.datasetName.lower(), 'tradeDate':str(self.tradeDate)}
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            eodFileName = re.sub( tempGrp, lambda m:myParamsDict[m.group()], self.eodFilePattern)
            if self.debugFlag:
                print "eodFileName = ", eodFileName

            self.finraManifestFile =  self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" +  eodFileName
            if self.debugFlag:
                print "self.finraManifestFile = ", self.finraManifestFile
            
            with open(self.finraManifestFile,"w") as finraMnFH:
                finraMnFH.write("# AWS RunID : {}\n".format(str(self.runID)))
                finraMnFH.write("# Dataset : {0} , TradeDate : {1}\n".format(str(self.datasetName),str(self.tradeDate)))
                finraMnFH.write("total_compressed={}\n".format(self.totalSize))
                finraMnFH.write("total_rows={}\n".format(self.totalRows))
                finraMnFH.write("no of files={}\n".format(self.fileCount))
   
                # val layout: [tag, basename, size, rows] (from phase 1).
                for key,val in self.fileDict.items():
                    finraMnFH.write("file_{0}={1}\n".format(str(key),val[1]))
                    finraMnFH.write("file_{0}_rows={1}\n".format(str(key),val[3]))
  
                finraMnFH.write("# Data Attributes\n")
                # Defaults lines still carry their trailing newline, hence no
                # "\n" in this format string.
                for key,val in self.defaultsDict.items():
                    finraMnFH.write("{0}".format(str(val)))
            return 0
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS Done file " + self.finraManifestFile + " with Error = " + str(exp))
            return 1

    def processLoader(self, manifestFile, datasetName, tidalRunID):
        """
        Purpose - Function responsible for reading the manifest file, get market name, call multiprocess load and other db calls

        Overall flow:
          1. Resolve the market name and defaults file for the dataset via a DB
             lookup (each DB call below is retried once after a configured delay).
          2. Derive and read the market-specific config file.
          3. Read the manifest, generate a run ID, and record a 'P' (processing)
             status row for the run.
          4. Insert one manifest row per data file, then upload files to S3 in
             batches of worker processes (the start-of-day file, when present,
             is uploaded first and synchronously).
          5. On success, build the FINRA done file and upload it; finally record
             'S' (success) or 'F' (failure) for the run.

        :param manifestFile: Manifest file name (resolved under manifestfile_dir)
        :param datasetName: Dataset name, used for DB lookups and file naming
        :param tidalRunID: Tidal scheduler run ID, recorded with every DB row
        :return: None on success; failures call sys.exit(1) after logging
        """
        try:
            # Read the manifest filename and get the suffix i.e. datasetname
            # Assumption - Manifest file format - manifest.<datasetName>.<tradeDate>.<tidalRunID>
            # Program will break otherwise

            self.datasetName = datasetName
            self.tidalRunID = tidalRunID

            # DB_CALL
            # Make database call sp_ddy_get_market_info(datasetname) and get market info
            mktName = ''

            self.manifestFile = self.m_configDict["ENV"]["manifestfile_dir"] + "/" + manifestFile

            ##Validate Manifest file
            if not os.path.isfile(self.manifestFile):
                self.m_logger.error("Invalid manifest file " + self.manifestFile)
                sys.exit(1)

            if self.debugFlag:
                print "Inside processLoader"
                print "DatasetName = ", self.datasetName
                print "ManifestFile = ", manifestFile
                print "Self ManifestFile = ", self.manifestFile
                print "TidalRunID = ", self.tidalRunID
                print "DebugFlag = ", self.debugFlag
                print "confDict = ", self.m_configDict

            # Enable this one the proc to get mkt name and default file are ready and test it
            # SQL templating idiom used throughout this method: the parameter
            # dict's keys are OR-ed into a single regex alternation and each
            # occurrence in the template is replaced by its value.
            tempSql = self.m_configDict["SQL"]["get_mkt_defaults_filename"]
            myParamsDict = { 'datasetName' : self.datasetName }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            # returnStr[0] is a string status code; '0' means success.
            # On success: returnStr[2] = market name, returnStr[3] = defaults file.
            if returnStr[0] == '0':
                mktName = returnStr[2].strip()
                self.defaultsFile = returnStr[3].strip()
            else:
                # One retry after the configured delay, then hard exit.
                self.m_logger.info("Retry after delay., Unable to get market info from the database using sql " + mySql + ". Error = " + returnStr[1])
                local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                time.sleep(local_delay_time)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if returnStr[0] == '0':
                    mktName = returnStr[2].strip()
                    self.defaultsFile = returnStr[3].strip()
                else:
                    self.m_logger.error("Unable to get market info from the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            if self.debugFlag:
                print "MktName from DB = ", mktName
                print "Defaults = ", self.defaultsFile
          
            #Build the string for mktConfigFile based on mktName and configFile info
            # e.g. <dir>/<base>_<mkt>.<ext> derived from the generic config path.
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + os.path.basename(self.configFile).split('.',1)[0].strip() + '_' + mktName.lower() + '.' + os.path.basename(self.configFile).split('.',1)[1].strip()

            if self.debugFlag:
                print("mktConfigFile = ", self.mktConfigFile)

            #Validate Manifest file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)

            if self.debugFlag:
                print("m_mktConfigDict=",self.m_mktConfigDict)

            # Read the contents of manifest - dataFileNames into a list - Will validate the datafiles as well

            localManifest = Manifest()
            manifestDelim = self.m_configDict["ENV"]["manifest_delim"]

            manifestFileList = localManifest.readManifest(self.manifestFile, self.m_logger, manifestDelim, self.debugFlag)

            # Get RunID
            self.runID = generate_runId()
            if self.debugFlag:
                print("RunID = ", self.runID)
                #print("manifestFileList = ", manifestFileList)

            #Call Oracle fn to insert status 'P' into TB_DDY_DATASET_TRANS with RUNID etc
            #DB_CALL
            # Make database call sp_ddy_insert_dataset_trans and insert data that process started

            mySql = ""
            myParams = ""
            tempSql = self.m_configDict["SQL"]["put_dataset"]
            pStatus = 'P'

            myParamsDict = {'datasetName':self.datasetName, 'runID': str(self.runID), 'tDate':str(self.tradeDate), 'status': pStatus, 'tidalRunID':str(self.tidalRunID)}
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.info("Retry after delay., Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                time.sleep(local_delay_time)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            # Insert Manifest data in db and call multiprocessing s3 loader process.  Shd we add RUN_ID to manifest table

            #For each datafile, generate fileID and call loadData fn using multiprocess to load data into AWS
            # dataRecord layout (from readManifest): [1]=file name, [2]=size,
            # [3]=record count -- TODO confirm index 0's meaning.
            for dataRecord in manifestFileList:
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_manifest"]
                myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.tradeDate), "dataFileName":dataRecord[1], "manifestFileName":manifestFile , "noOfRecords": str(dataRecord[3]), "fileSize":str(dataRecord[2]), "tidalRunID":str(self.tidalRunID)}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
     
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                #if self.debugFlag:
                    #print "tempSql = ", tempSql
                    #print "myParamsDict = ", myParamsDict
                    #print "mySql = ", mySql
                    #print "returnStr = ", returnStr

                if returnStr[0] != '0':
                    self.m_logger.info("Retry after delay., Unable to put manifest info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                    time.sleep(local_delay_time)
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to put manifest info into the database using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            # currentEpochTime anchors the token-refresh timer used in the loop below.
            self.currentEpochTime = int(time.time())

            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)
            self.s3object.getToken()
            if self.debugFlag:
                print "self.currentEpochTime = ", self.currentEpochTime

            # Max number of concurrent upload worker processes per batch.
            process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])

            fileID=1
            fileIDQueue = Queue()
            dbFlag=1
            procs = []
            doneCounter = 0
            sendCounter = 0
            processFlag=0
            failureFlag=0
            

            #Get chunk size from config file for multipart uploads
            self.bytes_per_chunk = int(self.m_configDict["DATASET"]["bytes_per_chunk"])
            
            self.sodFilePatternSearch = self.m_configDict["ddy"]["SOD_FILE_PATTERN_SEARCH"]
            # Following variables are used across the class.  Hence, assigned to self variables
            self.eodFilePattern = self.m_configDict["ddy"]["EOD_FILE_PATTERN"]
            self.compFilePattern = self.m_configDict["ddy"]["COMP_FILE_PATTERN"]
            self.sodFileCheck = self.m_configDict["ddy"]["SOD_FILE_CHECK"].strip().upper()

            manifestListItems = len(manifestFileList)
            if self.debugFlag:
                print "bytes_per_chunk = ", self.bytes_per_chunk
                print "self.sodFilePatternSearch = ", self.sodFilePatternSearch
                print "self.eodFilePattern = ", self.eodFilePattern
                print "self.compFilePattern = ", self.compFilePattern
                print "self.sodFileCheck = ", self.sodFileCheck
                print "manifestListItems = ", manifestListItems

            dataFileFlag=False
            sodFileProcessedFlag=0
            max_batches= int(math.ceil(float(len(manifestFileList))/process_count)) 
            batch_count=0

            # Outer loop: until every manifest entry is accounted for (or a
            # failure).  Inner loop: dispatch up to process_count uploads,
            # join the batch, then drain the result queue.
            while doneCounter < manifestListItems and failureFlag == 0 :
                while sendCounter < manifestListItems and sendCounter - doneCounter < process_count and failureFlag == 0:
                    # Start-of-day file is uploaded synchronously (in-process)
                    # before any data files, exactly once.
                    if self.sodFilePatternSearch in manifestFileList[sendCounter][1] and not sodFileProcessedFlag:
                        dataFileFlag=False
                        sodFileLoadStatus=self.loadData(manifestFileList[sendCounter][1] ,fileID, fileIDQueue, dbFlag, dataFileFlag, 0)
                        if sodFileLoadStatus:
                            self.m_logger.error("Unable to push Start of Day file to FINRA.  Exiting.. ")
                            sys.exit(1)
                        sodFileProcessedFlag=1
                        sendCounter += 1
                        qFileID = 0
                        # NOTE(review): 'qRestult' is a dead assignment (typo of
                        # qResult); harmless, left as-is.
                        qRestult = 0
                        qFileID, qResult = fileIDQueue.get()
                        doneCounter += 1
                        fileID += 1
                    else:
                        # When SOD_FILE_CHECK is enforced, a data file before the
                        # SOD file is a fatal ordering error.
                        if self.sodFileCheck == 'Y':
                            if not sodFileProcessedFlag:
                                self.m_logger.error("No Start of day file. Please add SOD file to the generate manifest.  Exiting.. ") 
                                sys.exit(1)
    
                        dataFileFlag=True
                        if self.debugFlag:
                            print "manifestFileList[sendCounter][1]", manifestFileList[sendCounter][1], "fileID = ", fileID
                        processHandle = Process(target=Loader.loadData, args=(self, manifestFileList[sendCounter][1],fileID, fileIDQueue, dbFlag, dataFileFlag, manifestFileList[sendCounter][3]))
                        processFlag=1
                        
                        # Refresh the S3 token before it can expire on long runs.
                        s3TimeoutTime = int(self.m_configDict["ddy"]["S3_TIMEOUT_TIME"])
                        if ((int(time.time()) - self.currentEpochTime) > s3TimeoutTime):
                            self.currentEpochTime = int(time.time())
                            self.m_logger.info("Getting New Token for Batch : {0}, Max batches : {1}".format(batch_count,max_batches))
                            if self.debugFlag:
                                print "Inside get new token - self.currentEpochTime = ", self.currentEpochTime
                            self.s3object.getToken()

                        # Stagger worker start-up by the configured delay.
                        threadDelayTime = int(self.m_configDict["ddy"]["THREAD_DELAY_TIME"])
                        time.sleep(threadDelayTime)
                        processHandle.start()
                        procs.append(processHandle)
                        sendCounter += 1
                        fileID += 1
                        # Batch boundary: wait for all workers once the batch is
                        # full or the manifest is exhausted.
                        if processFlag and ( sendCounter - doneCounter == process_count or sendCounter == manifestListItems ) : 
                            batch_count += 1
                            self.m_logger.info("Waiting for Batch : {0} to complete. No of active workers : {2}.  Max batches : {1}".format(batch_count,max_batches,sendCounter-doneCounter))
                            for p in procs:
                                p.join()
                            processFlag=0
                        if self.debugFlag:
                            print "Before fileIDQueue - ProcessFlag = ", processFlag, "sendCounter = ", sendCounter, "doneCounter = ", doneCounter, "manifestListItems = ", manifestListItems
                        while not fileIDQueue.empty():  # process completed results as they arrive
                            qFileID = 0
                            qRestult = 0
                            qFileID, qResult = fileIDQueue.get()
                            if self.debugFlag:
                                print("qFileID = ", qFileID, "qResult = ", qResult)
                            doneCounter += 1
                            # Any non-zero worker result fails the whole run.
                            if qResult:
                                failureFlag = 1
                        if self.debugFlag:
                            print "After fileIDQueue - ProcessFlag = ", processFlag, "sendCounter = ", sendCounter, "doneCounter = ", doneCounter, "manifestListItems = ", manifestListItems, "failureFlag = ", failureFlag
                        if failureFlag:
                            break
                        #Check to see if specified time has passed.  If so get another token to avoid expiration.  Required for large datasets
                        if self.debugFlag:
                            print 'Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
                            print "self.currentEpochTime = ", self.currentEpochTime
                            print "Current Time in Epoch = ", int(time.time())


                    # End of else
                #End of Inner While
            #End of Outer While

            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'
                """
                #Generate FINRA Manifest file and Push it to AWS 
                """

                # Call Divakar's finra manifest generate function
                returnValue = self.createFinraManifestFile(self.manifestFile)
                if self.debugFlag:
                    print "Post createFinraManifestFile fn - return value= ", returnValue
    
                if returnValue:
                    self.m_logger.error("Unable to generate done file. Please fix the issue the re-run the load")
                    #sys.exit(1)
                    failureFlag=1
                    pStatus = 'F'
                else:

                    # Upload the done file itself (no DB bookkeeping: dbFlag=0).
                    dbFlag=0
                    fileID=0
                    # Call the loader function with the manifest file
                    finraManifestLoadStatus=0
                    dataFileFlag=False
                    finraManifestLoadStatus=self.loadData(self.finraManifestFile ,fileID, fileIDQueue, dbFlag, dataFileFlag, 0)
    
                    if finraManifestLoadStatus:
                        pStatus = 'F'
                        self.m_logger.error("Unable to load finra manifest file ")

            #Call Oracle fn to insert status 'S' into TB_DDY_DATASET_TRANS with RUNID etc
            #DB_CALL
            # Make database call sp_ddy_insert_dataset_trans and insert data based on Failure or Success

            mySql = ""
            myParams = ""
            tempSql = self.m_configDict["SQL"]["put_dataset"]
            
            myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.tradeDate), "status": pStatus, "tidalRunID":str(self.tidalRunID)}
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.info("Retry after delay., Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                time.sleep(local_delay_time)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            if failureFlag:
                self.m_logger.error("Load failed")
                sys.exit(1)

        except Exception as e:
            self.m_logger.error("ProcessLoader failed with error " + str(e))
            sys.exit(1)
Exemplo n.º 25
0
class Recon():
    """Reconciliation helper for loads pushed to S3/FINRA.

    Queries the last day's rows from the process-status tables, resolves the
    markets involved, and lists each market's S3 acknowledgement path.
    Exploratory/diagnostic in nature: much of processRecon prints to stdout
    and ends with sys.exit(0).
    """
    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""
    m_netezza_db = ""

    def __init__(self, configFile, tradeDate, debugFlag):
        """
        Purpose: Constructor
        :param self:        class object itself
        :param configFile:  Configuration file to use
        :param tradeDate:   Trade date this reconciliation run is for
        :param debugFlag:   When truthy, extra diagnostics are printed
        """

        # Initialize m_logger object from class Logger and add Header to the log, using addGenericInfo function
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        self.tradeDate = tradeDate
        self.debugFlag = debugFlag
        self.configFile = configFile

        try:
            # Get configuration to a dictionary
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            #Initialize Oracle instance along with connection
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)

        except Exception as exp:
                   # An exception occurred
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the global dictionary m_mktConfigDict for reference
        :param mktConfigFile: path of the market-specific configuration file
        :return: None; exits the process on failure
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as exp:
                   # An exception occurred
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def processRecon(self, tidalRunID):
        """
        Purpose - Function responsible for reading the datasets, get market name, call AWS ack files and other db calls

        :param tidalRunID: Tidal Run ID (stored on self; the trade date comes
                           from the constructor)
        :return: never returns normally -- ends with sys.exit(0)
        """
        try:
            # Read the manifest filename and get the suffix i.e. datasetname
            # Assumption - Manifest file format - manifest.<datasetName>.<tradeDate>.<tidalRunID>
            # Program will break otherwise

            self.tidalRunID = tidalRunID

            # DB_CALL
            # Make database call sp_ddy_get_market_info(datasetname) and get market info
            mktName = ''

            # select * from TB_DDY_PROCESS_STATUS where CREATE_TIME > SYSDATE - INTERVAL '1' DAY
            # select * from TB_DDY_MANIFEST_TRANS where  CREATE_TIME > SYSDATE - INTERVAL '1' DAY;
            # select * from TB_DDY_DATASET_MASTER where dataset_id = 49

            # NOTE(review): because of the backslash continuation on the line
            # above them, the two commented-out filters below are absorbed into
            # the same logical line as a trailing comment -- they are NOT part
            # of the SQL.  Fragile; handle with care when editing.
            tempSql = "select DM.DATASET_NAME , PS.FILE_ID, TO_CHAR(PS.TRADE_DATE,'YYYYMMDD'), PS.RUN_ID, PS.FILE_NAME from TB_DDY_PROCESS_STATUS PS" \
                      " INNER JOIN TB_DDY_MANIFEST_TRANS MT ON MT.RUN_ID= PS.RUN_ID" \
                      " INNER JOIN TB_DDY_DATASET_MASTER DM ON DM.DATASET_ID= PS.DATASET_ID" \
                      " WHERE PS.CREATE_TIME > SYSDATE - INTERVAL '1' DAY" \
                      " AND NOT EXISTS " \
                      " ( SELECT 1 FROM TB_DDY_PROCESS_STATUS PS1 WHERE PS1.RUN_ID = PS.RUN_ID and PS1.FILE_ID = PS.FILE_ID and PS.STATUS = 'R')" \
                      #" AND PS.STATUS = 'S'" \
                      #" AND rownum < 10000"


            print(tempSql)
            #myParamsDict = {}
            #tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            #mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnList = self.m_oracle_db.runSqlWthParamsGetMultipleRows(tempSql)
            # Remove last element
            #print(returnList)
            #returnList.pop()

            # NOTE(review): both comprehensions fall back to the int 0 when the
            # tested element is falsy, mixing int and str values -- confirm
            # downstream consumers tolerate that.
            # returnDataDict: datasetName -> "tradeDate,runID,fileName"
            returnDataDict= {d[0]: ','.join(d[2:]) if d[2] else 0 for d in returnList}
            # returnDict: "dataset_fileID_tradeDate" -> full row joined by commas
            returnDict = {d[0]+"_"+d[1]+"_"+d[2]: ','.join(d[0:]) if d[1:] else 0 for d in returnList}

            #print(returnDataDict)
            #print(returnDict)

            self._sqlerror_ = 0
            returnMktList = []
            # Resolve the distinct market names for all datasets seen above.
            for datasetName in returnDataDict:
                tempSql = self.m_configDict["SQL"]["get_mkt_defaults_filename"]
                myParamsDict = { 'datasetName' : datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnList = []
                returnList = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                print(returnList[0])

                # Status 0 = success; negative = error; positive = warning.
                if int(returnList[0]) == 0:

                    if returnList[2] not in returnMktList:
                        returnMktList.append(returnList[2])
                    #returnMktList.append(returnList[2])
                    #print(datasetName, returnList[2] )
                elif int(returnList[0]) < 0:
                    self.m_logger.error("Error in Get Market Deafults Filename for Dataset : {1}, SQL : {0}".format(mySql,datasetName))
                    self._sqlerror_ += 1
                else:
                    self.m_logger.error("Warning in Get Market Deafults Filename for Dataset : {1}, SQL : {0}".format(mySql,datasetName))

            print(returnMktList)
      
            #returnMktList = ['NYSE']

            for mktName in returnMktList :

                #Build the string for mktConfigFile based on mktName and configFile info
                mktConfigFile = os.path.dirname(self.configFile) + '/' + os.path.basename(self.configFile).split('.',1)[0].strip() + '_' + mktName.lower() + '.' + os.path.basename(self.configFile).split('.',1)[1].strip()

                print("mktConfigFile = ", mktConfigFile)
                #Validata Manifest file is a valid file
                if not os.path.isfile(mktConfigFile):
                    #print "Inside invalid mktConfigFile" + self.mktConfigFile
                    self.m_logger.error("Invalid market manifest file " + mktConfigFile)
                    sys.exit(1)

                self.readMktConfigFile(mktConfigFile)

                # Fresh S3 session/token per market, then list the ack path.
                self.s3object = S3(mktConfigFile, self.m_logger, self.debugFlag)
                self.s3object.getToken()
                #print(self.s3object.m_configFile["TOKEN"])
                print(self.s3object.m_configFile["S3"])
                bucket = self.s3object.m_configFile["S3"]["bucket"]
                path   = self.s3object.m_configFile["S3"]["path"]
                ackpath= self.s3object.m_configFile["S3"]["ack_path"]
                print(bucket, path)
                print(ackpath)
                #ackPath = bucket + "/" + str(self.s3object.m_configFile["S3"]["path"]) + "/" + "acknowldge"
                #ackPath = str(bucket) + "/" + str(path)
                encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

                #myBucket = self.s3object.m_connection.get_bucket(bucket, validate = False)

                #for testkey in myBucket.list(prefix='50006/'):
                #     print("File = ", testkey.name)

                #b = self.s3object.m_connection.get_bucket(bucket)
                #rs = b.list()
                # get the result set from bucket
                #print(b.list())

                rs = self.s3object.listBucketWPath(bucket,ackpath)
                print(rs)


                #ackList = s3object.getBucketList(ackPath)
                #print(ackList)


        # NOTE(review): bare except also swallows SystemExit/KeyboardInterrupt;
        # deliberate best-effort here (the run is not failed), left as-is.
        except:
            self.m_logger.error("Error while creating S3 recon file Exception : {0}".format(sys.exc_info()[0]))
            # Not exitting at this point
            #sys.exit(1)

        # Unconditional clean exit: this method terminates the interpreter.
        sys.exit(0)
Exemplo n.º 26
0
def Newton_V(Oracle, x0):
    """Minimize a function with Newton's method and a Wolfe line search.

    Parameters
    ----------
    Oracle : callable
        Oracle(x, 7) must return (critere, gradient, hessien): the objective
        value, its gradient and its Hessian at x.
    x0 : ndarray
        Starting point.

    Returns
    -------
    tuple
        (critere_opt, gradient_opt, x_opt): objective value, gradient and
        iterate at the last iteration performed.
    """
    # Solving H d = -g directly is numerically more stable (and cheaper)
    # than forming inv(H) explicitly and multiplying.
    from numpy.linalg import solve

    ##### Initialisation des variables

    iter_max = 100
    # gradient_step_ini = 1. # Problème primal.
    gradient_step_ini = 1000  # Problème dual.
    threshold = 0.000001

    error_count = 0  # Compteur de non-convergence de l'algorithme de Fletcher-Lemarechal.

    gradient_norm_list = []
    gradient_step_list = []
    critere_list = []

    time_start = process_time()

    x = x0

    ##### Boucle sur les iterations
    for k in range(iter_max):
        # Valeur du critere, du gradient et du hessien
        critere, gradient, hessien = Oracle(x, 7)

        # Test de convergence sur la norme du gradient
        gradient_norm = norm(gradient)
        if gradient_norm <= threshold:
            break

        # Direction de descente de Newton: resoudre H d = -g
        direction = -solve(hessien, gradient)

        # Pas de descente (recherche lineaire de Wolfe)
        gradient_step, error_code = Wolfe(gradient_step_ini, x, direction,
                                          Oracle)

        if error_code != 1:
            error_count += 1

        # Mise a jour des variables
        x = x + (gradient_step * direction)

        # Evolution du gradient, du pas, et du critere
        gradient_norm_list.append(gradient_norm)
        gradient_step_list.append(gradient_step)
        critere_list.append(critere)

    if error_count > 0:
        print()
        print("Non-convergence de l'algorithme de Fletcher-Lemarechal : {}".
              format(error_count))

    ##### Resultats de l'optimisation
    critere_opt = critere
    gradient_opt = gradient
    x_opt = x
    time_cpu = process_time() - time_start

    print()
    print('Iteration :', k)
    print('Temps CPU :', time_cpu)
    print('Critere optimal :', critere_opt)
    print('Norme du gradient :', norm(gradient_opt))

    # Visualisation de la convergence
    Visualg(gradient_norm_list, gradient_step_list, critere_list)

    return critere_opt, gradient_opt, x_opt
Exemplo n.º 27
0
Arquivo: dxt.py Projeto: tnraman/ddy
class Extractor():
    # Logger wrapper; the class-level placeholder is replaced with a real
    # Logger instance in __init__.
    m_logger = ""

    # Oracle database wrapper; replaced with a connected Oracle instance
    # in __init__.
    m_oracle_db = ""

    def __init__(self, configFile, mktName, tradeDate, debugFlag):
        """
        Purpose: Constructor
        :param self:        class object itself
        :param configFile:  Configuration file to use
        :param mktName:     Market name this extractor operates on
        :param tradeDate:   Trade date used for logging and status records
        :param debugFlag:   When truthy, verbose diagnostics are printed
        """
        # Bring up the logger first so any later failure can be recorded.
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        # Keep the constructor arguments around for the other methods.
        self.tradeDate = tradeDate
        self.debugFlag = debugFlag
        self.configFile = configFile
        self.mktName = mktName

        try:
            # Parse the main configuration file into a dictionary.
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            # Open the Oracle connection wrapper.
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)
        except Exception as exp:
            # Configuration or DB bootstrap failed: log, echo and abort.
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the global dictionary m_mktConfigDict for reference
        :param mktConfigFile: path of the market-specific configuration file
        :return:
        """
        try:
            # Parse the market config file and cache its dictionary on the
            # instance for the other methods to consult.
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as exp:
            # Parsing failed: log, echo to the console, and abort the run.
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active Active loads happening at a given point
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
                localActiveLoadMax = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"]
                localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
                localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]

                mySql = ""
                myParams = ""
                mySql = self.m_configDict["SQL"]["get_active_loads"]
    
                activeFlag=1
                totalActiveWaitTime=0
                while activeFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkActiveLoads - Active Loads value = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if actual active loads is <= configured active loads.  If so, return out of the fn
                    if int(returnStr[1].strip()) <= localActiveLoadMax:
                        activeFlag=0
                        return 0

                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localActiveLoadWaitTime)
                    totalActiveWaitTime += localActiveLoadWaitTime

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit
                    if totalActiveWaitTime > localActiveMaxWaitTime:
                        self.m_logger.error("In Fn chkActiveLoads.  Total Actual Wait Time exceeds the configured value active_load_max_wait_time.  Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y':
                localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"])
                localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"])

                mySql = ""

                #myParams = {"datasetName":self.datasetName}
                tempSql = self.m_configDict["SQL"]["get_race_status"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
    
                raceFlag=1
                totalRaceStatusWaitTime=0
                while raceFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "tempSql = ", tempSql
                        print "myParamsDict = ", myParamsDict
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkRaceStatus - ReturnCode = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                    if int(returnStr[1].strip()) <= 1:
                        raceFlag=0
                        return 0

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit


                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localRaceStatusWaitTime)
                    #time.sleep(90)
                    totalRaceStatusWaitTime += localRaceStatusWaitTime
                    if self.debugFlag:
                        print "totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime
                    if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                        self.m_logger.error("In Fn chkRaceStatusLoads.  Total Actual Wait Time exceeds the configured value race_status_max_wait_time.  Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def extractData(self,localDataRecordDict, localFileID, localFileIDQueue, localDBFlag):
        """
        Purpose - To load the given datafile to the S3 bucket specified in the global mktConfigFile
        :param localDataRecordDict: Datafile related info fetched from FINRA's manifest file including filename, filesize, recordcount
        :param localFileID: Internal File ID assigned to the local datafile
        :param localFileIDQueue: Queue in which, results of the operation is stored
        :param localDBFlag: Flag indicating if database should be used or not
        :return:
        """
        try:
            if self.debugFlag:
                print "Inside extractData function"
                print "localDataRecordDict = ", localDataRecordDict

            if localDBFlag:
                """ Not sure if we need Race Status check for Extract
                raceStatusReturnValue=self.chkRaceStatus()
                if self.debugFlag:
                    print "raceStatusReturnValue=", raceStatusReturnValue

                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1
                """

                """  Need to integrate Active loads with tb_dxt_process_status and tb_dxt_process_status ?
                activeLoadsReturnValue=self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1
                """

                processID = os.getpid()
                hostName = socket.gethostname()
                # Need to check the order
                test_var = str(self.m_configDict["dxt"]["DATA_FILE_NAME_STR"])
                localDataFile = localDataRecordDict[self.m_configDict["dxt"]["DATA_FILE_NAME_STR"]]
                localDataFileSize = int(localDataRecordDict[self.m_configDict["dxt"]["DATA_FILE_SIZE_STR"]])
                localDataFileRecordCount = int(localDataRecordDict[self.m_configDict["dxt"]["NO_OF_ROWS_STR"]])

                if self.debugFlag:
                    print "localDataFile = ", localDataFile
                    print "localDataFileSize = ", localDataFileSize
                    print "localDataFileRecordCount = ", localDataFileRecordCount

                #Insert Process status into Oracle db
                #DB_CALL - sp_dxt_insert_process_status(RUNID, FILE_ID, etc)
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                pStatus = 'P'
                pComment = 'Load started'
                #myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": localDataFileSize, "recordCount" : localDataFileRecordCount, "status":pStatus , "lcomment":pComment}
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": str(localDataFileSize), "recordCount" : str(localDataFileRecordCount), "status":pStatus , "lcomment":pComment}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            # Get the dataFileName file to be extracted from AWS
            dataFileName = localDataRecordDict[self.m_configDict["dxt"]["DATA_FILE_NAME_STR"]]
            #Here localFileWthPath is the local stage dir with file name
            localFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + dataFileName
            
            #Here targetFileWthPath is the AWS dir with file name
            targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(dataFileName))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])
            if self.debugFlag:
                print("localFileWthPath =", localFileWthPath)
                print("targetFileWthPath =", targetFileWthPath)
                print("targetBucket =", targetBucket)
                print("encryptKeyFlag =", encryptKeyFlag)
                print("localAWSRetries =", localAWSRetries)
            initCount = 0
            while (initCount < localAWSRetries):
                extractReturnValue = 0

                #Call s3.data download to extract the manifest file (single part load)
                #extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)

                if self.debugFlag:
                    print "extractReturnValue = ", extractReturnValue

                if int(extractReturnValue) == 0:
                    pStatus = 'S'
                    pComment = 'Load completed'
                    break
                else:
                    pStatus = 'F'
                    pComment = 'Load failed'
                    initCount += 1


            # Get the size of the file downloaded 
            localFileSize = os.stat(localFileWthPath).st_size

            # Check if the downloaded file size is matching with what is mentioned in manifest file.  If not mark it as failed
            if localFileSize != localDataFileSize:
                pStatus = 'F'
                pComment = 'Actual file size != Manifest file size'


            localRecordCount = 0
 
            if localDBFlag:
                #Call DB to insert 'S' or 'F' in tb_dxt_process_status
                #localFileIDQueue.put((localFileID, extractReturnValue))
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                #myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": localFileSize, "recordCount" : localRecordCount, "status":pStatus , "lcomment":pComment}
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": str(localFileSize), "recordCount" : str(localRecordCount), "status":pStatus , "lcomment":pComment}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                localFileIDQueue.put((localFileID,extractReturnValue))
            else:
                return extractReturnValue
      
        except Exception as exp:
            self.m_logger.error("Failure in extractData process for file with the error " + str(exp))
            if localDBFlag:
                localFileIDQueue.put((localFileID, 1))
            else:
                return 1

    def getRecords(self, fileDict, startDateTime, endDateTime):
        """
        Purpose - Function to sort the dictionary based on the key and return a sorted list
        :param fileDict : Dictionary containing Last_modified Date and file name
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            patternToSearch =  self.m_configDict["ENV"]["pattern_to_search"]
            
            if self.debugFlag:
                print "fileDict = ", fileDict
                print "patternToSearch = ", patternToSearch
                print "startDateTime = ", startDateTime
                print "endDateTime = ", endDateTime
            sorted_values = sorted(fileDict.values())
            start = bisect.bisect_left(sorted_values, startDateTime)
            end = bisect.bisect_right(sorted_values, endDateTime)
            if self.debugFlag:
                print "start = ", start
                print "end = ", end
            for fileItem in sorted(fileDict.iteritems())[start:end]:
                if patternToSearch in fileItem[0]:
                    if self.debugFlag:
                        print "fileItem[0] = ", fileItem[0]
                    yield fileItem[0]

        except Exception as exp:
            self.m_logger.error("Failed while executing getRecords to sort the dictionary content of dictionary  with Error = " + str(exp))
            sys.exit(1)

    def readManifestFile(self, manifestFileName):
        """
        Purpose - To read the content of Finra's manifest file stored in key-value pair into Nested dictionary 
        :param manifestFileName : Finra's manifestFileName containing data filenames, file size & no of rows
        """
        try:
            manifestRecordStartPattern = self.m_configDict["dxt"]["MANIFEST_RECORD_START_PATTERN"]

            if self.debugFlag:
                print "manifestRecordStartPattern =", manifestRecordStartPattern

            with open(manifestFileName) as infile:
                manifestFileDict = {}
                file = 0
                line_count = 0
                for line in infile:
                    line = line.strip()
                    if line.startswith(manifestRecordStartPattern):
                        file = line_count
                        line_count += 1
                        manifestFileDict[file] = {}
                    var, val = line.split('=',1)
                    if self.debugFlag:
                        print "var = ", var, "val = ", val
                    manifestFileDict[file][var.strip()] = val.strip()

            if self.debugFlag:
                print "====================================="
                print "manifestFileDict = ", manifestFileDict
                print "====================================="

            return manifestFileDict

            #for key, values in manifest.items():
                #if key == 1:
                    #for k,v in values.items():
                        #print k, v

        except Exception as exp:
            self.m_logger.error("Failed while executing readManifestFile to get FINRA manifest file into nested dictionary, Error = " + str(exp))
            sys.exit(1)


    def getManifestFileList(self, startDateTime, endDateTime, s3Bucket, s3Path, folderPosition):
        """
        Purpose - Function to sort the dictionary based on the key and return a sorted list
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            if self.debugFlag:
                print "s3Bucket = ", s3Bucket
                print "s3Path = ", s3Path
                print "startDateTime = ", startDateTime
                print "endDateTime = ", endDateTime
                print "folderPosition = ", folderPosition

            fileListDict = self.s3object.listBucketWPathByLastModified(s3Bucket, s3Path, folderPosition)
            if self.debugFlag:
                print "fileListDict = ", fileListDict

           
            #endDateTime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            manifestFileList = list(self.getRecords(fileListDict, startDateTime, endDateTime))

            if self.debugFlag:
                print "fileListDict = ", fileListDict
                print "manifestFileList = ", manifestFileList

            return manifestFileList
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS manifest file list with Error = " + str(exp))
            return 1

    def processExtractor(self):
        """
        Purpose - Function responsible for getting the AWS token and reading the last modified date in DB and fetch the list of files from AWS to be processed
        :param : None
        :return:
        """
        try:
            # DB_CALL
            # Make database call sp_dxt_validate_mktName(mktName) to validate mktName

            tempSql = self.m_configDict["SQL"]["validate_market_name"]
            myParamsDict = { 'mktName' : self.mktName }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.error("Invalid market name provided " + mySql + ". Error = " + self.mktName)
                sys.exit(1)

            if self.debugFlag:
                print "MktName from DB = ", self.mktName
          
            #Build the string for mktConfigFile based on mktName and configFile info
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + os.path.basename(self.configFile).split('.',1)[0].strip() + '_' + self.mktName.lower() + '.' + os.path.basename(self.configFile).split('.',1)[1].strip()

            if self.debugFlag:
                print("mktConfigFile = ", self.mktConfigFile)

            #Validate Market Config file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)

            if self.debugFlag:
                print("m_mktConfigDict=",self.m_mktConfigDict)

            # Read the table for the given market and fetch the last modified timestamp for the given manifest file
            tempSql = self.m_configDict["SQL"]["get_last_modified"]
            myParamsDict = { 'mktName' : self.mktName.upper() }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] == '0':
                if returnStr[1]:
                    lastModifiedDate=returnStr[1] 
                else:
                    lastModifiedDate="2015-01-01 00:00:00"
            else:
                self.m_logger.error("Unable to get last_modified date using the sql " + mySql + ". Error = " + self.mktName)
                sys.exit(1)
            if self.debugFlag:
                print("lastModifiedDate=",lastModifiedDate)

            #Temp call.  Need to enable the previous lines to use DB call.  Comment them bfr production

            if self.mktName == 'nyse_mkt': 
                lastModifiedDate="2016-06-03 15:00:00"
            else:
                lastModifiedDate="2016-06-01 00:00:00"

            #print "Ram - Last Modified Date = ", lastModifiedDate, "mktName = ", self.mktName

            # Get RunID
            self.runID = generate_runId()
            if self.debugFlag:
                print("RunID = ", self.runID)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)

            tokenRetryTimes = int(self.m_configDict["TOKEN"]["token_retry_times"])
            tokenRetryWaitTime = int(self.m_configDict["TOKEN"]["token_retry_wait_time"])

            initCount = 0
            while (initCount < tokenRetryTimes):
                tokenReturnCode = self.s3object.getToken()
                if tokenReturnCode:
                    if initCount == tokenRetryTimes:
                        self.m_logger.error("Error: Exceeded the max retries " + tokenRetryTimes + " to get AWS Token from FINRA.  Please re-try after some time or escalate.. ")
                        sys.exit(1)
                    initCount += 1
                    time.sleep(tokenRetryWaitTime)
                else:
                    break

            self.currentEpochTime = int(time.time())

            # Get list of Manifest files to be processed

            #currentDate = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            #currentDate = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            currentDate = (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")

            folderPosition =  int(self.s3object.m_configFile["S3"]["folder_position"])
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            targetFolder = self.s3object.m_configFile["S3"]["path"]
            targetFilePath = targetFolder 

            finraManifestFileList = self.getManifestFileList(lastModifiedDate, currentDate, targetBucket, targetFilePath, folderPosition)
            
            if self.debugFlag:
                print("finraManifestFileList = ", finraManifestFileList)

            # Download manifest files in the manifest file list to a specific folder from AWS
            localFileDir = self.s3object.m_configFile["ENV"]["stage_dir"]

            #targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(localFileWthPath))
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            # Get an instance of the Manifest class
            fileIDQueue = Queue()
            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])

            for finraManifestFile in finraManifestFileList:
                # Following 2 lines temporarily written to avoid bad manifest files.  Please remove them before go-live
                if finraManifestFile == 'manifest.TSP_A_20160425.txt':
                    continue
                if finraManifestFile == 'manifest.TSP_P_20160425.txt':
                    continue
                
                targetFileWthPath = targetFolder + finraManifestFile
                localFileWthPath = localFileDir + "/" + finraManifestFile
                if self.debugFlag:
                    print "targetFileWthPath = ", targetFileWthPath
                    print "localFileWthPath = ", localFileWthPath
                    print "finraManifestFile = ", finraManifestFile
                
                initCount = 0
                while (initCount < localAWSRetries):
                    extractReturnValue = 0

                    #Call s3.data download to extract the manifest file (single part load)
                    #extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                    extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)

                    if self.debugFlag:
                        print "extractReturnValue = ", extractReturnValue

                    if extractReturnValue:
                        # Try it again
                        initCount += 1
                    else:
                        # Come out of the loop
                        break
                # End of while loop for AWS Retries
                  
                if extractReturnValue:
                    self.m_logger.error("Unable to fetch manifestFile = " + finraManifestFile + "from the path = " + targetFileWthPath + " to the local filesystem = " + localFileWthPath )
                    sys.exit(1)

                    """ Not needed
                    if extractReturnValue == 0:
                        pStatus = 'P'
                        pComment = 'Load completed'
                        break
                    else:
                        pStatus = 'F'
                        pComment = 'Load failed'
                    """
                    initCount += 1

                # get datasetname from the manifest file.  Need check based on FINRA naming

                # Original requirement
                #self.datasetName = os.path.basename(finraManifestFile).split('.',3)[1].strip().upper()

                # Customized for FINRA's latest file
                self.datasetName = os.path.basename(finraManifestFile).split('.')[1].split('_')[1].strip().upper()
                if self.debugFlag:
                    print "datasetName = ", self.datasetName

                # Need to check DB call, once it is ready

                # Validate the manifest file name to make sure that we are expecting it
                tempSql = self.m_configDict["SQL"]["validate_dataset_name"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                #Check if dataset is there in the tb_dxt_dataset_master, if not, skip it and move to the next file.  For other errors, exit out of the program
                if int(returnStr[0]) < 0:
                    self.m_logger.error("Unable to validate datasetName " + mySql + ". Error = " + self.datasetName)
                    sys.exit(1)
                elif int(returnStr[0]) > 0:
                    self.m_logger.info("Give Dataset is not in the list to process.  Skipping it" + mySql + ". Dataset Name = " + self.datasetName)
                    # Continue to the next file entry in the manifest list
                    continue
                    
                # Insert a record into tb_dxt_dataset_trans with status 'P' for the given datasetName, saying that we start the process for this manifest file

                pStatus = 'P'
                tempSql = self.m_configDict["SQL"]["put_dataset"]
                myParamsDict = {'datasetName':self.datasetName, 'runID': str(self.runID), 'tDate':str(self.tradeDate), 'status': pStatus }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to insert into tb_dxt_dataset_trans table " + mySql + ". Error = " + self.datasetName)
                    sys.exit(1)

                # Read the contents of manifestfile i.e. dataFileNames into a list - Will validate the datafiles as well
                manifestDelim = self.m_configDict["ENV"]["manifest_delim"]
                if self.debugFlag:
                    print "localFileWthPath = ", localFileWthPath

                #Need to change the following line to read a nested dictionary from a keyValuePair
                manifestFileDict = self.readManifestFile(localFileWthPath)
                if self.debugFlag:
                    print "manifestDelim = ", manifestDelim
                    print "manifestFileDict = ", manifestFileDict

                process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])

                # Now go into multiprocessing and call extractData function and extract files ones by one 
                fileID=1
                dbFlag=1
                fileIDQueue = Queue()
                procs = []
                doneCounter = 0
                sendCounter = 0
                failureFlag = 0
                finraManifestFileCounter=0

                while doneCounter < len(manifestFileDict):
                    while sendCounter < len(manifestFileDict) and sendCounter - doneCounter < process_count:
                        if self.debugFlag:
                            print "manifestFileDict[self.m_configDict[dxt][DATA_FILE_NAME_STR]] = ", manifestFileDict[sendCounter]['Datafilename']
                        # Call fn extractData to fetch files from AWS.  Pass manifestFileDict[sendCounter] as it contains the whole record including the filename, filesize & row count
                        processHandle = Process(target=Extractor.extractData, args=(self, manifestFileDict[sendCounter],fileID, fileIDQueue, dbFlag))
                        processFlag=1
    
                        s3TimeoutTime = int(self.m_configDict["dxt"]["S3_TIMEOUT_TIME"])
                        if ((int(time.time()) - self.currentEpochTime) > s3TimeoutTime):
                            self.currentEpochTime = int(time.time())
                            self.m_logger.info("Getting New Token for Batch : {0}, Max batches : {1}".format(batch_count,max_batches))
                            if self.debugFlag:
                                print 'Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
                                print "self.currentEpochTime = ", self.currentEpochTime
                                print "Current Time in Epoch = ", int(time.time())
                            if self.debugFlag:
                                print "Inside get new token - self.currentEpochTime = ", self.currentEpochTime
                            initCount = 0
                            while (initCount < tokenRetryTimes):
                                tokenReturnCode = 0
                                tokenReturnCode = self.s3object.getToken()
                                if tokenReturnCode:
                                    if initCount == tokenRetryTimes:
                                        self.m_logger.error("Error: Exceed the max retries " + tokenRetryTimes + " to get AWS Token from FINRA.  Please re-try after some time or escalate.. ")
                                        sys.exit(1)
                                    initCount += 1
                                    time.sleep(tokenRetryWaitTime)
                                else:
                                    break

                        threadDelayTime = int(self.m_configDict["dxt"]["THREAD_DELAY_TIME"])
                        time.sleep(threadDelayTime)

                        processHandle.start()
                        procs.append(processHandle)
                        sendCounter += 1
                        fileID += 1
                    if processFlag:
                        for p in procs:
                            p.join()
                            procs=[]
                        processFlag=0
                    while not fileIDQueue.empty():  # process completed results as they arrive
                        #time.sleep(3)
                        qFileID, qResult = fileIDQueue.get()
                        if self.debugFlag:
                            print("qFileID = ", qFileID, "qResult = ", qResult)
                        doneCounter += 1
                        if qResult:
                            failureFlag = 1
                    if self.debugFlag:
                        print "ProcessFlag = ", processFlag, "sendCounter = ", sendCounter, "doneCounter = ", doneCounter
                    if failureFlag:
                        break
                        
                if self.debugFlag:
                    print "Failure Flag = ", failureFlag
    
                if failureFlag:
                    pStatus = 'F'
                else:
                    pStatus = 'S'

                    tblName = self.m_mktConfigDict["dxt"]["TARGET_TBL_NAME"] + "_" + self.mktName.upper()
                    manifestDate = os.path.basename(finraManifestFile).split('.',3)[1][6:12]
                    fatlManifestFile = self.m_configDict["ENV"]["stage_dir"] + "/" + tblName + "." + manifestDate + ".manifest"
                    with open(fatlManifestFile,"w") as fh:
                        counter = 0
                        for dictRecord in manifestFileDict:
                            dataFile = manifestFileDict[dictRecord][self.m_configDict["dxt"]["DATA_FILE_NAME_STR"]]
                            sourceFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + dataFile
                            dataFileSize = int(manifestFileDict[dictRecord][self.m_configDict["dxt"]["DATA_FILE_SIZE_STR"]])
                            dataFileRecordCount = int(manifestFileDict[dictRecord][self.m_configDict["dxt"]["NO_OF_ROWS_STR"]])
                            #fileSize = os.stat(sourceFileWthPath).st_size
                            if self.debugFlag:
                                print "dataFile = ", dataFile
                                print "dataFileSize = ", dataFileSize
                                print "dataFileRecordCount = ", dataFileRecordCount
                                print "sourceFileWthPath = ", sourceFileWthPath
                                print "tblName = ", tblName, "dataFile = ", dataFile, "dataFileSize = ", dataFileSize, "mktName = ", self.mktName
                                
                            fh.write(tblName + "|" + str(dataFile) + "|" + str(dataFileSize) + "|" + str(dataFileRecordCount) + "|" + "0" + "\n")
                            counter += 1

                # Move all the data files to inbox  from the stg location.  No need for this step, as Joejo mentioned there will be another Tidal job doing this step

                # Move the manifest file to inbox from the stg location

                # insert a record into tb_dxt_dataset_trans table with 'S' or 'F' record

                #Call Oracle fn to insert status 'S' into TB_DDY_DATASET_TRANS with RUNID etc
                #DB_CALL
                # Make database call sp_dxt_insert_dataset_trans and insert data based on Failure or Success

                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_dataset"]
            
                myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.tradeDate), "status": pStatus }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
    
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
    
                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                if failureFlag:
                    self.m_logger.error("Extract failed for data files for manifest file " + finraManifestFile)
                    sys.exit(1)

                finraManifestFileCounter += 1

           # End of for loop for finraManifestFiles

        except Exception as e:
            self.m_logger.error("ProcessExtractor failed with error " + str(e))
            sys.exit(1)
def get_phones_in():
    """Fetch and return the incoming phone records via Oracle.consulta_fones()."""
    return Oracle.consulta_fones()