def process(self):
    """Build the XML document for the current input buffer.

    A fresh StringIO is opened as ``self.tmpMsg``; the XML declaration and
    the opening root tag are written to it, every input line that does not
    start with '#' is passed to ``writeRecordAsXML`` (without its trailing
    newline), and the closing root tag is written.  The resulting text is
    stored in ``self.msg`` and appended to ``self.msgList``.

    Returns 0 on success and 1 if anything raised (the error is logged).
    """
    try:
        self.tmpMsg = StringIO.StringIO()
        self.updateAttributesFromMetadata()
        if self.hasHeader == 'y':
            self.fieldNames = self.getHeader()

        self.tmpMsg.write('<?xml version=\"1.0\" encoding=\"' + self.encoding + '\"?>')
        self.tmpMsg.write('\n<' + self.rootTag + '>\n')

        while True:
            line = self.InMsg.readline()
            if len(line) == 0:
                # readline() returns an empty string only at end of input.
                break
            if line[0] == '#':
                # Lines starting with '#' are skipped entirely.
                continue
            if line[-1] == '\n':
                line = line[:-1]
            self.writeRecordAsXML(line)

        self.tmpMsg.write('</' + self.rootTag + '>')
        self.tmpMsg.seek(0)
        self.msg = self.tmpMsg.read()
        self.msgList.append(self.msg)
        reticLog.logInfo(self.logList, '(' + self.name + ') ' + "Message process is finished in pipe")
        return 0
    except:
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, '(' + self.name + ') ' + "Error during message processing in pipe")
        reticLog.logError(self.logList, '(' + self.name + ') ' + errorMessage)
        return 1
def adpator_run(self):
    """Run one source -> pipe -> sink pass.

    Steps, in order:
      1. call ``preprocess_batch`` (a failure is logged and then ignored);
      2. start the source and, while ``next()`` returns 1, push the source
         message through the pipe and commit it;
      3. for every message the pipe produced, load it into the sink, build
         the record list, process sites, prepare the update objects and
         write them to the database.
    """
    # Preprocessing, so that all intermediate files for the file source
    # are generated.
    try:
        preprocess_batch(direcotryPath, self.logList)
    except:
        import sys
        import traceback
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, "( " + self.adtName + " ) Unknown error during initialization in sink")
        reticLog.logError(self.logList, "( " + self.adtName + " ) " + errorMessage)

    self.adaptorSource.start()
    while self.adaptorSource.next() == 1:
        self.adaptorPipe.getMsg(self.adaptorSource.msg)
        self.adaptorPipe.process()
        self.adaptorSource.commit()
    # One more commit after the loop, exactly as the original does.
    self.adaptorSource.commit()

    for i, pipeMsg in enumerate(self.adaptorPipe.msgList):
        self.adaptorSink.getMsg(pipeMsg)
        recordList = self.adaptorSink.getRecordList()
        sourceFile = self.adaptorSource.metadata['filename'][i]
        print(str(len(recordList)) + " XML records" + ' generated' + ' for %s' % sourceFile)
        self.adaptorSink.processSites(recordList, sourceFile)
        updateList = self.adaptorSink.prepareUpdateObject(recordList, testTPWDmodel.tpwdProcessInfo)
        print(str(len(updateList)) + " database records" + ' to be inserted')
        self.adaptorSink.updateDB(updateList)
def getMsg(self, messages):
    """Index one pair of event/result CSV streams by RFA tag.

    For TCEQ, every call receives two file-like objects: ``messages[0]``
    holds the event rows and ``messages[1]`` the result rows, both
    '|'-delimited.

    ``self.basinSegmentInfo`` is rebuilt from scratch as::

        {rfa_tag: {EVENT_IN_HASHTable: event_row,
                   RESULT_IN_HASHTable: [result_row, ...]}}

    The RESULT_IN_HASHTable entry only exists for tags that have at least
    one result row.

    Returns 0 on success, 1 on any error (logged, traceback on stdout).
    A result row whose tag has no matching event row is such an error.
    """
    try:
        reticLog.logInfo(self.logList, "( " + self.name + " ) Retrieving message for sink : " + self.name)
        self.basinSegmentInfo = {}
        eventRows = csv.reader(messages[0], delimiter="|")
        resultRows = csv.reader(messages[1], delimiter="|")

        for row in eventRows:
            if not row:
                # csv.reader yields [] for a blank line; indexing it used to
                # raise IndexError and abort the whole message.
                continue
            # This is for basin ID 6, year 2010: rows carry three empty
            # leading columns that have to be dropped.
            if row[:3] == ["", "", ""]:
                row = row[3:]
                print(row)
            self.basinSegmentInfo[row[RFATAG_COLUMN]] = {EVENT_IN_HASHTable: row}

        for row in resultRows:
            if not row:
                continue
            segment = self.basinSegmentInfo[row[RFATAG_COLUMN]]
            # The first result row for a tag creates the list.
            segment.setdefault(RESULT_IN_HASHTable, []).append(row)

        reticLog.logInfo(self.logList, "( " + self.name + " ) Message retrieved in sink : " + self.name)
        return 0
    except Exception:
        import sys
        import traceback
        traceback.print_exc(file=sys.stdout)
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, "( " + self.name + " ) Error during message retrieval in sink : " + self.name)
        reticLog.logError(self.logList, "( " + self.name + " ) " + errorMessage)
        return 1
def start(self):
    """Start the source of the adaptor.

    Logs the start, calls ``self.getMsg()`` and returns 0.  If anything
    raises, the error is logged and the process exits with status 1.
    """
    try:
        tag = '( ' + self.name + ' ) '
        reticLog.logInfo(self.logList, tag + "Starting the source adaptor")
        self.getMsg()
    except:
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        tag = '( ' + self.name + ' ) '
        reticLog.logError(self.logList, tag + "Unknown error on start of source")
        reticLog.logError(self.logList, tag + errorMessage)
        sys.exit(1)
    else:
        return 0
def getMsg(self, message):
    """Store ``message`` as the sink's current message.

    ``self.msg`` is first reset to an empty string and then set to
    ``message``.  Returns 0 on success, 1 if an exception was raised
    (the error is logged).
    """
    try:
        sinkTag = "( " + self.name + " ) "
        reticLog.logInfo(self.logList, sinkTag + "Retrieving message for sink : " + self.name)
        # Re-initialize msg before taking the new message.
        self.msg = ''
        self.msg = message
        reticLog.logInfo(self.logList, sinkTag + "Message retrieved in sink : " + self.name)
    except:
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, "( " + self.name + " ) Error during message retrieval in sink : " + self.name)
        reticLog.logError(self.logList, "( " + self.name + " ) " + errorMessage)
        return 1
    else:
        return 0
def getMsg(self, message):
    """Load ``message`` into a fresh input buffer.

    ``self.InMsg`` becomes a new StringIO holding ``message`` and rewound
    to position 0; ``self.msg`` is reset to an empty string.

    Returns 0 on success, 1 if an exception was raised (logged).
    """
    try:
        reticLog.logInfo(self.logList, '(' + self.name + ') ' + "Getting message into pipe")
        self.msg = ''
        self.InMsg = StringIO.StringIO()
        inputBuffer = self.InMsg
        inputBuffer.write(message)
        inputBuffer.seek(0)
    except:
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, '(' + self.name + ') ' + "Error during message retrieval in pipe")
        reticLog.logError(self.logList, '(' + self.name + ') ' + errorMessage)
        return 1
    else:
        return 0
def __init__(self, args, logger):
    """Configure the pipe from its definition dictionary.

    Required keys in ``args``: 'name', 'msgKind', 'rootTag', 'recTag',
    'encoding'.  Depending on 'msgKind':

    * 'delimited'   -- also needs 'delimiter' and 'hasHeader'; when
      'hasHeader' is 'n', 'fieldNames' is required as well.
    * 'fixedLength' -- also needs 'fieldNames' and 'fieldLength'; the
      header flag is forced to 'n'.

    For any other 'msgKind' the header flag defaults to 'n'.

    A missing key or any other error is logged and terminates the process
    with exit status 1.
    """
    # Set before the try block so the error handlers can always build a log
    # prefix, even when 'name' itself is the missing parameter.
    self.logList = logger
    self.name = '<unnamed>'
    try:
        self.args = args
        self.name = args['name']
        self.InMsg = ''
        self.msg = ''
        self.msgKind = args['msgKind']
        self.delimiter = ''
        self.fieldNames = []
        self.fieldLength = []
        self.msgList = []
        self.rootTag = args['rootTag']
        self.recTag = args['recTag']
        self.encoding = args['encoding']
        self.metadata = {}
        # Default so the attribute exists whatever msgKind is given.
        self.hasHeader = 'n'

        if self.msgKind == 'delimited':
            self.delimiter = args['delimiter']
            self.hasHeader = args['hasHeader']
            if self.hasHeader == 'n':
                # No header line in the data: names must come from config.
                self.fieldNames = args['fieldNames']
        elif self.msgKind == 'fixedLength':
            self.fieldNames = args['fieldNames']
            self.fieldLength = args['fieldLength']
    except KeyError:
        reticLog.logError(self.logList, '(' + self.name + ') ' + "Error on ToXMLPipe initialization")
        reticLog.logError(self.logList, '(' + self.name + ') ' + "Parameter " + str(sys.exc_info()[1]) + " is missing on pipe definition. Exiting...")
        sys.exit(1)
    except Exception:
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, '(' + self.name + ') ' + "Unknown error during initialization of pipe : " + self.name)
        reticLog.logError(self.logList, '(' + self.name + ') ' + errorMessage)
        sys.exit(1)
def __init__(self, args, logger):
    """Configure the HTTP source from its definition dictionary.

    Required keys in ``args``: 'name', 'URL'.
    Optional keys: 'pollPeriod' (converted to float and stored as
    ``self.interval``; the attribute is not created when the key is
    absent), 'params', 'exitOnError' (defaults to 'n').

    A missing required key is logged and terminates the process with exit
    status 1.  Any other error is logged and re-raised to the caller.
    """
    # Set before the try block so the error handlers can always build a log
    # prefix, even when 'name' itself is the missing parameter.
    self.logList = logger
    self.name = '<unnamed>'
    try:
        self.name = args['name']
        reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' + "Intitializing HTTPSource : " + self.name)
        self.URL = args['URL']
        self.exitOnError = 'n'
        self.msgList = []
        self.nbMsg = 0
        self.msg = []
        self.msgName = ''
        self.params = []
        self.metadata = {}
        # Used for the wait(interval) function, hence the float.
        if 'pollPeriod' in args:
            self.interval = float(args['pollPeriod'])
        if 'params' in args:
            self.params = args['params']
        if 'exitOnError' in args:
            self.exitOnError = args['exitOnError']
    except KeyError:
        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on HTTPSource initialization")
        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Parameter " + str(sys.exc_info()[1]) + " is missing on source definition")
        sys.exit(1)
    except Exception:
        # format_exception_only() returns a list of strings; take the first
        # entry before concatenating (the original concatenated the list
        # itself, which raised TypeError inside this handler).
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Unknown error on HTTPSource initialization")
        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
        # Propagate the original error; the sys.exit(1) that used to follow
        # this statement was unreachable.
        raise
def __init__(self, args, logList):
    """Configure the file source from its definition dictionary.

    Required keys in ``args``: 'name', 'fileFilter', 'newExtension',
    'filePath', 'pollPeriod' (converted to float, stored as
    ``self.interval``).
    Optional key: 'exitOnError' (defaults to 'y').

    ``self.filePath`` always ends with the platform path separator.

    A missing key or any other error is logged and terminates the process
    with exit status 1.
    """
    import os

    # Set before the try block so the error handlers can always build a log
    # prefix, even when 'name' itself is the missing parameter.
    self.logList = logList
    self.name = '<unnamed>'
    try:
        self.name = args['name']
        self.exitOnError = 'y'
        reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' + "Intitializing fileSource")
        self.fileFilter = args['fileFilter']
        self.newExtension = args['newExtension']
        self.msgList = []
        self.metadata = {}
        self.nbMsg = 0
        self.msg = ''
        self.msgName = ''
        self.filePath = args['filePath']
        # Used for the wait(interval) function, hence the float.
        self.interval = float(args['pollPeriod'])
        # Append the OS path separator ('\\' on nt, '/' on linux).
        self.filePath = self.filePath + os.sep
        if 'exitOnError' in args:
            self.exitOnError = args['exitOnError']
    except KeyError:
        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on fileSource initialization")
        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Parameter " + str(sys.exc_info()[1]) + " is missing on source definition")
        sys.exit(1)
    except Exception:
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Unknown error on initialization on source")
        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
        sys.exit(1)
def connect(self, args):
    """Establish a connection with the database.

    Builds the engine URL ``<dbType>://<user>:<password>@<dsn>`` from
    ``args``, imports the driver module named by ``args['driverName']`` and
    stores the resulting SQLAlchemy engine in ``self.engine``.

    Raises:
        Exception: on any failure, after the error has been logged.  The
            original raised a bare string, which is itself a TypeError on
            Python 2.6 and later.
    """
    try:
        reticLog.logInfo(self.logList, "Intitializing Database Connection : " + args['dsn'])
        # Construct the connection string according to the parameters.
        engineStr = ':'.join([
            args['dbType'],
            '//' + args['user'],
            args['password'] + "@" + args['dsn'],
        ])
        # NOTE(review): __import__ returns the top-level package for a
        # dotted name -- confirm driverName is never dotted.
        dbDriverMod = __import__(args['driverName'])
        self.engine = create_engine(engineStr, module=dbDriverMod)
        reticLog.logInfo(self.logList, "DataBase Connection established")
    except Exception:
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, "Database error : " + errorMessage)
        raise Exception("Database Error: " + errorMessage.strip())
def __init__(self, args, logger):
    """Configure the SQL sink and open its database session.

    Required keys in ``args``: 'name', 'dbFactroyArg', 'inputFormat',
    'delimiter', 'hasHeader', 'nbThreads'.
    Optional keys: 'autoCommit', 'retries' (default 5), 'fieldNames',
    'fieldLength', 'parallelize' (default 'n'), 'nbQueriesParal'
    (default 10).

    A missing key or any other error is logged and terminates the process
    with exit status 1.
    """
    # Set before the try block so the error handlers can always build a log
    # prefix, even when 'name' itself is the missing parameter.
    self.logList = logger
    self.name = '<unnamed>'
    try:
        self.args = args
        self.msg = ''
        self.name = args['name']
        self.dbFactoryArg = args['dbFactroyArg']
        self.inputFormat = args['inputFormat']
        self.delimiter = args['delimiter']
        self.hasHeader = args['hasHeader']
        # NOTE(review): self.autocommit is only created when 'autoCommit' is
        # absent; when the key is supplied the attribute stays undefined.
        # Kept as is -- confirm the intended value before changing it.
        if 'autoCommit' not in args:
            self.autocommit = False
        self.session = None
        self.nbThreads = int(args['nbThreads'])
        if self.nbThreads == 0:
            self.nbThreads = 1
        if 'retries' in args:
            self.retries = int(args['retries'])
        else:
            self.retries = 5
        self.metadata = {}
        self.fieldNames = []
        self.fieldLength = []
        if self.inputFormat == 'delimited':
            if 'fieldNames' in args:
                self.fieldNames = args['fieldNames']
        elif self.inputFormat == 'fixedLength':
            # Fixed-length layouts need both lists; otherwise keep defaults.
            if 'fieldNames' in args and 'fieldLength' in args:
                self.fieldNames = args['fieldNames']
                self.fieldLength = args['fieldLength']

        self.connection = dbFactory.dbFactory(args['dbFactroyArg'], self.logList)
        self.session = self.makeSession(self.connection)

        self.parallelize = 'n'
        self.nbQueriesParal = 10
        if 'parallelize' in args:
            self.parallelize = args['parallelize']
        if 'nbQueriesParal' in args:
            self.nbQueriesParal = int(args['nbQueriesParal'])
    except KeyError:
        reticLog.logError(self.logList, "( " + self.name + " ) Error during SQLSink initialization")
        reticLog.logError(self.logList, "( " + self.name + " ) Parameter " + str(sys.exc_info()[1]) + " is missing in sink definition")
        sys.exit(1)
    except Exception:
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, "( " + self.name + " ) Unknown error during initialization in sink")
        reticLog.logError(self.logList, "( " + self.name + " ) " + errorMessage)
        sys.exit(1)
def commit(self):
    """Commit the current message treatment.

    Deletes the file ``self.filePath + self.msgName``, drops the first
    entry of ``self.msgList``, decrements ``self.nbMsg`` and clears
    ``self.msg``.

    Returns 0 on success; 1 when there is nothing to commit
    (``self.nbMsg`` is not positive) or when the deletion failed (the
    error is logged and the queue is left untouched).
    """
    import os

    if self.nbMsg <= 0:
        return 1

    reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' + "Commiting msg " + self.msgList[0] + " on source : " + self.name)
    msgName = self.msgList[0]
    try:
        # NOTE(review): the file removed is self.msgName while the name
        # logged and dequeued is msgList[0]; in this method nothing ensures
        # they match -- confirm against the caller.
        os.remove(self.filePath + self.msgName)
        self.nbMsg = self.nbMsg - 1
        self.msgList = self.msgList[1:]
        self.msg = ''
        return 0
    except Exception:
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on commit phase on source - File : " + msgName)
        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
        return 1
def next(self):
    """Get the next message to be processed or report that sources are dry.

    Reads the file named by ``self.msgList[0]`` (inside ``self.filePath``)
    into ``self.msg``.  The part of the file name before the first '.' is
    appended to the list ``self.metadata['filename']`` and the part between
    the first and second '.' is stored in ``self.metadata['extension']``;
    a name with no '.' (or only a leading one) is appended whole with an
    empty extension.

    Returns:
        0 when the queue is empty (``self.nbMsg == 0``), 1 when a file was
        read.  On error the problem is logged and the result is 0 if
        ``self.exitOnError`` is 'y' (case-insensitive), otherwise 1.
    """
    if self.nbMsg == 0:
        reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' + "Sources dry on source ")
        return 0

    try:
        reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' + "Messages Left on queue of adaptor : " + str(self.nbMsg))
        reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' + "Retrieving file : " + self.msgList[0])
        self.msgName = self.msgList[0]

        if 'filename' not in self.metadata:
            self.metadata['filename'] = []
        if self.msgName.find('.') > 0:
            nameParts = self.msgName.split('.')
            self.metadata['filename'].append(nameParts[0])
            self.metadata['extension'] = nameParts[1]
        else:
            # Append here as well: the original replaced the list with the
            # bare string, unlike the branch above.
            self.metadata['filename'].append(self.msgName)
            self.metadata['extension'] = ''

        # The file reading happens here: probe the content first, then
        # reopen in text or binary mode accordingly.
        path = os.path.join(self.filePath, self.msgList[0])
        probe = open(path)
        try:
            isText = reticUtils.istext(probe)
        finally:
            probe.close()
        if isText:
            mode = 'r'
        else:
            mode = 'rb'
        fp = open(path, mode)
        try:
            self.msg = fp.read()
        finally:
            # Close the handle even when read() raises.
            fp.close()
        return 1
    except Exception:
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on message retrieval on source : " + self.name)
        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
        if self.exitOnError.lower() == 'y':
            return 0
        return 1
def next(self):
    """Get the next message to be processed or report that sources are dry.

    Without params the raw ``self.URL`` is fetched once and the body is
    appended to ``self.msg``.  With params ``self.msg`` is reset to an
    empty list and one GET request is issued per parameter dictionary;
    each dictionary is merged into ``self.metadata`` and the whole of
    ``self.metadata`` is url-encoded as the query string.

    Returns:
        0 when ``self.nbMsg`` is 0, otherwise 1.

    On a retrieval error the problem is logged and, when
    ``self.exitOnError`` is 'y' (case-insensitive), the process exits with
    status 1.  Otherwise the raw-URL fetch is abandoned, while a
    parameterised fetch is retried until it succeeds.
    """
    if self.nbMsg == 0:
        return 0

    reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' + "Messages Left on queue of adaptor : " + str(self.nbMsg))

    if len(self.params) == 0:
        # No params are provided: process the raw URL (no GET/POST request).
        reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' + "Retrieving message from : " + self.URL)
        try:
            connection = urllib2.urlopen(urllib2.Request(self.URL))
            try:
                self.msg.append(connection.read())
            finally:
                connection.close()
            reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' + "Message retrieved on adaptor: " + self.name)
        except Exception:
            exc_type, exc_value = sys.exc_info()[:2]
            errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
            reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on message retrieval on source : " + self.name)
            reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
            if self.exitOnError.lower() == 'y':
                sys.exit(1)
    else:
        # Params are provided: pass them through the GET method.  There are
        # as many calls as there are param dictionaries.
        self.msg = []
        for param in self.params:
            for key in param.keys():
                self.metadata[key] = param[key]
            paramLine = urlencode(self.metadata)
            reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' + "Retrieving message from : " + self.URL + "?" + paramLine)
            successful = False
            # NOTE(review): with exitOnError != 'y' a persistent failure is
            # retried forever with no delay or retry limit.
            while not successful:
                try:
                    connection = urllib2.urlopen(urllib2.Request("?".join([self.URL, paramLine])))
                    try:
                        self.msg.append(connection.read())
                    finally:
                        connection.close()
                    reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' + "Message retrieved on adaptor: " + self.name)
                except Exception:
                    # Exception, not a bare except: Ctrl-C and SystemExit
                    # must be able to break out of this retry loop.
                    exc_type, exc_value = sys.exc_info()[:2]
                    errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
                    reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on message retrieval on source : " + self.name)
                    reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
                    if self.exitOnError.lower() == 'y':
                        sys.exit(1)
                else:
                    successful = True
    return 1
def __init__(self, args, logger):
    """Configure the ODM SQL sink, map its tables and open a session.

    Required keys in ``args``: 'name', 'dbFactroyArg', 'inputFormat',
    'delimiter', 'hasHeader', 'nbThreads'.
    Optional keys: 'autoCommit', 'retries' (default 5), 'fieldNames',
    'fieldLength', 'parallelize' (default 'n'), 'nbQueriesParal'
    (default 10).

    The 'DataValues', 'Sites', 'Variables' and 'Methods' tables are
    reflected from the connected database and mapped onto the classes of
    the same names.

    A missing key or any other error is logged and terminates the process
    with exit status 1.
    """
    # Set before the try block so the error handlers can always build a log
    # prefix, even when 'name' itself is the missing parameter.
    self.logList = logger
    self.name = '<unnamed>'
    try:
        self.args = args
        self.msg = ''
        self.name = args['name']
        self.dbFactoryArg = args['dbFactroyArg']
        self.inputFormat = args['inputFormat']
        self.delimiter = args['delimiter']
        self.hasHeader = args['hasHeader']
        # NOTE(review): self.autocommit is only created when 'autoCommit' is
        # absent; when the key is supplied the attribute stays undefined.
        # Kept as is -- confirm the intended value before changing it.
        if 'autoCommit' not in args:
            self.autocommit = False
        self.session = None
        self.nbThreads = int(args['nbThreads'])
        if self.nbThreads == 0:
            self.nbThreads = 1
        if 'retries' in args:
            self.retries = int(args['retries'])
        else:
            self.retries = 5
        self.metadata = {}
        self.fieldNames = []
        self.fieldLength = []
        if self.inputFormat == 'delimited':
            if 'fieldNames' in args:
                self.fieldNames = args['fieldNames']
        elif self.inputFormat == 'fixedLength':
            # Fixed-length layouts need both lists; otherwise keep defaults.
            if 'fieldNames' in args and 'fieldLength' in args:
                self.fieldNames = args['fieldNames']
                self.fieldLength = args['fieldLength']

        self.connection = dbFactory.dbFactory(args['dbFactroyArg'], self.logList)

        # Map the ODM tables onto their Python classes so SQLAlchemy's ORM
        # can be used on them.
        # NOTE(review): orm.mapper() runs on every instantiation; presumably
        # only one sink is created per process -- confirm.
        metadata = schema.MetaData()
        metadata.bind = self.connection.engine
        mappings = (
            (DataValues, 'DataValues'),
            (Sites, 'Sites'),
            (Variables, 'Variables'),
            (Methods, 'Methods'),
        )
        for mappedClass, tableName in mappings:
            table = schema.Table(tableName, metadata,
                                 autoload=True,
                                 autoload_with=self.connection.engine)
            orm.mapper(mappedClass, table)

        self.session = self.makeSession(self.connection)

        self.parallelize = 'n'
        self.nbQueriesParal = 10
        if 'parallelize' in args:
            self.parallelize = args['parallelize']
        if 'nbQueriesParal' in args:
            self.nbQueriesParal = int(args['nbQueriesParal'])
    except KeyError:
        reticLog.logError(self.logList, "( " + self.name + " ) Error during SQLSink initialization")
        reticLog.logError(self.logList, "( " + self.name + " ) Parameter " + str(sys.exc_info()[1]) + " is missing in sink definition")
        sys.exit(1)
    except Exception:
        exc_type, exc_value = sys.exc_info()[:2]
        errorMessage = traceback.format_exception_only(exc_type, exc_value)[0]
        reticLog.logError(self.logList, "( " + self.name + " ) Unknown error during initialization in sink")
        reticLog.logError(self.logList, "( " + self.name + " ) " + errorMessage)
        sys.exit(1)
################################### newSite.Latitude,newSite.Longitude = float(record['start_latitude_num']),float(record['start_longitude_num']) newSite.LatLongDatumID= 2 newSite.VerticalDatum = u'Unknown' newSite.State = u'Texas' self.session.add(newSite) self.session.flush() record['SiteID'] = newSite.SiteID else: record['SiteID'] = sites_exist.SiteID self.session.commit() reticLog.logInfo(self.logList, "( " + self.name + " ) sites info processed on sink : " + self.name) return 0 except: errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0] reticLog.logError(self.logList, "( " + self.name + " ) Error during Sites message processing in sink : " + self.name) reticLog.logError(self.logList, "( " + self.name + " ) " + errorMessage) return 1 # ============================================================= # # # Private methods (optional) # # ============================================================= # #transform the xml/flat/other format of file into a list of record(dictionary) #return type : a list of csv records to be inserted into database def getRecordList(self): """Extraction of the fields and values to map to the SQL statement. The method returns a list of dictionnaries"""