Exemplo n.º 1
0
def main():
    """Interactive smoke test for HTTPSource against the TCEQ sample-query URL.

    Registers one console logger, starts an HTTPSource configured with two
    parameter sets (EVENT and RESULT file types), and for as long as
    ``next()`` returns 1 waits for the user to press Enter, prints the
    current message and commits it. After the loop it prompts once more and
    prints every element of the source's ``msg``.
    """
    # A single console logger at DEBUG level.
    loggers = []
    reticLog.addLogger(loggers, {
        'name': 'httpsource testing',
        'level': 'DEBUG',
        'format': 'Simple',
        'handler': 'ConsoleAppender',
    })

    # One parameter set per file type; basin and year are the same for both.
    query_params = [
        {"filetype": EVENT, "basinid": "0510", "year": "2005"},
        {"filetype": RESULT, "basinid": "0510", "year": "2005"},
    ]
    source_conf = {
        'name': "testing httpsource",
        'URL': "http://www.tceq.state.tx.us/cgi-bin/compliance/monops/crp/sampquery.pl",
        'pollPeriod': 12.45,
        'params': query_params,
    }

    source = HTTPSource.source(source_conf, loggers)
    source.start()

    while source.next() == 1:
        # Pause so each fetched message can be inspected by hand.
        raw_input("Content of this URL:  %s" % source.URL)
        print(source.msg)
        # TODO: add stringIO here
        source.commit()

    raw_input("here,more message")
    for leftover in source.msg:
        print(leftover)
Exemplo n.º 2
0
 def __init__(self,
              adtName,
              srcConfName,
              pipeConfName,
              sinkConfName,
              logConfName=config[0],
              firstTimeRun=True):
     """Set up the loggers, the HTTP source and the TCEQ SQL sink.

     adtName       -- stored on the instance as ``adtName``
     srcConfName   -- handed to get_conf_attr(..., "source") for the source settings
     pipeConfName  -- accepted but not used here (there is no pipe stage for TCEQ)
     sinkConfName  -- handed to get_conf_attr(..., "sink") for the sink settings
     logConfName   -- handed to get_conf_attr for the logger settings
     firstTimeRun  -- stored on the instance as ``firstTimeRun``
     """
     self.adtName, self.firstTimeRun = adtName, firstTimeRun

     # Every entry of the log configuration becomes one logger in logList.
     self.logList = []
     for logger_conf in get_conf_attr(logConfName):
         reticLog.addLogger(self.logList, logger_conf)

     # Source stage.
     source_conf = get_conf_attr(srcConfName, "source")
     self.adaptorSource = HTTPSource.source(source_conf, self.logList)
     # Mirror the source's interval on the adaptor itself.
     self.interval = self.adaptorSource.interval

     # Sink stage.
     sink_conf = get_conf_attr(sinkConfName, "sink")
     self.adaptorSink = SQLSink_forTCEQ.sink(sink_conf, self.logList)
Exemplo n.º 3
0
 def __init__(self,
              adtName,
              srcConfName,
              pipeConfName,
              sinkConfName,
              logConfName=config[0]):
     """Set up the loggers, the file source, the XML pipe and the TPWD SQL sink.

     adtName       -- stored on the instance as ``adtName``
     srcConfName   -- handed to get_conf_attr(..., 'source') for the source settings
     pipeConfName  -- handed to get_conf_attr(..., 'pipe') for the pipe settings
     sinkConfName  -- handed to get_conf_attr(..., 'sink') for the sink settings
     logConfName   -- handed to get_conf_attr for the logger settings
     """
     self.adtName = adtName

     # Every entry of the log configuration becomes one logger in logList.
     self.logList = []
     for logger_conf in get_conf_attr(logConfName):
         reticLog.addLogger(self.logList, logger_conf)

     # Source stage.
     source_conf = get_conf_attr(srcConfName, 'source')
     self.adaptorSource = fileSource.source(source_conf, self.logList)
     # Mirror the source's interval on the adaptor itself.
     self.interval = self.adaptorSource.interval

     # Pipe stage.
     pipe_conf = get_conf_attr(pipeConfName, 'pipe')
     self.adaptorPipe = toXML.pipe(pipe_conf, self.logList)

     # Sink stage.
     sink_conf = get_conf_attr(sinkConfName, 'sink')
     self.adaptorSink = SQLSink_forTPWD.sink(sink_conf, self.logList)
Exemplo n.º 4
0
Arquivo: toXML.py Projeto: twdb/txhis
    # Settings for a console logger at DEBUG level.
    logAttDic = {# logger name: sys.argv[0] up to its first "."
                 'name':sys.argv[0].split(".")[0],
                 'level': 'DEBUG',
                 'format':'Simple',
                 'handler':'ConsoleAppender',
                }
    # Settings for a second logger with the same name and level, using the
    # 'FileAppender' handler with mode 'w' and file name 'fileSourceTest.txt'.
    # NOTE(review): presumably this truncates and writes that file -- confirm
    # against reticLog.
    logAttDic_2 = {# logger name: sys.argv[0] up to its first "."
                 'name':sys.argv[0].split(".")[0],
                 'level': 'DEBUG',
                 'format':'Simple',
                 'handler':'FileAppender',
                 'mode': 'w',
                 'fileName':'fileSourceTest.txt'
                }
    # Both loggers are registered in the same list.
    logList = []
    reticLog.addLogger(logList,logAttDic)
    reticLog.addLogger(logList,logAttDic_2)
    # Pipe settings: comma-delimited input flagged as having a header, with
    # the root/record tag names and the encoding given below.
    # NOTE(review): the consumer of pipe_args is not visible in this excerpt;
    # presumably it is the toXML pipe -- confirm.
    pipe_args = {}
    pipe_args['msgKind']='delimited'
    pipe_args['hasHeader']='y'
    pipe_args['rootTag']='test_TPWD_Emat'
    pipe_args['recTag']='test_TPWD_Emat_record'
    pipe_args['encoding']='UTF-8'
    pipe_args['delimiter']=','
    # Explicit field names are disabled here (see the commented-out
    # assignments); only an empty fieldLength mapping is supplied.
    #pipe_args['fieldNames']={}
    pipe_args['fieldLength']={}
#    pipe_args['fieldNames'][0]='Source_File_Name'
#    pipe_args['fieldNames'][1]='Year'
#    pipe_args['fieldNames'][2]='Month'
#    pipe_args['fieldNames'][3]='Day'
#    pipe_args['fieldNames'][4]='Hour'
Exemplo n.º 5
0
def _insert_site_with_retries(session, site_record, max_retries=5):
    """Flush ``site_record`` through ``session``, retrying after database errors.

    Each attempt assigns a fresh ``SiteID`` from ``getMaxId(session, "Sites")``,
    adds the record to the session and flushes. When the flush raises
    ``exc.OperationalError`` or ``exc.InvalidRequestError`` the session is
    rolled back and the attempt is repeated, for at most ``max_retries + 1``
    attempts in total (the first try plus ``max_retries`` retries).

    Returns True as soon as one flush succeeds, False when every attempt failed.
    """
    # NOTE(review): rollback() presumably also discards sites flushed earlier
    # in the same, still uncommitted, transaction -- confirm this is acceptable.
    for _ in range(max_retries + 1):
        try:
            site_record.SiteID = getMaxId(session, "Sites")
            session.add(site_record)
            session.flush()
        except (exc.OperationalError, exc.InvalidRequestError):
            print("DB constraint violation happen")
            session.rollback()
        else:
            return True
    return False


def main():
    """Import the TCEQ station list into the ``Sites`` table.

    Fetches the pipe-delimited stations file through an HTTPSource and, for
    every row after the first, looks for an existing ``Sites`` record with the
    same site code, latitude and longitude. Rows that are already present are
    skipped; the others are inserted (with retries). The session is committed
    once per fetched message.

    Raises:
        RuntimeError: when a new site could not be flushed after all retries.
    """
    session = initDB()

    # Logger setup: a single console logger.
    logAttDic = {'name': 'TCEQ sites and parameters importing for the first time',
                 'level': 'DEBUG',
                 'format': 'Simple',
                 'handler': 'ConsoleAppender'}
    logList = []
    reticLog.addLogger(logList, logAttDic)

    # The sites list is a text file served from the TCEQ FTP site.
    siteSrc_args = {
        'name': "TCEQ sites httpsource",
        'URL': "ftp://ftp.tceq.state.tx.us/pub/WaterResourceManagement/WaterQuality/DataCollection/CleanRivers/public/stations.txt",
    }
    sitesHTTPSource = HTTPSource.source(siteSrc_args, logList)
    sitesHTTPSource.start()

    while sitesHTTPSource.next() == 1:
        print("Content of this URL:  %s" % sitesHTTPSource.URL)
        sitesFile = StringIO(sitesHTTPSource.msg[0])
        sitesListReader = csv.reader(sitesFile, delimiter='|')
        for index, row in enumerate(sitesListReader):
            if index == 0:
                # The first row is skipped (presumably the column header).
                continue

            # Site names are capped at 255 characters.
            newRecordSiteName = row[SITENAME][:255]
            where = and_(Sites.SiteCode == unicode(row[SITECODE]),
                         Sites.Latitude == float(row[LATITUDE]),
                         Sites.Longitude == float(row[LONGITUDE]))
            try:
                session.query(Sites).filter(where).one()
            except NoResultFound:
                # Not stored yet: fall through to the insertion below.
                pass
            else:
                print("find record with SiteCode %s in database, skip it..." % row[SITECODE])
                continue

            newSiteRecord = Sites(row[SITECODE], newRecordSiteName,
                                  float(row[LATITUDE]), float(row[LONGITUDE]), row[COUNTY],
                                  ";".join(["HUC 8 = ", row[HUC], "EPA_Type1 = ", row[TYPE1], "EPA_Type2 = ", row[TYPE2]]))
            if not _insert_site_with_retries(session, newSiteRecord):
                # A real exception object is required here: raising a plain
                # string is itself a TypeError on Python 2.6 and later.
                raise RuntimeError("Database Exception : all retries failed")
            print("inert new Site record with SiteCode ==> %s" % row[SITECODE])

        # Unit-of-work pattern: commit only once per fetched message.
        try:
            session.commit()
        except Exception:
            session.rollback()
            reticLog.logWarning(logList, "Commit Failed in SQLSink")
        else:
            reticLog.logInfo(logList, "( " + "TCEQ Sites" + " ) Update commited")
        sitesHTTPSource.commit()
Exemplo n.º 6
0
                pass
        return maxid

# Manual test driver for the sink: loads the log and sink configuration,
# feeds the sink one XML message read from disk, processes the sites and
# pushes the prepared records to the database, pausing for user input
# between stages.
if __name__ == '__main__':
    import time
    # Record the starting time so the elapsed time can be computed at the end.
    start = time.clock()
    print 'testing sink....'
    # config[0] is the logger configuration file, config[1] the sink configuration file.
    config = ["..\\testAdaptor_config\\logParam.xml","..\\testAdaptor_config\\sinkParam.xml"]
    # Read the logger settings from the log configuration file.
    log_args = get_conf_attr(config[0])
    # Logger list handed to the sink below.
    logList = []
    # Register one logger per entry of the log configuration.
    for attDict in log_args:
         reticLog.addLogger(logList,attDict)
    # Initialize the sink.
    sink_args = get_conf_attr(config[1],'sink')   # sink parameters
    testSink = sink(sink_args,logList)            # actual initialization
    # The message is a "dummy message" read from an XML file for test purposes.
    # NOTE(review): dummyPipe is never closed in the lines visible here.
    dummyPipe = file('..\\pipes\\test_TPWD_Emat_out.xml','r')  
    testSink.getMsg(dummyPipe.read())
    recordList = testSink.getRecordList()
    # Show the record count and wait for Enter before continuing.
    raw_input(str(len(recordList))+" XML records"+' generated')
    testSink.processSites(recordList)
    print "yeah, sites info processed......"
    updateList = testSink.prepareUpdateObject(recordList,testTPWDmodel.tpwdProcessInfo)
    # Show the update count, wait for input, and echo whatever was typed.
    print raw_input(str(len(updateList))+" database records"+' to be inserted')
    testSink.updateDB(updateList)
    # Elapsed time since the start stub.
    # NOTE(review): elapsed_time is not used in the lines visible here.
    elapsed_time = time.clock() - start