def updateDB(self, objects):
    """Add every object in *objects* that is not already in DataValues.

    For each object a fresh ``ValueID`` is taken from ``self.getMaxId('DV')``
    and the table is queried for a row with the same DataValue,
    LocalDateTime, SiteID and VariableID.  When such a row exists the object
    is skipped; otherwise it is added to ``self.session`` and flushed, with
    up to ``self.retries`` retries after the first failed attempt.

    The session is only flushed here, never committed.

    Raises:
        RuntimeError: when the insert still fails after all retries.
    """
    reticLog.logInfo(self.logList, "( " + self.name + " ) Starting update objects from sink : " + self.name)
    # NOTE(review): the tally is kept from the original code, but nothing in
    # the visible function reports or returns it.
    count = 0
    for ob in objects:
        ob.ValueID = self.getMaxId('DV')
        try:
            where = and_(testTPWDmodel.DataValues.DataValue == ob.DataValue,
                         testTPWDmodel.DataValues.LocalDateTime == ob.LocalDateTime,
                         testTPWDmodel.DataValues.SiteID == ob.SiteID,
                         testTPWDmodel.DataValues.VariableID == ob.VariableID)
            self.session.query(testTPWDmodel.DataValues).filter(where).one()
        except NoResultFound:
            # This record does not exist yet: insert it.
            retries = self.retries
            execOk = 0
            lastError = None
            while retries >= 0 and execOk == 0:
                try:
                    self.session.add(ob)
                    self.session.flush()
                    execOk = 1
                except (exc.OperationalError, exc.InvalidRequestError) as err:
                    # Handler for unique-constraint violations on keys and
                    # for invalid-request errors.
                    print("DB constraint violation happen")
                    self.session.rollback()
                    lastError = err
                    # Without this decrement a persistent failure would
                    # retry forever.
                    retries = retries - 1
                    # Retrying with the key that just collided cannot
                    # succeed, so ask for a new one.
                    ob.ValueID = self.getMaxId('DV')
            if execOk == 0:
                reticLog.logWarning(self.logList, "Database Update failed : " + str(lastError))
                # A bare string is not a valid exception object.
                raise RuntimeError("Database Exception : all retries failed")
            print("recordNo == > %s generated" % (ob.ValueID,))
            count += 1
        else:
            # This record exists, skip it.
            print("record skipped")
def updateDB(self, methodLookUpfile):
    """Insert the result rows held in ``self.basinSegmentInfo`` into DataValues.

    For every entry of ``self.basinSegmentInfo`` that has result rows, each
    row is converted to DataValues fields (value, local date/time, site,
    variable, offset, method).  The method description is read from the dbm
    file *methodLookUpfile*, keyed by the row's variable code.  A row that
    already exists in the table is skipped; a new one is added to
    ``self.session`` and flushed, with up to ``self.retries`` retries after
    the first failed attempt.  The session is committed once at the end.

    Args:
        methodLookUpfile: path of the dbm file mapping variable codes to
            method descriptions.  It is opened read-only on the first row
            that needs it and closed before this method returns.

    Raises:
        RuntimeError: when an insert still fails after all retries.
        Exception: any other error raised while preparing or looking up a
            row is printed with its traceback and re-raised.
    """
    import datetime

    reticLog.logInfo(self.logList, "( " + self.name + " ) Starting update objects from sink : " + self.name)
    count = 0
    # Opened lazily so that nothing is touched when there are no rows, and
    # only once instead of once per row.
    methodDBMfile = None
    try:
        for key in self.basinSegmentInfo:
            segment = self.basinSegmentInfo[key]
            # Needed because for some years (eg: 1973) there is no data
            # collected in the result file.
            # NOTE(review): this compares the number of result rows with the
            # VALUE constant, which is also used as a column index below -
            # confirm that this is the intended threshold.
            if RESULT_IN_HASHTable not in segment or len(segment[RESULT_IN_HASHTable]) < VALUE:
                continue
            for resultRow in segment[RESULT_IN_HASHTable]:
                try:
                    ValueID = self.getMaxId('DV')
                    DataValue = float(resultRow[VALUE])
                    event = segment[EVENT_IN_HASHTable]
                    LocalDateTime = datetime.datetime.strptime(
                        " ".join([event[DATE], event[TIME]]), "%m/%d/%Y %H:%M")
                    SiteID = self.lookUpSite(event[SITECODE])
                    VariableID = self.lookUpVariableID(resultRow[VARIABLECODE])
                    if event[OFFSETDEPTH] != "":
                        OffsetValue = float(event[OFFSETDEPTH])
                    else:
                        OffsetValue = float(-9999)
                    # Find the method id.
                    if methodDBMfile is None:
                        import anydbm
                        methodDBMfile = anydbm.open(methodLookUpfile, 'r')
                    MethodDescription = methodDBMfile[resultRow[VARIABLECODE]]
                    MethodID = self.lookUpMethodID(MethodDescription)
                    where = and_(DataValues.DataValue == DataValue,
                                 DataValues.LocalDateTime == LocalDateTime,
                                 DataValues.SiteID == SiteID,
                                 DataValues.VariableID == VariableID,
                                 DataValues.OffsetValue == OffsetValue,
                                 DataValues.MethodID == MethodID)
                    self.session.query(DataValues).filter(where).one()
                except NoResultFound:
                    # This DataValue record does not exist: insert it.
                    retries = self.retries
                    execOk = 0
                    lastError = None
                    while retries >= 0 and execOk == 0:
                        try:
                            newDataValueRecord = DataValues(ValueID, DataValue, LocalDateTime, SiteID,
                                                            VariableID, OffsetValue, MethodID)
                            self.session.add(newDataValueRecord)
                            self.session.flush()
                            execOk = 1
                        except (exc.OperationalError, exc.InvalidRequestError) as err:
                            # Handler for unique-constraint violations on
                            # keys and for invalid-request errors.
                            print("DB constraint violation happen")
                            self.session.rollback()
                            lastError = err
                            # Without this decrement a persistent failure
                            # would retry forever.
                            retries = retries - 1
                            # Retrying with the key that just collided
                            # cannot succeed, so ask for a new one.
                            ValueID = self.getMaxId('DV')
                    if execOk == 0:
                        reticLog.logWarning(self.logList, "Database Update failed : " + str(lastError))
                        # A bare string is not a valid exception object.
                        raise RuntimeError("Database Exception : all retries failed")
                    print("recordNo == > %s generated" % (newDataValueRecord.ValueID,))
                    count += 1
                except TCEQRecordNotFoundError as e:
                    errorMessage = str(e)
                    reticLog.logWarning(self.logList, "Database Update failed : " + errorMessage)
                except Exception:
                    traceback.print_exc(file=sys.stdout)
                    print(resultRow)
                    raise
                else:
                    # This record exists, skip it.
                    print("record found, need to skip this record (may be wrong behavior....)")
    finally:
        if methodDBMfile is not None:
            methodDBMfile.close()

    reticLog.logInfo(self.logList, "( " + self.name + " ) Number DB record (%d) added : " % count + self.name)
    reticLog.logInfo(self.logList, "( " + self.name + " ) Update of Databases ended in sink : " + self.name)
    # Unit of work pattern: commit only once.
    try:
        self.session.commit()
        reticLog.logInfo(self.logList, "( " + self.name + " ) Update commited")
    except Exception as err:
        self.session.rollback()
        reticLog.logWarning(self.logList, "Commit Failed in SQLSink" + " : " + str(err))


def getMaxId(self, tabFlag):
    """Return the next id to use for a new record of the table named by *tabFlag*.

    Only ``'DV'`` (DataValues) is handled: the result is the current maximum
    ``ValueID`` plus one.  The result is 0 when *tabFlag* is anything else,
    when the table has no rows, or when the lookup fails (the failure is
    logged as a warning).
    """
    maxid = 0
    # For TCEQ, right now ids only need to be generated for DataValues.
    if tabFlag == 'DV':
        try:
            column = DataValues.ValueID.property.columns[0]
            currentMax = self.session.query(func.max(column)).one()[0]
        except Exception as err:
            # Best effort: keep returning 0, but leave a trace of the cause.
            reticLog.logWarning(self.logList, "Could not read max ValueID : " + str(err))
        else:
            # max() over an empty table yields None.
            if currentMax is not None:
                maxid = currentMax + 1
    return maxid
def main():
    """Download the TCEQ station list and insert the new sites into Sites.

    The pipe-delimited station file is fetched through an HTTPSource.  The
    first line is skipped as a header.  Every other line whose site code,
    latitude and longitude are not already in the Sites table is added to
    the session and flushed, with up to 5 retries after the first failed
    attempt.  The session is committed once per fetched message.

    Raises:
        RuntimeError: when an insert still fails after all retries.
    """
    session = initDB()
    # Logger setup. Here, simply set a console logger.
    logAttDic = {'name': 'TCEQ sites and parameters importing for the first time',
                 'level': 'DEBUG',
                 'format': 'Simple',
                 'handler': 'ConsoleAppender'}
    logList = []
    reticLog.addLogger(logList, logAttDic)

    # Get the sites list (a text file) from an HTTPSource and insert all the
    # sites into the "Sites" table of the ODM database.
    siteSrc_args = {}
    siteSrc_args['name'] = "TCEQ sites httpsource"
    siteSrc_args['URL'] = "ftp://ftp.tceq.state.tx.us/pub/WaterResourceManagement/WaterQuality/DataCollection/CleanRivers/public/stations.txt"
    sitesHTTPSource = HTTPSource.source(siteSrc_args, logList)
    sitesHTTPSource.start()
    while sitesHTTPSource.next() == 1:
        print("Content of this URL: %s" % sitesHTTPSource.URL)
        sitesFile = StringIO(sitesHTTPSource.msg[0])
        sitesListReader = csv.reader(sitesFile, delimiter='|')
        for index, row in enumerate(sitesListReader):
            if index == 0:
                continue
            try:
                # Site names are cut to at most 255 characters.
                newRecordSiteName = row[SITENAME][:255]
                where = and_(Sites.SiteCode == unicode(row[SITECODE]),
                             Sites.Latitude == float(row[LATITUDE]),
                             Sites.Longitude == float(row[LONGITUDE]))
                session.query(Sites).filter(where).one()
            except NoResultFound:
                # This site record does not exist, so insert it.
                # retries is the max number of extra insertion attempts and
                # execOk tells whether the insert succeeded.
                retries, execOk = 5, 0
                lastError = None
                newSiteRecord = Sites(row[SITECODE], newRecordSiteName,
                                      float(row[LATITUDE]), float(row[LONGITUDE]), row[COUNTY],
                                      ";".join(["HUC 8 = ", row[HUC], "EPA_Type1 = ", row[TYPE1],
                                                "EPA_Type2 = ", row[TYPE2]]))
                while retries >= 0 and execOk == 0:
                    try:
                        newSiteRecord.SiteID = getMaxId(session, "Sites")
                        session.add(newSiteRecord)
                        session.flush()
                        execOk = 1
                    except (exc.OperationalError, exc.InvalidRequestError) as err:
                        # Handler for unique-constraint violations on keys
                        # and for invalid-request errors.
                        print("DB constraint violation happen")
                        session.rollback()
                        lastError = err
                        retries = retries - 1
                if execOk == 0:
                    reticLog.logWarning(logList, "Sites Table in Database Update failed : " + str(lastError))
                    # A bare string is not a valid exception object.
                    raise RuntimeError("Database Exception : all retries failed")
                print("inert new Site record with SiteCode ==> %s" % row[SITECODE])
            else:
                print("find record with SiteCode %s in database, skip it..." % row[SITECODE])
        # Unit of work pattern: commit only once.
        try:
            session.commit()
            reticLog.logInfo(logList, "( " + "TCEQ Sites" + " ) Update commited")
        except Exception as err:
            session.rollback()
            reticLog.logWarning(logList, "Commit Failed in SQLSink" + " : " + str(err))
        sitesHTTPSource.commit()