def blob2text(self):
    """Dump the ObjectMetadataBLOB of a test object from both databases.

    Two queries are made against the ``IngestObjectMetadata`` table:

    * through ``ESSDB`` for the row with ``id`` 9, written to
      ``X0000003_MySQL.res``;
    * through ``ESSMSSQL`` for ``ObjectIdentifierValue`` ``'X0000003'`` with
      ``ObjectMetadataType`` 28, written to ``X0000003_MsSQL.res``.

    The blob (third selected column) is printed and the blob of the first
    row is written in binary mode.  A failed or empty query is reported on
    stdout and that file is skipped instead of indexing into a missing
    result.  Returns ``None``.
    """
    columns = ('ObjectIdentifierValue', 'ObjectMetadataType',
               'ObjectMetadataBLOB')

    # --- first source: ESSDB, row selected by primary key ---------------
    res, errno, why = ESSDB.DB().action(
        'IngestObjectMetadata', 'GET3', columns, ('id', 9))
    if errno:
        print('why %s' % (why,))
    elif not res:
        print('No IngestObjectMetadata row found in ESSDB for id 9')
    else:
        print(res[0][2])
        # "with" guarantees the handle is closed even if write() raises.
        with open('/ESSArch/bin/src/testdata/X0000003_MySQL.res',
                  'wb') as out:
            out.write(res[0][2])

    # --- second source: ESSMSSQL, row selected by object id and type ----
    res, errno, why = ESSMSSQL.DB().action(
        'IngestObjectMetadata', 'GET3', columns,
        ('ObjectIdentifierValue', 'X0000003', 'AND',
         'ObjectMetadataType', 28))
    if errno:
        print('why %s' % (why,))
    elif not res:
        print('No IngestObjectMetadata row found in ESSMSSQL for X0000003')
    else:
        for row in res:
            print(row[2])
        # Only the first matching row is persisted, as before.
        with open('/ESSArch/bin/src/testdata/X0000003_MsSQL.res',
                  'wb') as out:
            out.write(res[0][2])
def blob2ftp(self):
    """Export metadata blobs that have no ObjectMetadataServer set.

    Selects the ``id`` of every row in ``IngestObjectMetadata_old`` whose
    ``ObjectMetadataServer`` IS NULL.  For each id the row is re-read, its
    ``ObjectMetadataBLOB`` is written to
    ``/store/metablob/<ObjectIdentifierValue>_Content_METS.xml`` and that
    file is handed to ``prod().StoreMetadataBlob`` with
    ``ObjectMetadataType=26``, ``FTPflag=1`` and ``DBflag=0``.

    Progress and errors are printed to stdout; a failure on one row does
    not stop the remaining rows.  Sets ``self.table`` as a side effect and
    returns ``None``.
    """
    AgentIdentifierValue = 'ESSArch_Marieberg'
    self.table = 'IngestObjectMetadata_old'

    res, errno, why = ESSDB.DB().action(
        self.table, 'GET4', ('id', ),
        ('ObjectMetadataServer', 'IS', 'NULL'))
    if errno:
        print('Error: why %s' % (why,))
        return
    print('Found all metadataobject id (number: %s), res: %s'
          % (str(len(res)), str(res)))

    for id_row in res:
        IngestObjectMetadata_row, errno, why = ESSDB.DB().action(
            self.table, 'GET4',
            ('id', 'ObjectUUID', 'ObjectIdentifierValue',
             'ObjectMetadataBLOB'),
            ('id', '=', id_row[0]))
        if errno:
            print('Error1: why %s' % (why,))
            continue

        row = IngestObjectMetadata_row[0]
        Cmets_objpath = '/store/metablob/%s_Content_METS.xml' % str(row[2])
        # Format first, then print: "print(...) % (...)" only works as a
        # Python 2 print statement.
        print('Store blob for rowid: %s object: %s to filename: %s'
              % (str(row[0]), str(row[2]), Cmets_objpath))

        # "with" closes the file even if write() raises.
        with open(Cmets_objpath, 'wb') as out:
            out.write(row[3])

        # A separate name is used for the result so the list being iterated
        # (res) is not rebound inside its own loop.
        store_res, errno, why = prod().StoreMetadataBlob(
            ObjectUUID=row[1],
            ObjectIdentifierValue=row[2],
            ObjectMetadataType=26,
            FILENAME=Cmets_objpath,
            FTPflag=1,
            DBflag=0,
            AgentIdentifierValue=AgentIdentifierValue)
        if errno:
            print('Error2: why %s' % (why,))
# ---------------------------------------------------------------------------
# Process identity and runtime switches for this daemon script.
# ---------------------------------------------------------------------------
ProcName = 'FTPServer'
ProcVersion = __version__
Debug = 1
MultiProc = 0
Console = 0
# Other levels that have been used here: logging.INFO,
# multiprocessing.SUBDEBUG.
LogLevel = logging.DEBUG

# Only the first command-line argument is inspected:
#   -d        turn on debug output
#   -v / -V   print the version and exit
if len(sys.argv) > 1:
    if sys.argv[1] == '-d':
        Debug = 1
    if sys.argv[1] in ('-v', '-V'):
        print(' '.join([str(ProcName), 'Version', str(ProcVersion)]))
        sys.exit()

# First ESSProc row registered under this process name.
LogFile, Time, Status, Run = ESSDB.DB().action(
    'ESSProc', 'GET', ('LogFile', 'Time', 'Status', 'Run'),
    ('Name', ProcName))[0]

##########################
# Log format
# "formatter" carries a timestamp, "formatter2" does not; when MultiProc is
# set both additionally include the process name.
formatter = logging.Formatter(
    '%(asctime)s %(levelname)s/%(processName)-8s %(message)s' if MultiProc
    else '%(asctime)s %(levelname)-8s %(message)s',
    '%d %b %Y %H:%M:%S')
formatter2 = logging.Formatter(
    '%(levelname)s/%(processName)-8s %(message)s' if MultiProc
    else '%(levelname)-8s %(message)s',
    '%d %b %Y %H:%M:%S')
def ThreadMain(self, ProcName):
    """Worker loop that validates AIP packages listed in the ingest table.

    Every cycle acquires ``self.mLock`` and reads ``Time`` and ``Run`` for
    *ProcName* from ``ESSProc``.  When ``Run`` is ``'0'`` the process row is
    reset, ``self.RunFlag`` is cleared and the loop idles.  Otherwise the
    rows of the configured ingest table with ``StatusActivity = '0'`` and
    ``StatusProcess`` between 49 and 51 are processed one by one:

    * policy ``IngestMetadata > 0``: the package is checked in sequence
      (PMETS info, path existence, METS file group vs PREMIS size, content
      METS vs TAR size, PMETS/CMETS checksums, optional digest comparison,
      optional XML schema validation, optional metadata upload).  The first
      failing check records event ``1050`` and clears ``self.ok`` so the
      remaining checks are skipped.  The row ends with
      ``SetAIPstatus(..., 51, 4)`` on failure or ``StatusProcess '59'`` on
      success.
    * policy ``IngestMetadata == 0``: the row is set to ``StatusProcess
      '59'`` without validation.

    The loop ends when ``self.mDieFlag`` is 1; the flag is reset to 0 on
    exit.

    :param ProcName: value of the ``Name`` column identifying this process
        in ``ESSProc``.
    :returns: ``None``.
    """

    def fail(event_info):
        """Log *event_info*, record it as a failed 1050 event, clear ok."""
        self.event_info = event_info
        logging.error(event_info)
        ESSPGM.Events().create('1050', '', 'ESSArch AIPValidate',
                               ProcVersion, '1', event_info, 2,
                               self.ObjectIdentifierValue)
        self.ok = 0

    def mark_validated(digest_fields, event_detail):
        """Set the object to StatusProcess 59 locally and, if enabled, externally."""
        self.timestamp_utc = datetime.datetime.utcnow().replace(
            microsecond=0, tzinfo=pytz.utc)
        self.timestamp_dst = self.timestamp_utc.astimezone(self.tz)
        naive_utc = self.timestamp_utc.replace(tzinfo=None)
        where = ('ObjectIdentifierValue', self.ObjectIdentifierValue)
        status_fields = ('StatusProcess', '59',
                         'StatusActivity', '0') + digest_fields

        res, errno, why = ESSDB.DB().action(
            self.IngestTable, 'UPD',
            status_fields + (
                'LastEventDate', naive_utc,
                'linkingAgentIdentifierValue', AgentIdentifierValue,
                'LocalDBdatetime', naive_utc),
            where)
        if errno:
            logging.error('Failed to update Local DB: ' +
                          str(self.ObjectIdentifierValue) +
                          ' error: ' + str(why))
        else:
            ESSPGM.Events().create('1050', '', 'ESSArch AIPValidate',
                                   ProcVersion, '0', event_detail, 2,
                                   self.ObjectIdentifierValue)
        if errno == 0 and ExtDBupdate:
            # The external DB gets the local-timezone timestamp.
            ext_res, ext_errno, ext_why = ESSMSSQL.DB().action(
                self.IngestTable, 'UPD',
                status_fields + (
                    'LastEventDate',
                    self.timestamp_dst.replace(tzinfo=None),
                    'linkingAgentIdentifierValue', AgentIdentifierValue),
                where)
            if ext_errno:
                logging.error('Failed to update External DB: ' +
                              str(self.ObjectIdentifierValue) +
                              ' error: ' + str(ext_why))
            else:
                res, errno, why = ESSDB.DB().action(
                    self.IngestTable, 'UPD',
                    ('ExtDBdatetime', naive_utc), where)
                if errno:
                    logging.error('Failed to update Local DB: ' +
                                  str(self.ObjectIdentifierValue) +
                                  ' error: ' + str(why))

    logging.info('Starting ' + ProcName)
    self.tz = timezone.get_default_timezone()
    self.ChecksumAlgorithm_CHOICES_dict = dict(ChecksumAlgorithm_CHOICES)
    self.ChecksumAlgorithm_CHOICES_invdict = ESSPGM.Check().invert_dict(
        self.ChecksumAlgorithm_CHOICES_dict)
    while 1:
        if self.mDieFlag == 1:
            break  # Request for death
        self.mLock.acquire()
        self.Time, self.Run = ESSDB.DB().action(
            'ESSProc', 'GET', ('Time', 'Run'), ('Name', ProcName))[0]
        if self.Run == '0':
            logging.info('Stopping ' + ProcName)
            ESSDB.DB().action('ESSProc', 'UPD',
                              ('Status', '0', 'Run', '0', 'PID', '0'),
                              ('Name', ProcName))
            self.RunFlag = 0
            self.mLock.release()
            if Debug:
                logging.info('RunFlag: 0')
            time.sleep(2)
            continue

        # Process Item
        self.IngestTable = ESSDB.DB().action(
            'ESSConfig', 'GET', ('Value', ), ('Name', 'IngestTable'))[0][0]
        if ExtDBupdate:
            self.ext_IngestTable = self.IngestTable
        else:
            self.ext_IngestTable = ''
        self.dbget, errno, why = ESSDB.DB().action(
            self.IngestTable, 'GET4',
            ('ObjectIdentifierValue', 'ObjectPackageName', 'PolicyId',
             'MetaObjectIdentifier', 'MetaObjectSize', 'DataObjectSize',
             'ObjectSize', 'ObjectUUID'),
            ('StatusActivity', '=', '0', 'AND',
             'StatusProcess', 'BETWEEN', 49, 'AND', 51))
        if errno:
            logging.error('Failed to access Local DB, error: ' + str(why))

        # "or ()" keeps the loop safe if a failed query yields None.
        for self.obj in self.dbget or ():
            self.ProcDB = ESSDB.DB().action(
                'ESSProc', 'GET', ('Run', 'Pause'), ('Name', ProcName))[0]
            if self.ProcDB[0] == '0':
                logging.info('Stopping ' + ProcName)
                ESSDB.DB().action('ESSProc', 'UPD',
                                  ('Status', '0', 'Run', '0', 'PID', '0'),
                                  ('Name', ProcName))
                thread.interrupt_main()
                time.sleep(5)
                break
            elif self.ProcDB[1] == 1:
                # Paused: poll once a minute until Pause is cleared.
                while 1:
                    time.sleep(60)
                    self.ProcDB = ESSDB.DB().action(
                        'ESSProc', 'GET', ('Run', 'Pause'),
                        ('Name', ProcName))[0]
                    if self.ProcDB[1] == 1:
                        logging.info('Process is in pause state')
                    else:
                        break

            # Columns 3-5 (MetaObjectIdentifier, MetaObjectSize,
            # DataObjectSize) are selected but not used by this worker.
            self.ObjectIdentifierValue = self.obj[0]
            self.ObjectPackageName = self.obj[1]
            self.PolicyId = self.obj[2]
            self.ObjectSize = self.obj[6]
            self.ObjectUUID = self.obj[7]
            logging.debug('self.obj: ' + str(self.obj))
            self.ok = 1
            ArchivePolicy_obj = ArchivePolicy.objects.get(
                PolicyStat=1, PolicyID=self.PolicyId)

            ###########################################################
            # set variables
            self.AIPpath = ArchivePolicy_obj.AIPpath
            self.metatype = ArchivePolicy_obj.IngestMetadata
            self.ChecksumAlgorithm = ArchivePolicy_obj.ChecksumAlgorithm
            self.SIPpath = ArchivePolicy_obj.IngestPath
            self.ValidateChecksum = ArchivePolicy_obj.ValidateChecksum
            self.ValidateXML = ArchivePolicy_obj.ValidateXML
            self.ObjectPath = os.path.join(self.AIPpath,
                                           self.ObjectPackageName)
            self.Pmets_objpath = os.path.join(
                self.AIPpath,
                self.ObjectIdentifierValue + '_Package_METS.xml')

            if self.metatype > 0:
                # Sample the clock once so h/m/s belong to the same instant.
                now = time.localtime()
                self.startVerTime = datetime.timedelta(
                    seconds=now[5], minutes=now[4], hours=now[3])
                errno, why = ESSPGM.DB().SetAIPstatus(
                    self.IngestTable, self.ext_IngestTable,
                    AgentIdentifierValue, self.ObjectUUID, 50, 5)
                if errno:
                    logging.error('Failed to update DB status for AIP: ' +
                                  str(self.ObjectIdentifierValue) +
                                  ' error: ' + str(why))
                logging.info('Start validate AIP package: ' +
                             self.ObjectIdentifierValue)
                self.Cmets_obj = None
                self.Cmets_objpath = None
                self.premis_obj = None
                self.premis_objpath = None
                self.addml_obj = None
                self.addml_objpath = None
                # These are only assigned when the PMETS supplies a value;
                # reset them so nothing is left unset or carried over from
                # the previously processed object.
                self.PackageMessageDigest = ''
                self.PackageSize = 0
                self.CMetsMessageDigest = ''
                self.CMetsSize = 0

                ##########################################
                # Get PMETS info
                if self.ok:
                    [self.Package_info, self.CMets_info], errno, why = \
                        ESSMD.getPMETSInfo(FILENAME=self.Pmets_objpath)
                    if errno:
                        fail('Failed to get PMETS info for object: %s, errno: %s, why: %s' % (
                            self.ObjectIdentifierValue, str(errno),
                            str(why)))
                    else:
                        logging.debug(
                            'Object: %s, Package_info: %s, CMets_info: %s',
                            self.ObjectIdentifierValue,
                            str(self.Package_info), str(self.CMets_info))
                        # CMets_info and Package_info look like:
                        # ['A0007600_Content_METS.xml', 'MD5',
                        #  'b0270cb4d196b72b87fe27ce6242df18', 64058,
                        #  'text/xml']
                        self.PackageMessageDigestAlgorithm = \
                            self.ChecksumAlgorithm_CHOICES_invdict[
                                self.Package_info[1]]
                        if self.Package_info[2]:
                            self.PackageMessageDigest = self.Package_info[2]
                        if self.Package_info[3]:
                            self.PackageSize = int(self.Package_info[3])
                        self.Cmets_obj = self.CMets_info[0]
                        if self.metatype in [1, 2, 3]:
                            self.Cmets_objpath = os.path.join(
                                self.AIPpath, self.Cmets_obj)
                        elif self.metatype in [4]:
                            self.Cmets_objpath = os.path.join(
                                self.SIPpath, self.Cmets_obj)
                        self.CMetsMessageDigestAlgorithm = \
                            self.ChecksumAlgorithm_CHOICES_invdict[
                                self.CMets_info[1]]
                        if self.CMets_info[2]:
                            self.CMetsMessageDigest = self.CMets_info[2]
                        if self.CMets_info[3]:
                            self.CMetsSize = int(self.CMets_info[3])

                        ######################################################
                        # Check if ObjectPath and Cmets_objpath exist
                        # Cmets_objpath stays None for a metatype outside
                        # 1-4; treat that as "not accessible" rather than
                        # passing None to os.path.exists().
                        if (self.Cmets_objpath is not None
                                and os.path.exists(self.ObjectPath)
                                and os.path.exists(self.Cmets_objpath)):
                            self.ok = 1
                        else:
                            fail('The path to Object: %s or METS_Metaobject: %s is not accessible!' % (
                                self.ObjectPath, self.Cmets_objpath))

                ##########################################
                # Get MetsFgrp001TotalSize from METSfile
                if self.ok:
                    self.MetsFgrp001TotalSize, errno, why = \
                        ESSMD.getFileSizeFgrp001(FILENAME=self.Cmets_objpath)
                    if errno:
                        fail('Failed to get MetsFgrp001TotalSize for object: %s, errno: %s, why: %s' % (
                            self.ObjectIdentifierValue, str(errno),
                            str(why)))
                    else:
                        logging.debug(
                            'Object: %s, MetsFgrp001TotalSize: %s',
                            self.ObjectIdentifierValue,
                            str(self.MetsFgrp001TotalSize))

                ##########################################
                # Get PremisObjectTotalSize from METSfile
                if self.ok:
                    if self.metatype in [1, 2, 3]:
                        self.premis_objpath = self.Cmets_objpath
                    elif self.metatype in [4]:
                        # Get metadata from METS file
                        res_info, res_files, res_struct, errno, why = \
                            ESSMD.getMETSFileList(
                                FILENAME=self.Cmets_objpath)
                        if errno:
                            fail('Failed to get metadata from content METS for object: %s, errno: %s, why: %s' % (
                                self.ObjectIdentifierValue, str(errno),
                                str(why)))
                        else:
                            for res_file in res_files:
                                if res_file[0] != 'amdSec':
                                    continue
                                if res_file[2] == 'digiprovMD':
                                    # [5:] drops the first five characters
                                    # of the reference
                                    # (presumably a "file:" prefix - verify).
                                    self.premis_obj = res_file[8][5:]
                                    self.premis_objpath = '%s/%s/%s' % (
                                        self.SIPpath,
                                        self.ObjectIdentifierValue,
                                        self.premis_obj)
                                elif (res_file[2] == 'techMD'
                                        and res_file[16] == 'ADDML'):
                                    self.addml_obj = res_file[8][5:]
                                    self.addml_objpath = '%s/%s/%s' % (
                                        self.SIPpath,
                                        self.ObjectIdentifierValue,
                                        self.addml_obj)
                    # Do not query PREMIS sizes if reading the METS file
                    # list already failed above.
                    if self.ok:
                        self.PremisObjectTotalSize, errno, why = \
                            ESSMD.getFileSizePremis(
                                FILENAME=self.premis_objpath)
                        if errno:
                            fail('Failed to get PREMISobjects total size in METSfile for object: %s, errno: %s, why: %s' % (
                                self.ObjectIdentifierValue, str(errno),
                                str(why)))
                        else:
                            logging.debug(
                                'Object: %s, PremisObjectTotalSize: %s',
                                self.ObjectIdentifierValue,
                                str(self.PremisObjectTotalSize))

                ###################################################
                # Check if MetsFgrp001TotalSize is equal to
                # PremisObjectTotalSize
                if self.ok:
                    if self.MetsFgrp001TotalSize == self.PremisObjectTotalSize:
                        logging.info(
                            'Succeeded to verify METS Fgrp001 and PREMIS for object: %s, MetsFgrp001TotalSize: %s is equal to PremisObjectTotalSize: %s',
                            self.ObjectIdentifierValue,
                            str(self.MetsFgrp001TotalSize),
                            str(self.PremisObjectTotalSize))
                    else:
                        fail('Failed to verify METS Fgrp001 and PREMIS for object: %s, MetsFgrp001TotalSize: %s is not equal to PremisObjectTotalSize: %s' % (
                            self.ObjectIdentifierValue,
                            str(self.MetsFgrp001TotalSize),
                            str(self.PremisObjectTotalSize)))

                ##########################################
                # Get CMetsTotalSize
                if self.ok:
                    self.CMetsTotalSize, errno, why = ESSMD.getTotalSize(
                        FILENAME=self.Cmets_objpath)
                    if errno:
                        fail('Failed to get CMetsTotalSize for object: %s, errno: %s, why: %s' % (
                            self.ObjectIdentifierValue, str(errno),
                            str(why)))
                    else:
                        logging.debug('Object: %s, raw CMetsTotalSize: %s',
                                      self.ObjectIdentifierValue,
                                      str(self.CMetsTotalSize))
                        if self.CMetsSize:
                            # Add Content Mets filesize: one more file and
                            # its byte count.
                            self.CMetsTotalSize[0] += 1
                            self.CMetsTotalSize[1] += self.CMetsSize
                            logging.debug('Object: %s, CMetsTotalSize: %s',
                                          self.ObjectIdentifierValue,
                                          str(self.CMetsTotalSize))

                ##########################################
                # Get TarFileSize
                if self.ok:
                    self.TarFileSize, errno, why = \
                        ESSPGM.Check().getFileSizeTAR(self.ObjectPath)
                    if errno:
                        fail('Failed to get TarFileSize for object: %s, errno: %s, why: %s' % (
                            self.ObjectIdentifierValue, str(errno),
                            str(why)))
                    else:
                        logging.debug('Object: %s, TarFileSize: %s',
                                      self.ObjectIdentifierValue,
                                      str(self.TarFileSize))

                ###################################################
                # Check if CMetsTotalSize is equal to TarFileSize
                if self.ok:
                    if self.CMetsTotalSize == self.TarFileSize:
                        logging.info(
                            'Succeeded to verify total package size for object: %s, CMetsTotalSize: %s is equal to TarFileSize: %s',
                            self.ObjectIdentifierValue,
                            str(self.CMetsTotalSize),
                            str(self.TarFileSize))
                    else:
                        fail('Failed to verify total package size for object: %s, CMetsTotalSize: %s is not equal to TarFileSize: %s' % (
                            self.ObjectIdentifierValue,
                            str(self.CMetsTotalSize),
                            str(self.TarFileSize)))

                ########################
                # get checksum for PMETS file
                if self.ok:
                    self.PMetsMessageDigestAlgorithm = self.ChecksumAlgorithm
                    self.PMetsMessageDigest = ''
                    self.Pmets_obj_checksum, errno, why = \
                        ESSPGM.Check().checksum(
                            self.Pmets_objpath,
                            self.PMetsMessageDigestAlgorithm)
                    if errno:
                        # Report the PMETS path: that is the file whose
                        # checksum failed (the CMETS path was reported
                        # here before).
                        fail('Failed to get checksum for: %s, Error: %s' % (
                            self.Pmets_objpath, str(why)))
                    else:
                        logging.debug('Checksum for PMETS file: %s',
                                      self.Pmets_obj_checksum)
                        # NOTE(review): the digest was computed with the
                        # policy's ChecksumAlgorithm but is recorded as
                        # algorithm 1 - confirm this is intended.
                        self.PMetsMessageDigestAlgorithm = 1
                        self.PMetsMessageDigest = self.Pmets_obj_checksum

                ########################
                # get checksum for CMETS file
                if self.ok:
                    self.Cmets_obj_checksum, errno, why = \
                        ESSPGM.Check().checksum(
                            self.Cmets_objpath,
                            self.CMetsMessageDigestAlgorithm)
                    if errno:
                        fail('Failed to get checksum for: %s, Error: %s' % (
                            self.Cmets_objpath, str(why)))
                    else:
                        logging.debug('Checksum for CMETS file: %s',
                                      self.Cmets_obj_checksum)

                ###################################################
                # Check CMetsMessageDigest
                if self.ok:
                    if self.ValidateChecksum:
                        if self.CMetsMessageDigest == self.Cmets_obj_checksum:
                            logging.info(
                                'Succeeded to verify Content Mets MessageDigest for object: %s',
                                self.ObjectIdentifierValue)
                        else:
                            # The message used to say "is equal to" on this
                            # mismatch branch.
                            fail('Failed to verify Content Mets MessageDigest for object: %s, CMetsMessageDigest: %s is not equal to FileMessageDigest: %s' % (
                                self.ObjectIdentifierValue,
                                str(self.CMetsMessageDigest),
                                str(self.Cmets_obj_checksum)))
                    else:
                        logging.warning(
                            'Checksum validate is disabled for object: %s',
                            self.ObjectIdentifierValue)

                ########################
                # XML Schema Validate CMETS, PMETS, PREMIS and ADDML files
                if self.ValidateXML:
                    # (label, path, present): PREMIS and ADDML are only
                    # validated when the METS file list named such a file.
                    xml_targets = (
                        ('Content METS', self.Cmets_objpath, True),
                        ('Package METS', self.Pmets_objpath, True),
                        ('PREMIS', self.premis_objpath, self.premis_obj),
                        ('ADDML', self.addml_objpath, self.addml_obj),
                    )
                    for label, xml_path, present in xml_targets:
                        if not (self.ok and present):
                            continue
                        errno, why = ESSMD.validate(FILENAME=xml_path)
                        if errno:
                            fail('Failed to schema validate %s file for object: %s, why: %s' % (
                                label, self.ObjectIdentifierValue,
                                str(why)))
                        else:
                            logging.info(
                                'Succeeded to schema validate %s file for object: %s',
                                label, self.ObjectIdentifierValue)
                else:
                    logging.warning(
                        'Schema validate XML is disabled for object: %s',
                        self.ObjectIdentifierValue)

                now = time.localtime()
                self.stopVerTime = datetime.timedelta(
                    seconds=now[5], minutes=now[4], hours=now[3])
                self.VerTime = self.stopVerTime - self.startVerTime
                if self.VerTime.seconds < 1:
                    # Fix min time to 1 second if it is zero.
                    self.VerTime = datetime.timedelta(seconds=1)
                # "//" keeps the integer result the Python 2 "/" produced.
                self.ObjectSizeMB = int(self.ObjectSize) // 1048576
                self.VerMBperSEC = int(self.ObjectSizeMB) // int(
                    self.VerTime.seconds)

                ##################################
                # Write metadatafiles to DB-blob or FTP
                if self.ok:
                    if ESSDB.DB().action('ESSConfig', 'GET', ('Value', ),
                                         ('Name', 'MD_FTP_HOST'))[0][0]:
                        # (label, ObjectMetadataType, local path, FTP name)
                        uploads = []
                        if self.Cmets_obj:
                            uploads.append((
                                'Content METS', 26, self.Cmets_objpath,
                                self.Cmets_obj.replace(
                                    self.ObjectIdentifierValue + '/', '')))
                        if self.premis_obj:
                            uploads.append((
                                'PREMIS', 27, self.premis_objpath,
                                self.premis_obj))
                        if self.addml_obj:
                            uploads.append((
                                'ADDML', 25, self.addml_objpath,
                                self.addml_obj))
                        # As before, a failed upload does not stop the
                        # remaining uploads.
                        for label, md_type, md_path, ftp_name in uploads:
                            res, errno, why = \
                                ESSmetablob.prod().StoreMetadataBlob(
                                    ObjectUUID=self.ObjectUUID,
                                    ObjectIdentifierValue=self.ObjectIdentifierValue,
                                    ObjectMetadataType=md_type,
                                    FILENAME=md_path,
                                    FTPFileName=ftp_name,
                                    FTPflag=1,
                                    DBflag=0,
                                    AgentIdentifierValue=AgentIdentifierValue)
                            if errno:
                                fail('Failed to store %s file to FTP server or DB-blob: %s, errno: %s, why: %s' % (
                                    label, self.ObjectIdentifierValue,
                                    str(errno), str(why)))
                            else:
                                logging.info(
                                    'Succeeded to store %s file to FTP server or DB-blob for object: %s',
                                    label, self.ObjectIdentifierValue)
                    else:
                        logging.info(
                            'Skip to store metadata to FTP server or DB-blob for object: %s',
                            self.ObjectIdentifierValue)

                if not self.ok:
                    errno, why = ESSPGM.DB().SetAIPstatus(
                        self.IngestTable, self.ext_IngestTable,
                        AgentIdentifierValue, self.ObjectUUID, 51, 4)
                    if errno:
                        logging.error(
                            'Failed to update DB status for AIP: ' +
                            str(self.ObjectIdentifierValue) +
                            ' error: ' + str(why))
                    else:
                        fail('Failed to validate AIP package: ' +
                             self.ObjectIdentifierValue)
                else:
                    logging.info('Succeeded to validate AIP package: ' +
                                 self.ObjectIdentifierValue + ' , ' +
                                 str(self.VerMBperSEC) +
                                 ' MB/Sec and Time: ' + str(self.VerTime))
                    mark_validated(
                        ('CMetaMessageDigestAlgorithm',
                         self.CMetsMessageDigestAlgorithm,
                         'CMetaMessageDigest', self.CMetsMessageDigest,
                         'PMetaMessageDigestAlgorithm',
                         self.PMetsMessageDigestAlgorithm,
                         'PMetaMessageDigest', self.PMetsMessageDigest),
                        '')
            elif self.metatype == 0:
                # self.metatype 0 = No metadata
                logging.info('Skip to validate AIP package: ' +
                             self.ObjectIdentifierValue)
                mark_validated(
                    ('CMetaMessageDigestAlgorithm', '0',
                     'PMetaMessageDigestAlgorithm', '0'),
                    'Skip to validate AIP package')

        db.close_old_connections()
        self.mLock.release()
        time.sleep(int(self.Time))
    self.mDieFlag = 0
def ThreadMain(self, ProcName):
    """Worker loop that removes ingested SIP directories.

    Every cycle acquires ``self.mLock`` and reads ``Time`` and ``Run`` for
    *ProcName* from ``ESSProc``.  When ``Run`` is ``'0'`` the process row is
    reset, ``self.RunFlag`` is cleared and the loop idles.  Otherwise the
    rows of the configured ingest table with ``StatusProcess`` between 59
    and 61 and ``StatusActivity = 0`` are processed:

    * policy ``IngestDelete == 1``: ``<IngestPath>/<ObjectIdentifierValue>``
      is removed with ``shutil.rmtree`` and, for ``IngestMetadata == 1``,
      the sibling ``.tar``, ``_Content_METS.xml`` and ``_Package_METS.xml``
      files are removed too.  Success ends in ``SetAIPstatus(..., 69, 0)``,
      a removal error in ``SetAIPstatus(..., 61, 4)``; both record event
      ``1060``.
    * otherwise the object goes straight to ``SetAIPstatus(..., 69, 0)``.

    The loop ends when ``self.mDieFlag`` is 1.

    :param ProcName: value of the ``Name`` column identifying this process
        in ``ESSProc``.
    :returns: ``None``.
    """
    logging.info('Starting ' + ProcName)
    while 1:
        if self.mDieFlag == 1:
            break  # Request for death
        self.mLock.acquire()
        self.Time, self.Run = ESSProc.objects.filter(
            Name=ProcName).values_list('Time', 'Run')[0]
        if self.Run == '0':
            logging.info('Stopping ' + ProcName)
            ESSProc.objects.filter(Name=ProcName).update(
                Status='0', Run='0', PID=0)
            self.RunFlag = 0
            self.mLock.release()
            if Debug:
                logging.info('RunFlag: 0')
            time.sleep(2)
            continue

        # Process Item
        self.IngestTable = ESSDB.DB().action(
            'ESSConfig', 'GET', ('Value', ), ('Name', 'IngestTable'))[0][0]
        if ExtDBupdate:
            self.ext_IngestTable = self.IngestTable
        else:
            self.ext_IngestTable = ''
        if Debug:
            logging.info('Start to list worklist (self.dbget)')
        self.dbget, errno, why = ESSDB.DB().action(
            self.IngestTable, 'GET4',
            ('ObjectUUID', 'ObjectIdentifierValue', 'PolicyId',
             'DataObjectSize'),
            ('StatusProcess', 'BETWEEN', 59, 'AND', 61, 'AND',
             'StatusActivity', '=', 0))
        if errno:
            logging.error('Failed to access Local DB, error: ' + str(why))

        # "or ()" keeps the loop safe if a failed query yields None.
        for self.obj in self.dbget or ():
            if ESSProc.objects.get(Name=ProcName).Run == '0':
                logging.info('Stopping ' + ProcName)
                ESSProc.objects.filter(Name=ProcName).update(
                    Status='0', Run='0', PID=0)
                thread.interrupt_main()
                break

            self.ObjectUUID = self.obj[0]
            self.ObjectIdentifierValue = self.obj[1]
            self.PolicyId = self.obj[2]
            self.DataObjectSize = self.obj[3]
            ArchivePolicy_obj = ArchivePolicy.objects.get(
                PolicyStat=1, PolicyID=self.PolicyId)
            self.metatype = ArchivePolicy_obj.IngestMetadata
            self.RemoveFlag = ArchivePolicy_obj.IngestDelete
            self.IngestPath = ArchivePolicy_obj.IngestPath
            self.dirpath = os.path.join(self.IngestPath,
                                        self.ObjectIdentifierValue)
            if Debug:
                logging.info('self.obj: ' + str(self.obj))
                logging.info(
                    'InPath (IngestPath + self.ObjectIdentifierValue): ' +
                    str(self.dirpath))

            # If self.RemoveFlag = 1 then remove self.dirpath
            if self.RemoveFlag == 1:
                # Sample the clock once so h/m/s belong to the same instant.
                now = time.localtime()
                self.startTime = datetime.timedelta(
                    seconds=now[5], minutes=now[4], hours=now[3])
                errno, why = ESSPGM.DB().SetAIPstatus(
                    self.IngestTable, self.ext_IngestTable,
                    AgentIdentifierValue, self.ObjectUUID, 60, 5)
                if errno:
                    logging.error('Failed to update DB status for AIP: ' +
                                  str(self.ObjectIdentifierValue) +
                                  ' error: ' + str(why))
                logging.info('Try to remove IngestObjectPath: ' +
                             self.dirpath)
                try:
                    shutil.rmtree(self.dirpath)
                    if self.metatype == 1:
                        for suffix in ('.tar', '_Content_METS.xml',
                                       '_Package_METS.xml'):
                            os.remove(os.path.join(
                                self.IngestPath,
                                self.ObjectIdentifierValue + suffix))
                except (IOError, os.error) as remove_error:
                    # The exception gets its own name: it used to be bound
                    # to "why" and was overwritten by the SetAIPstatus
                    # result below, so the event never showed the real
                    # removal error.
                    errno, why = ESSPGM.DB().SetAIPstatus(
                        self.IngestTable, self.ext_IngestTable,
                        AgentIdentifierValue, self.ObjectUUID, 61, 4)
                    if errno:
                        logging.error(
                            'Failed to update DB status for AIP: ' +
                            str(self.ObjectIdentifierValue) +
                            ' error: ' + str(why))
                    else:
                        self.event_info = 'Problem to remove IngestObjectPath: %s, Error: %s' % (
                            self.dirpath, str(remove_error))
                        logging.error(self.event_info)
                        ESSPGM.Events().create(
                            '1060', '', 'ESSArch SIPRemove', ProcVersion,
                            '1', self.event_info, 2,
                            self.ObjectIdentifierValue)
                else:
                    now = time.localtime()
                    self.stopTime = datetime.timedelta(
                        seconds=now[5], minutes=now[4], hours=now[3])
                    self.ProcTime = self.stopTime - self.startTime
                    if self.ProcTime.seconds < 1:
                        # Fix min time to 1 second if it is zero.
                        self.ProcTime = datetime.timedelta(seconds=1)
                    # "//" keeps the integer result the Python 2 "/"
                    # produced for integer sizes.
                    self.DataObjectSizeMB = self.DataObjectSize // 1048576
                    self.ProcMBperSEC = int(self.DataObjectSizeMB) // int(
                        self.ProcTime.seconds)
                    logging.info('Succeeded to remove IngestObjectPath: ' +
                                 self.dirpath + ' , ' +
                                 str(self.ProcMBperSEC) +
                                 ' MB/Sec and Time: ' + str(self.ProcTime))
                    errno, why = ESSPGM.DB().SetAIPstatus(
                        self.IngestTable, self.ext_IngestTable,
                        AgentIdentifierValue, self.ObjectUUID, 69, 0)
                    if errno:
                        logging.error(
                            'Failed to update DB status for AIP: ' +
                            str(self.ObjectIdentifierValue) +
                            ' error: ' + str(why))
                    else:
                        ESSPGM.Events().create(
                            '1060', '', 'ESSArch SIPRemove', ProcVersion,
                            '0', '', 2, self.ObjectIdentifierValue)
            else:
                logging.info('Skip to remove IngestObjectPath: ' +
                             self.dirpath)
                errno, why = ESSPGM.DB().SetAIPstatus(
                    self.IngestTable, self.ext_IngestTable,
                    AgentIdentifierValue, self.ObjectUUID, 69, 0)
                if errno:
                    logging.error('Failed to update DB status for AIP: ' +
                                  str(self.ObjectIdentifierValue) +
                                  ' error: ' + str(why))
                else:
                    ESSPGM.Events().create(
                        '1060', '', 'ESSArch SIPRemove', ProcVersion, '0',
                        'Skip to remove IngestObjectPath', 2,
                        self.ObjectIdentifierValue)
        self.mLock.release()
        time.sleep(int(self.Time))
# ---------------------------------------------------------------------------
# Logger wiring.  From here on the name "logging" refers to the root logger
# object, not the logging module.
# ---------------------------------------------------------------------------
logger.setLevel(LogLevel)
logging = logging.getLogger('')
logging.setLevel(0)

# The file handler is always attached to the root logger; the separate
# "logger" object gets handlers only when MultiProc is set.
logging.addHandler(essLocalFileHandler)
if MultiProc:
    logger.addHandler(essLocalFileHandler)
if Console:
    logging.addHandler(essConsoleHandler)
if Console and MultiProc:
    logger.addHandler(essConsoleHandler)

# Echo the ESSProc settings that were read for this process.
logging.debug('LogFile: ' + str(LogFile))
logging.debug('Time: ' + str(Time))
logging.debug('Status: ' + str(Status))
logging.debug('Run: ' + str(Run))

# Configuration values read from ESSConfig.
AgentIdentifierValue = ESSDB.DB().action(
    'ESSConfig', 'GET', ('Value', ),
    ('Name', 'AgentIdentifierValue'))[0][0]
ExtDBupdate = int(
    ESSDB.DB().action(
        'ESSConfig', 'GET', ('Value', ),
        ('Name', 'ExtDBupdate'))[0][0])

# Start the worker and poll its RunFlag every five seconds:
#   99 -> exit the process with status 10
#    0 -> ask the worker to die and leave the loop
x = WorkingThread(ProcName)
while True:
    if x.RunFlag == 99:
        if Debug:
            logging.info('test1: ' + str(x.RunFlag))
        sys.exit(10)
    if x.RunFlag == 0:
        if Debug:
            logging.info('test2: ' + str(x.RunFlag))
        x.Die()
        break
    time.sleep(5)
    if Debug:
        logging.info('test3: ' + str(x.RunFlag))
def ThreadMain(self, ProcName):
    """Main loop of the worker thread: feed queued requests to a process pool.

    Creates a pool of ``self.ReqTags`` (2) worker processes and then loops:

    * leaves the loop when ``self.mDieFlag`` is 1;
    * re-reads ``Time`` and ``Run`` for ``ProcName`` from the ESSProc table;
    * stops (terminating the pool if one of its processes has died, closing
      it otherwise) when ``Run`` is ``'0'`` or a dead pool process is found;
    * otherwise marks ``AccessQueue`` rows with ``Status=0`` as ``Status=2``
      and submits ``GenerateDIPProc`` for them while fewer than
      ``self.ReqTags`` jobs are outstanding in the pool.

    ``self.mLock`` is held during each pass and is released in a ``finally``
    clause, so an exception raised by the DB, the ORM or the pool does not
    leave the lock held.  On exit ``self.RunFlag`` and ``self.mDieFlag`` are
    reset to 0.

    ProcName -- value of the ``Name`` column identifying this process in the
                ESSProc table; also used in the log messages.
    """
    logger.info('Starting ' + ProcName)
    # Start Process pool with 2 process
    self.ReqTags = 2
    self.ProcPool = multiprocessing.Pool(self.ReqTags)
    jobs = []
    while 1:
        if self.mDieFlag == 1:
            break  # Request for death
        stopping = False
        self.mLock.acquire()
        try:
            # Detect pool processes that have exited.
            self.ProcPoolProblemFlag = 0
            for self.worker in self.ProcPool._pool:
                if not self.worker.is_alive():
                    self.ProcPoolProblemFlag = 1
                    logger.error(
                        'Problem with process_name: %s, process_pid: %s, process_exitcode: %s',
                        self.worker.name, self.worker.pid,
                        self.worker.exitcode)
            self.Time, self.Run = ESSDB.DB().action(
                'ESSProc', 'GET', ('Time', 'Run'), ('Name', ProcName))[0]
            if self.Run == '0' or self.ProcPoolProblemFlag == 1:
                stopping = True
                logger.info('Stopping ' + ProcName)
                if self.ProcPoolProblemFlag:
                    self.ProcPool.terminate()
                else:
                    self.ProcPool.close()
                self.ProcPool.join()
                ESSDB.DB().action('ESSProc', 'UPD',
                                  ('Status', '0', 'Run', '0', 'PID', '0'),
                                  ('Name', ProcName))
                time.sleep(1)
            else:
                # Process Item
                AccessQueue_DbRows = AccessQueue.objects.filter(Status=0).all()
                for AccessQueue_DbRow in AccessQueue_DbRows:
                    # if self.ProcPool._state == 0 then pool is working.
                    if self.ProcPool._state == 0:
                        # Get active queue depth for self.ProcPool._cache.
                        self.ActiveProcQueue = len(self.ProcPool._cache)
                        # If self.ActiveProcQueue < self.ReqTags start DIPRequest process
                        if self.ActiveProcQueue < self.ReqTags:
                            AccessQueue_DbRow.Status = 2
                            AccessQueue_DbRow.save()
                            logger.info('Add ReqUUID: %s to GenerateDIPProc' %
                                        AccessQueue_DbRow.ReqUUID)
                            res = self.ProcPool.apply_async(
                                GenerateDIPProc, (AccessQueue_DbRow.ReqUUID, ))
                            jobs.append(res)
                # Report the state of every job submitted so far; each get()
                # waits at most one second.
                for job in jobs:
                    try:
                        msg = 'Result from GenerateDIPProc: %s' % repr(
                            job.get(timeout=1))
                    except multiprocessing.TimeoutError:
                        msg = 'Timeout wait for result from GenerateDIPProc'
                    logger.debug(msg)
                # Forget the collected jobs once the pool has nothing outstanding.
                if not self.ProcPool._cache:
                    jobs = []
                logger.debug('ProcPool_cache: %r', self.ProcPool._cache)
                connection.close()
                time.sleep(5)
        finally:
            # Always hand the lock back, even when the pass above raised.
            self.mLock.release()
        if stopping:
            logger.info('RunFlag: 0')
            time.sleep(1)
            break
        time.sleep(10)
    self.RunFlag = 0
    self.mDieFlag = 0
def ObjectValidate(self):
    """Validate ingest objects and record the resulting status.

    Reads from the local ingest table every object with StatusActivity '0'
    and StatusProcess 9-14, or StatusActivity '4' and StatusProcess 10-11,
    and for each one derives ``self.objectstatus``:

    * object has a PolicyID and an active ArchivePolicy in Mode 0/1: package
      information is read from the METS file (1 on success, 102 on error);
    * object has a PolicyID and the policy is in Mode 2: the object is
      checked against the external DB row (AIP name, active flag, project
      code equal to the PolicyID);
    * object has no PolicyID: the external DB row is checked and the
      PolicyID is looked up from its ProjectGroupCode (14 / 114).

    Codes of 100 and above are failures.  Once a check has failed the later
    checks no longer run, so the first failure is the one that is reported.
    The outcome is mapped to StatusProcess/StatusActivity, an event is
    created where applicable, and the local (and optionally the external)
    table is updated.

    Relies on ``self.tz`` and on the module-level names ``ExtDBupdate``,
    ``AgentIdentifierValue``, ``Debug``, ``ProcVersion``,
    ``disable_ObjectPackageName`` and ``force_ProjectGroupCode``.
    Returns None.
    """
    # objectstatus -> (StatusProcess, event text template or None).
    # All of these end in StatusActivity 4 (need of assistance).
    failure_actions = {
        100: (12, 'Policy is not in ESSArch mode for Object: %s'),
        101: (12, 'Policy not found or active for Object: %s'),
        102: (12, 'Problem to get information from Package_METS for Object: %s'),
        110: (10, None),  # Object don't exist in extDB
        111: (11, None),  # Object don't have any projektkod in extDB
        112: (13, 'Object: %s already have an AIP!'),
        113: (14, 'Object: %s is not active in external DB!'),
        114: (12, 'Object: %s do not have any local policy!'),
    }

    self.IngestTable = ESSDB.DB().action('ESSConfig', 'GET', ('Value', ),
                                         ('Name', 'IngestTable'))[0][0]
    if ExtDBupdate:
        self.ext_IngestTable = self.IngestTable
    else:
        self.ext_IngestTable = ''

    # Check if exist extDB and got projektid
    self.dbget, errno, why = ESSDB.DB().action(
        self.IngestTable, 'GET4',
        ('ObjectIdentifierValue', 'PolicyID', 'StatusProcess',
         'StatusActivity', 'ObjectUUID'),
        ('StatusActivity', '=', '0', 'AND', 'StatusProcess', 'BETWEEN', 9,
         'AND', 14, 'OR', 'StatusActivity', '=', '4', 'AND', 'StatusProcess',
         'BETWEEN', 10, 'AND', 11))
    if errno:
        logging.error('Failed to access Local DB, error: ' + str(why))
        # Nothing trustworthy to iterate over after a failed read.
        return

    for self.obj in self.dbget:
        self.ObjectIdentifierValue = self.obj[0]
        self.PolicyID = self.obj[1]
        self.StatusProcess = self.obj[2]
        self.StatusActivity = self.obj[3]
        ObjectUUID = self.obj[4]

        # Defaults, overwritten below by METS or external DB information.
        self.DBmode = 0
        self.ext_ObjectGuid = ObjectUUID
        self.ext_EntryDate = datetime.datetime.utcnow().replace(
            microsecond=0, tzinfo=pytz.utc)
        self.ext_EntryAgentIdentifierValue = None
        self.ext_OAISPackageType = 2
        self.ext_preservationLevelValue = 1
        self.ext_ObjectActive = 0
        self.objectstatus = 0
        self.ext_ProjectGroupCode = ''
        self.ext_ObjectPackageName = ''
        if Debug:
            logging.info('StatusProcess 9, ObjectIdentifierValue ' +
                         str(self.ObjectIdentifierValue))

        if self.PolicyID:
            ArchivePolicy_objs = ArchivePolicy.objects.filter(
                PolicyStat=1, PolicyID=str(self.PolicyID))[:1]
            if ArchivePolicy_objs:
                ArchivePolicy_obj = ArchivePolicy_objs.get()
                if ArchivePolicy_obj.Mode in range(0, 2):
                    self.DBmode = ArchivePolicy_obj.Mode
                    logging.info(
                        'Policy found for Object: %s in ESSArch mode' %
                        self.ObjectIdentifierValue)
                    self._validate_from_package_mets(ArchivePolicy_obj)
                elif ArchivePolicy_obj.Mode == 2:
                    # AIS but POLICYID from METS, Check in AIS if object is active.
                    self.DBmode = ArchivePolicy_obj.Mode
                    self._validate_policy_object_in_ais()
                else:
                    self.objectstatus = 100  # Policy is not in ESSArch mode
            else:
                self.objectstatus = 101  # Policy not found or not active
        else:
            self.DBmode = 2  # AIS
            self._validate_object_in_ais()

        if self.objectstatus in failure_actions:
            self.StatusProcess, event_template = failure_actions[
                self.objectstatus]
            self.StatusActivity = 4
            if Debug:
                logging.info(
                    'Change to StatusProcess %s, ObjectIdentifierValue %s' %
                    (self.StatusProcess, str(self.ObjectIdentifierValue)))
            if event_template is not None:
                self.event_info = event_template % str(
                    self.ObjectIdentifierValue)
                logging.error(self.event_info)
                ESSPGM.Events().create('1010', '', 'ESSArch SIPValidateAIS',
                                       ProcVersion, '1', self.event_info,
                                       self.DBmode,
                                       self.ObjectIdentifierValue)
        elif self.objectstatus == 14 or self.objectstatus == 1:
            # Object got a policy(19) and RFNext and OK(0)
            self.StatusProcess = 19
            self.StatusActivity = 0
            if Debug:
                logging.info(
                    'Change to StatusProcess 19, ObjectIdentifierValue ' +
                    str(self.ObjectIdentifierValue))
            ESSPGM.Events().create('1010', '', 'ESSArch SIPValidateAIS',
                                   ProcVersion, '0', '', self.DBmode,
                                   self.ObjectIdentifierValue)

        logging.info(
            'objectstatus:%s,StatusProcess:%s,StatusActivity:%s,EntryDate:%s'
            % (self.objectstatus, self.StatusProcess, self.StatusActivity,
               self.ext_EntryDate))
        # objectstatus 0 means no verdict was reached (e.g. the external DB
        # could not be read); leave the row untouched in that case.
        if self.objectstatus:
            self._store_validation_result()


def _validate_from_package_mets(self, ArchivePolicy_obj):
    """Set entry fields and objectstatus (1 or 102) from the object's METS file."""
    if ArchivePolicy_obj.IngestMetadata in [1, 2, 3]:
        metsfilename = os.path.join(
            ArchivePolicy_obj.IngestPath,
            self.ObjectIdentifierValue + '_Package_METS.xml')
    elif ArchivePolicy_obj.IngestMetadata in [4]:
        ObjectPath = os.path.join(ArchivePolicy_obj.IngestPath,
                                  self.ObjectIdentifierValue)
        if os.path.exists(os.path.join(ObjectPath, 'sip.xml')):
            metsfilename = os.path.join(ObjectPath, 'sip.xml')
        elif os.path.exists(os.path.join(ObjectPath, 'mets.xml')):
            metsfilename = os.path.join(ObjectPath, 'mets.xml')
        else:
            metsfilename = ''
    else:
        metsfilename = ''
    res_info, res_files, res_struct, error, why = ESSMD.getMETSFileList(
        FILENAME=metsfilename)
    if error:
        self.objectstatus = 102  # Problem to get information from package METS
        return
    self.ext_EntryDate = parse_datetime(res_info[1][0]).astimezone(pytz.utc)
    self.ext_EntryAgentIdentifierValue = res_info[2][0][4]
    try:
        self.ext_ObjectGuid = str(uuid.UUID(self.ObjectIdentifierValue))
    except ValueError as why:
        logging.warning(
            'ObjectIdentifierValue: %s is not a valid UUID, why: %s , start to generate a new UUID'
            % (self.ObjectIdentifierValue, str(why)))
        self.ext_ObjectGuid = str(uuid.uuid1())
        logging.info('New UUID: %s for ObjectIdentifierValue: %s' %
                     (self.ext_ObjectGuid, str(self.ObjectIdentifierValue)))
    self.ext_ObjectActive = 1
    self.ext_OAISPackageType = 2
    self.ext_preservationLevelValue = 1
    self.objectstatus = 1


def _query_external_object(self):
    """Load the object's external DB row into self.extOBJdbget.

    Logs access errors and the found/missing outcome (the latter only when
    Debug is set).  Returns True when a row was found; on a missing row
    objectstatus is set to 110, on an access error it is left unchanged.
    """
    self.extOBJdbget, ext_errno, ext_why = ESSMSSQL.DB().action(
        self.IngestTable, 'GET3',
        ('ProjectGroupCode', 'ObjectPackageName', 'ObjectGuid',
         'ObjectActive', 'EntryDate', 'EntryAgentIdentifierValue',
         'OAISPackageType', 'preservationLevelValue'),
        ('ObjectIdentifierValue', self.ObjectIdentifierValue))
    if ext_errno:
        logging.error('Failed to access External DB: ' +
                      str(self.ObjectIdentifierValue) + ' error: ' +
                      str(ext_why))
        return False
    if not self.extOBJdbget:
        self.objectstatus = 110  # Object not found in external DB
        if Debug:
            logging.info('Missing object: %s in AIS, self.extOBJdbget: %s',
                         self.ObjectIdentifierValue, str(self.extOBJdbget))
        return False
    if Debug:
        logging.info('Found object: %s in AIS, self.extOBJdbget: %s',
                     self.ObjectIdentifierValue, str(self.extOBJdbget))
    self.objectstatus = 10  # Object found in external DB
    return True


def _copy_external_entry_fields(self):
    """Copy entry date/agent, package type and preservation level from the external row."""
    self.ext_EntryDate = self.extOBJdbget[0][4].replace(
        microsecond=0, tzinfo=self.tz).astimezone(pytz.utc)
    self.ext_EntryAgentIdentifierValue = self.extOBJdbget[0][5]
    self.ext_OAISPackageType = self.extOBJdbget[0][6]
    self.ext_preservationLevelValue = self.extOBJdbget[0][7]


def _apply_forced_project_group_code(self):
    """Special function only for test: override the external values when force_ProjectGroupCode is set."""
    if not force_ProjectGroupCode:
        return
    logging.info('Force set ProjectGroupCode for Object: %s' %
                 self.ObjectIdentifierValue)
    self.objectstatus = 10
    self.ext_ProjectGroupCode = force_ProjectGroupCode
    self.ext_ObjectPackageName = ''
    self.ext_ObjectGuid = str(uuid.uuid1())  # updDB
    self.ext_ObjectActive = 1  # updDB
    self.ext_EntryDate = datetime.datetime.utcnow().replace(
        microsecond=0, tzinfo=pytz.utc)  # updDB
    self.ext_EntryAgentIdentifierValue = None  # updDB
    self.ext_OAISPackageType = 2  # updDB
    self.ext_preservationLevelValue = 1  # updDB


def _validate_policy_object_in_ais(self):
    """Mode 2 policy: check the external row; PolicyID must equal its ProjectGroupCode."""
    if not self._query_external_object():
        return
    if self.objectstatus < 100:
        # Check if object already have an AIP
        if disable_ObjectPackageName:
            self.ext_ObjectPackageName = ''
        else:
            self.ext_ObjectPackageName = self.extOBJdbget[0][1]
        if not self.ext_ObjectPackageName:
            self.objectstatus = 12  # Object do not have an AIP
        else:
            self.objectstatus = 112  # Object already have an AIP
    if self.objectstatus < 100:
        # Get GUID/UUID
        self.ext_ObjectGuid = uuid.UUID(self.extOBJdbget[0][2])
    if self.objectstatus < 100:
        # Check if object is active
        self.ext_ObjectActive = self.extOBJdbget[0][3]
        if self.ext_ObjectActive == 1:
            self.objectstatus = 13  # Object is active
        else:
            self.objectstatus = 113  # Object is not active
    if self.objectstatus < 100:
        # Check if POLICYID in local DB "METS" is equal to ProjectGroupCode in AIS
        self.ext_ProjectGroupCode = str(self.extOBJdbget[0][0])
        if self.ext_ProjectGroupCode == self.PolicyID:
            self.objectstatus = 1  # Object have an ProjectCode
            logging.info('Object: %s found in AIS with correct POLICYID' %
                         self.ObjectIdentifierValue)
        else:
            self.objectstatus = 111  # Object do not have an ProjectCode
    self._copy_external_entry_fields()
    self._apply_forced_project_group_code()


def _validate_object_in_ais(self):
    """No local PolicyID: check the external row and derive the PolicyID from its ProjectGroupCode."""
    if not self._query_external_object():
        return
    # Check that the object has a project code.
    # NOTE(review): str() of a missing (None) code is the non-empty string
    # 'None', so this check presumably never fails for NULL values - confirm
    # what the external DB returns before tightening it.
    self.ext_ProjectGroupCode = str(self.extOBJdbget[0][0])
    if self.ext_ProjectGroupCode:
        self.objectstatus = 11  # Object have an ProjectCode
    else:
        self.objectstatus = 111  # Object do not have an ProjectCode
    # Check if object already have an AIP
    if disable_ObjectPackageName:
        self.ext_ObjectPackageName = ''
    else:
        self.ext_ObjectPackageName = self.extOBJdbget[0][1]
    # Only evaluate a check while no earlier check has failed; otherwise the
    # earlier failure code would be overwritten by this check's failure code.
    if self.objectstatus < 100:
        if not self.ext_ObjectPackageName:
            self.objectstatus = 12  # Object do not have an AIP
        else:
            self.objectstatus = 112  # Object already have an AIP
    # Get GUID/UUID
    self.ext_ObjectGuid = uuid.UUID(self.extOBJdbget[0][2])
    # Check if object is active
    self.ext_ObjectActive = self.extOBJdbget[0][3]
    if self.objectstatus < 100:
        if self.ext_ObjectActive == 1:
            self.objectstatus = 13  # Object is active
        else:
            self.objectstatus = 113  # Object is not active
    self._copy_external_entry_fields()
    if Debug:
        logging.info('ext_ProjectGroupCode is: ' +
                     str(self.ext_ProjectGroupCode) +
                     ' for ObjectIdentifierValue ' +
                     str(self.ObjectIdentifierValue))
    self._apply_forced_project_group_code()
    if self.objectstatus < 100:
        ArchivePolicy_objs = ArchivePolicy.objects.filter(
            PolicyStat=1, AISProjectID=self.ext_ProjectGroupCode)[:1]
        if ArchivePolicy_objs:
            self.PolicyID = ArchivePolicy_objs.get().PolicyID
            if Debug:
                logging.info('PolicyID: ' + str(self.PolicyID))
            self.objectstatus = 14  # Object got PolicyID
        else:
            self.objectstatus = 114  # Object mising PolicyID


def _store_validation_result(self):
    """Write the validation outcome to the local table and, if enabled, the external one."""
    self.timestamp_utc = datetime.datetime.utcnow().replace(
        microsecond=0, tzinfo=pytz.utc)
    self.timestamp_dst = self.timestamp_utc.astimezone(self.tz)
    # Timestamps are written without tzinfo: UTC to the local table, the
    # self.tz-converted value to the external table.
    res, errno, why = ESSDB.DB().action(
        self.IngestTable, 'UPD',
        ('PolicyId', self.PolicyID,
         'ObjectUUID', self.ext_ObjectGuid,
         'EntryDate', self.ext_EntryDate.replace(tzinfo=None),
         'EntryAgentIdentifierValue', self.ext_EntryAgentIdentifierValue,
         'StatusProcess', self.StatusProcess,
         'StatusActivity', self.StatusActivity,
         'LastEventDate', self.timestamp_utc.replace(tzinfo=None),
         'linkingAgentIdentifierValue', AgentIdentifierValue,
         'OAISPackageType', self.ext_OAISPackageType,
         'preservationLevelValue', self.ext_preservationLevelValue,
         'ObjectActive', self.ext_ObjectActive,
         'LocalDBdatetime', self.timestamp_utc.replace(tzinfo=None)),
        ('ObjectIdentifierValue', self.ObjectIdentifierValue))
    if errno:
        logging.error('Failed to update Local DB: ' +
                      str(self.ObjectIdentifierValue) + ' error: ' + str(why))
    if errno == 0 and self.ext_IngestTable:
        ext_res, ext_errno, ext_why = ESSMSSQL.DB().action(
            self.IngestTable, 'UPD',
            ('PolicyId', self.PolicyID,
             'StatusProcess', self.StatusProcess,
             'StatusActivity', self.StatusActivity,
             'LastEventDate', self.timestamp_dst.replace(tzinfo=None),
             'linkingAgentIdentifierValue', AgentIdentifierValue),
            ('ObjectIdentifierValue', self.ObjectIdentifierValue))
        if ext_errno:
            logging.error('Failed to update External DB: ' +
                          str(self.ObjectIdentifierValue) + ' error: ' +
                          str(ext_why))
        else:
            # Record when the external DB was last brought in sync.
            res, errno, why = ESSDB.DB().action(
                self.IngestTable, 'UPD',
                ('ExtDBdatetime', self.timestamp_utc.replace(tzinfo=None)),
                ('ObjectIdentifierValue', self.ObjectIdentifierValue))
            if errno:
                logging.error('Failed to update Local DB: ' +
                              str(self.ObjectIdentifierValue) + ' error: ' +
                              str(why))
if options.LogLevel == 'CRITICAL': LogLevel = 50 elif options.LogLevel == 'ERROR': LogLevel = 40 elif options.LogLevel == 'WARNING': LogLevel = 30 elif options.LogLevel == 'INFO': LogLevel = 20 elif options.LogLevel == 'DEBUG': LogLevel = 10 else: op.error("Invalid LogLevel") if LogLevel == 10: Debug = 1 else: Debug = 0 else: LogLevel = 20 #Console = 1 Console = 0 if options.process: LogFile,Time,Status,Run = ESSDB.DB().action('ESSProc','GET',('LogFile','Time','Status','Run'),('Name',ProcName))[0] else: LogFile = '/ESSArch/log/ESSlogging.log' ########################## # Log format MultiProc = 1 if MultiProc: essFormatter1 = logging.Formatter('%(asctime)s %(name)s %(levelname)s/%(processName)-8s %(message)s','%d %b %Y %H:%M:%S') essFormatter2 = logging.Formatter('%(name)s %(levelname)s/%(processName)-8s %(message)s','%d %b %Y %H:%M:%S') #essFormatter1 = logging.Formatter('%(asctime)s %(levelname)s/%(processName)-8s %(message)s','%d %b %Y %H:%M:%S') #essFormatter2 = logging.Formatter('%(levelname)s/%(processName)-8s %(message)s','%d %b %Y %H:%M:%S') else: essFormatter1 = logging.Formatter('%(asctime)s %(levelname)-8s %(message)s','%d %b %Y %H:%M:%S') essFormatter2 = logging.Formatter('%(levelname)-8s %(message)s','%d %b %Y %H:%M:%S') ########################### # LocalFileHandler (main or root)
def ThreadMain(self, ProcName): logging.info('Starting ' + ProcName) TimeZone = timezone.get_default_timezone_name() self.tz = pytz.timezone(TimeZone) METS_NAMESPACE = SchemaProfile.objects.get( entity='mets_namespace').value METS_SCHEMALOCATION = SchemaProfile.objects.get( entity='mets_schemalocation').value METS_PROFILE = SchemaProfile.objects.get(entity='mets_profile').value XLINK_NAMESPACE = SchemaProfile.objects.get( entity='xlink_namespace').value XSI_NAMESPACE = SchemaProfile.objects.get(entity='xsi_namespace').value while 1: if self.mDieFlag == 1: break # Request for death self.mLock.acquire() self.Time, self.Run = ESSDB.DB().action('ESSProc', 'GET', ('Time', 'Run'), ('Name', ProcName))[0] if self.Run == '0': logging.info('Stopping ' + ProcName) ESSDB.DB().action('ESSProc', 'UPD', ('Status', '0', 'Run', '0', 'PID', '0'), ('Name', ProcName)) self.RunFlag = 0 self.mLock.release() if Debug: logging.info('RunFlag: 0') time.sleep(2) continue # Process Item lock = thread.allocate_lock() Cmets_obj = Parameter.objects.get( entity='content_descriptionfile').value self.IngestTable = ESSDB.DB().action('ESSConfig', 'GET', ('Value', ), ('Name', 'IngestTable'))[0][0] if ExtDBupdate: self.ext_IngestTable = self.IngestTable else: self.ext_IngestTable = '' self.dbget, errno, why = ESSDB.DB().action( self.IngestTable, 'GET4', ('ObjectIdentifierValue', 'ObjectUUID', 'PolicyId', 'ObjectSize'), ('StatusProcess', 'BETWEEN', 39, 'AND', 40, 'AND', 'StatusActivity', '=', '0')) if errno: logging.error('Failed to access Local DB, error: ' + str(why)) for self.obj in self.dbget: self.ok = 1 self.ProcDB = ESSDB.DB().action('ESSProc', 'GET', ('Run', 'Pause'), ('Name', ProcName))[0] if self.ProcDB[0] == '0': logging.info('Stopping ' + ProcName) ESSDB.DB().action('ESSProc', 'UPD', ('Status', '0', 'Run', '0', 'PID', '0'), ('Name', ProcName)) thread.interrupt_main() time.sleep(5) break elif self.ProcDB[1] == 1: while 1: time.sleep(60) self.ProcDB = ESSDB.DB().action( 'ESSProc', 'GET', 
('Run', 'Pause'), ('Name', ProcName))[0] if self.ProcDB[1] == 1: logging.info('Process is in pause state') else: break self.ObjectIdentifierValue = self.obj[0] self.ObjectUUID = self.obj[1] self.PolicyId = self.obj[2] self.ObjectSize = self.obj[3] ArchivePolicy_obj = ArchivePolicy.objects.get( PolicyStat=1, PolicyID=self.PolicyId) if self.ok: ########################################################### # set variables self.AIPpath = ArchivePolicy_obj.AIPpath self.metatype = ArchivePolicy_obj.IngestMetadata self.ChecksumAlgorithm = ArchivePolicy_obj.ChecksumAlgorithm self.CA = dict(ChecksumAlgorithm_CHOICES)[ self.ChecksumAlgorithm] self.SIPpath = ArchivePolicy_obj.IngestPath self.p_obj = self.ObjectIdentifierValue + '.tar' self.ObjectPath = os.path.join(self.AIPpath, self.p_obj) self.SIProotpath = os.path.join(self.SIPpath, self.ObjectIdentifierValue) if self.metatype in [4]: #self.Cmets_obj = '%s/%s_Content_METS.xml' % (self.ObjectIdentifierValue,self.ObjectIdentifierValue) #self.Cmets_objpath = os.path.join(self.SIPpath,self.Cmets_obj) #self.Cmets_obj = Cmets_obj.replace('{uuid}',self.ObjectIdentifierValue) self.Cmets_obj = Cmets_obj.replace( '{objid}', self.ObjectIdentifierValue) self.Cmets_objpath = os.path.join( self.SIProotpath, self.Cmets_obj) elif self.metatype in [1, 2, 3]: self.Cmets_obj = '%s_Content_METS.xml' % ( self.ObjectIdentifierValue) self.Cmets_objpath = os.path.join( self.AIPpath, self.Cmets_obj) self.Pmets_obj = '%s_Package_METS.xml' % ( self.ObjectIdentifierValue) self.Pmets_objpath = os.path.join(self.AIPpath, self.Pmets_obj) self.AIC_UUID = None self.AIC_UUID_rel_ObjectUUIDs = [] if self.ok: METS_agent_list = [] METS_altRecordID_list = [] if self.metatype == 1: ############################################ # Object have metatype 1 (METS) self.METS_LABEL = 'ESSArch AIP' # Get SIP Content METS information self.METSfilepath = os.path.join( self.SIPpath, self.ObjectIdentifierValue + '/metadata/SIP/' + self.ObjectIdentifierValue + 
'_Content_METS.xml') res_info, res_files, res_struct, error, why = ESSMD.getMETSFileList( FILENAME=self.METSfilepath) for agent in res_info[2]: if not (agent[0] == 'CREATOR' and agent[3] == 'SOFTWARE'): METS_agent_list.append(agent) METS_agent_list.append([ 'CREATOR', 'INDIVIDUAL', '', AgentIdentifierValue, [] ]) METS_agent_list.append([ 'CREATOR', 'OTHER', 'SOFTWARE', 'ESSArch', ['VERSION=%s' % ProcVersion] ]) elif self.metatype == 2: ############################################ # Object have metatype 2 (RES) self.METS_LABEL = 'Imaging AIP RA' METS_agent_list.append([ 'ARCHIVIST', 'ORGANIZATION', '', 'Riksarkivet', [] ]) METS_agent_list.append( ['CREATOR', 'ORGANIZATION', '', 'Riksarkivet', []]) METS_agent_list.append([ 'CREATOR', 'INDIVIDUAL', '', AgentIdentifierValue, [] ]) METS_agent_list.append([ 'CREATOR', 'OTHER', 'SOFTWARE', 'ESSArch', ['VERSION=%s' % ProcVersion] ]) elif self.metatype == 3: ############################################ # Object have metatype 3 (ADDML) self.METS_LABEL = 'Born Digital AIP RA' METS_agent_list.append([ 'ARCHIVIST', 'ORGANIZATION', '', 'Riksarkivet', [] ]) METS_agent_list.append( ['CREATOR', 'ORGANIZATION', '', 'Riksarkivet', []]) METS_agent_list.append([ 'CREATOR', 'INDIVIDUAL', '', AgentIdentifierValue, [] ]) METS_agent_list.append([ 'CREATOR', 'OTHER', 'SOFTWARE', 'ESSArch', ['VERSION=%s' % ProcVersion] ]) elif self.metatype in [4]: ############################################ # Object have metatype 4 (eARD METS) res_info, res_files, res_struct, error, why = ESSMD.getMETSFileList( FILENAME=self.Cmets_objpath) for agent in res_info[2]: #if not (agent[0] == 'CREATOR' and agent[3] == 'SOFTWARE'): METS_agent_list.append(agent) self.METS_LABEL = res_info[0][0] METS_agent_list.append([ 'CREATOR', None, 'INDIVIDUAL', None, AgentIdentifierValue, [] ]) METS_agent_list.append([ 'CREATOR', None, 'OTHER', 'SOFTWARE', 'ESSArch', ['VERSION=%s' % ProcVersion] ]) for altRecordID in res_info[3]: METS_altRecordID_list.append(altRecordID) 
logging.debug('self.obj: ' + str(self.obj)) if self.ChecksumAlgorithm > 0: #self.ChecksumAlgorithm 1 = MD5, 2 = SHA-256 self.startCalTime = datetime.timedelta( seconds=time.localtime()[5], minutes=time.localtime()[4], hours=time.localtime()[3]) errno, why = ESSPGM.DB().SetAIPstatus( self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 40, 5) if errno: logging.error('Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) logging.info('Start create Package METS for: ' + self.ObjectIdentifierValue) if self.ok: ########################################################### # Create PMETS for AIP package self.M_CHECKSUM, errno, why = ESSPGM.Check().checksum( self.Cmets_objpath, self.CA) if errno: self.event_info = 'Problem to get checksum for METS object for AIP package: ' + str( self.Cmets_objpath) logging.error(self.event_info) ESSPGM.Events().create('1030', '', 'ESSArch AIPCreator', ProcVersion, '1', self.event_info, 2, self.ObjectIdentifierValue) self.ok = 0 self.M_statinfo = os.stat(self.Cmets_objpath) self.M_SIZE = self.M_statinfo.st_size self.M_utc_mtime = datetime.datetime.utcfromtimestamp( self.M_statinfo.st_mtime).replace(tzinfo=pytz.utc) self.M_lociso_mtime = self.M_utc_mtime.astimezone( self.tz).isoformat() self.P_CHECKSUM, errno, why = ESSPGM.Check().checksum( self.ObjectPath, self.CA) if errno: self.event_info = 'Problem to get checksum for AIP package: ' + str( self.ObjectPath) logging.error(self.event_info) ESSPGM.Events().create('1040', '', 'ESSArch AIPChecksum', ProcVersion, '1', self.event_info, 2, self.ObjectIdentifierValue) self.ok = 0 self.P_statinfo = os.stat(self.ObjectPath) self.P_SIZE = self.P_statinfo.st_size self.P_utc_mtime = datetime.datetime.utcfromtimestamp( self.P_statinfo.st_mtime).replace(tzinfo=pytz.utc) self.P_lociso_mtime = self.P_utc_mtime.astimezone( self.tz).isoformat() if self.metatype in [1, 2, 3]: self.PMETSdoc = ESSMD.createPMets( ID=self.ObjectIdentifierValue, 
LABEL=self.METS_LABEL, AGENT=METS_agent_list, P_SIZE=self.P_SIZE, P_CREATED=self.P_lociso_mtime, P_CHECKSUM=self.P_CHECKSUM, P_CHECKSUMTYPE=self.CA, M_SIZE=self.M_SIZE, M_CREATED=self.M_lociso_mtime, M_CHECKSUM=self.M_CHECKSUM, M_CHECKSUMTYPE=self.CA, ) errno, why = ESSMD.writeToFile( self.PMETSdoc, self.Pmets_objpath) if errno: self.event_info = 'Problem to write PMETS to file for AIP package: ' + str( self.Pmets_objpath) logging.error(self.event_info) ESSPGM.Events().create( '1040', '', 'ESSArch AIPChecksum', ProcVersion, '1', self.event_info, 2, self.ObjectIdentifierValue) time.sleep(2) self.ok = 0 elif self.metatype in [4]: ms_files = [] ms_files.append([ 'amdSec', None, 'techMD', 'techMD001', None, None, 'ID%s' % str(uuid.uuid1()), 'URL', 'file:%s/%s' % (self.ObjectIdentifierValue, self.Cmets_obj), 'simple', self.M_CHECKSUM, self.CA, self.M_SIZE, 'text/xml', self.M_lociso_mtime, 'OTHER', 'METS', None ]) ms_files.append([ 'fileSec', None, None, None, None, None, 'ID%s' % str(uuid.uuid1()), 'URL', 'file:%s' % self.p_obj, 'simple', self.P_CHECKSUM, self.CA, self.P_SIZE, 'application/x-tar', self.P_lociso_mtime, 'tar', 'techMD001', None ]) # define namespaces self.namespacedef = 'xmlns:mets="%s"' % METS_NAMESPACE self.namespacedef += ' xmlns:xlink="%s"' % XLINK_NAMESPACE self.namespacedef += ' xmlns:xsi="%s"' % XSI_NAMESPACE self.namespacedef += ' xsi:schemaLocation="%s %s"' % ( METS_NAMESPACE, METS_SCHEMALOCATION) errno, info_list = ESSMD.Create_IP_mets( ObjectIdentifierValue=self. 
ObjectIdentifierValue, METS_ObjectPath=self.Pmets_objpath, agent_list=METS_agent_list, altRecordID_list=METS_altRecordID_list, file_list=ms_files, namespacedef=self.namespacedef, METS_LABEL=self.METS_LABEL, METS_PROFILE=METS_PROFILE, METS_TYPE='AIP', METS_DocumentID=self.Pmets_obj, TimeZone=TimeZone) if errno: logging.error( 'Problem to create Package METS file, why: %s' % str(info_list)) self.ObjectMessageDigest = self.P_CHECKSUM self.stopCalTime = datetime.timedelta( seconds=time.localtime()[5], minutes=time.localtime()[4], hours=time.localtime()[3]) self.CalTime = self.stopCalTime - self.startCalTime self.ObjectSizeMB = self.ObjectSize / 1048576 if self.CalTime.seconds < 1: self.CalTime = datetime.timedelta( seconds=1 ) #Fix min time to 1 second if it is zero. self.CalMBperSEC = int(self.ObjectSizeMB) / int( self.CalTime.seconds) logging.info('Finished calculate checksum: ' + self.ObjectIdentifierValue + ' , ' + str(self.CalMBperSEC) + ' MB/Sec and Time: ' + str(self.CalTime)) if self.ok: self.timestamp_utc = datetime.datetime.utcnow( ).replace(microsecond=0, tzinfo=pytz.utc) self.timestamp_dst = self.timestamp_utc.astimezone( self.tz) res, errno, why = ESSDB.DB().action( self.IngestTable, 'UPD', ('ObjectMessageDigestAlgorithm', self.ChecksumAlgorithm, 'ObjectMessageDigest', self.ObjectMessageDigest, 'MetaObjectSize', self.M_SIZE, 'LastEventDate', self.timestamp_utc.replace(tzinfo=None), 'linkingAgentIdentifierValue', AgentIdentifierValue, 'LocalDBdatetime', self.timestamp_utc.replace(tzinfo=None)), ('ObjectIdentifierValue', self.ObjectIdentifierValue)) if errno: logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) if errno == 0 and ExtDBupdate: ext_res, ext_errno, ext_why = ESSMSSQL.DB().action( self.IngestTable, 'UPD', ('ObjectMessageDigestAlgorithm', self.ChecksumAlgorithm, 'ObjectMessageDigest', self.ObjectMessageDigest, 'MetaObjectSize', self.M_SIZE, 'LastEventDate', self.timestamp_dst.replace(tzinfo=None), 
'linkingAgentIdentifierValue', AgentIdentifierValue), ('ObjectIdentifierValue', self.ObjectIdentifierValue)) if ext_errno: logging.error( 'Failed to update External DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(ext_why)) else: res, errno, why = ESSDB.DB().action( self.IngestTable, 'UPD', ('ExtDBdatetime', self.timestamp_utc.replace(tzinfo=None)), ('ObjectIdentifierValue', self.ObjectIdentifierValue)) if errno: logging.error( 'Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) if self.ok and self.metatype == 4: #################################################### # Create AIC METS File: aic_obj = ArchiveObject.objects.filter( relaic_set__UUID=self.ObjectUUID)[:1] if aic_obj: self.AIC_UUID = aic_obj.get().ObjectUUID logging.info( 'Succeeded to get AIC_UUID: %s from DB' % self.AIC_UUID) else: logging.warning( 'AIC not found for IP object: %s, skip to create AIC METS file' % self.ObjectUUID) if self.ok and self.AIC_UUID: ip_obj_list = ArchiveObject.objects.filter( Q(StatusProcess=3000) | Q(ObjectUUID=self.ObjectUUID), reluuid_set__AIC_UUID=self.AIC_UUID).order_by( 'Generation') if ip_obj_list: logging.info('Start create AIC METS: ' + self.AIC_UUID) self.AICmets_objpath = os.path.join( self.AIPpath, self.AIC_UUID + '_AIC_METS.xml') ms_files = [] for ip_obj in ip_obj_list: logging.info( 'Add IP: %s to AIC METS: %s' % (ip_obj.ObjectUUID, self.AIC_UUID)) ms_files.append([ 'fileSec', None, None, None, None, None, 'ID%s' % str(uuid.uuid1()), 'URL', 'file:%s' % ip_obj.ObjectUUID, 'simple', ip_obj.ObjectMessageDigest, dict(ChecksumAlgorithm_CHOICES)[ ip_obj.ObjectMessageDigestAlgorithm], ip_obj.ObjectSize, 'application/x-tar', ip_obj.CreateDate, 'IP Package', None, None ]) # define namespaces self.namespacedef = 'xmlns:mets="%s"' % METS_NAMESPACE self.namespacedef += ' xmlns:xlink="%s"' % XLINK_NAMESPACE self.namespacedef += ' xmlns:xsi="%s"' % XSI_NAMESPACE self.namespacedef += ' xsi:schemaLocation="%s %s"' % ( METS_NAMESPACE, 
METS_SCHEMALOCATION) errno, info_list = ESSMD.Create_IP_mets( ObjectIdentifierValue=self.AIC_UUID, METS_ObjectPath=self.AICmets_objpath, agent_list=[], altRecordID_list=[], file_list=ms_files, namespacedef=self.namespacedef, METS_LABEL='AIC relation to IP', METS_PROFILE=METS_PROFILE, METS_TYPE='AIC', METS_DocumentID=self.AIC_UUID + '_AIC_METS.xml', TimeZone=TimeZone) if errno: logging.error( 'Problem to create AIC METS file, why: %s' % str(info_list)) else: logging.error( 'Problem to get objects related to AIC_UUID: %s from DB' % (self.AIC_UUID)) self.ok = 0 if self.ok: errno, why = ESSPGM.DB().SetAIPstatus( self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 49, 0) if errno: logging.error( 'Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) else: self.event_info = 'Succeeded to create checksum for Object: %s' % self.ObjectIdentifierValue logging.info(self.event_info) ESSPGM.Events().create('1040', '', 'ESSArch AIPChecksum', ProcVersion, '0', self.event_info, 2, self.ObjectIdentifierValue) else: errno, why = ESSPGM.DB().SetAIPstatus( self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 40, 100) if errno: logging.error( 'Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) else: self.event_info = 'Failed to create checksum for Object: %s' % self.ObjectIdentifierValue logging.error(self.event_info) ESSPGM.Events().create('1040', '', 'ESSArch AIPChecksum', ProcVersion, '1', self.event_info, 2, self.ObjectIdentifierValue) elif self.ChecksumAlgorithm == 0: #self.ChecksumAlgorithm 0 = No checksum logging.info('Skip creation of checksum: ' + self.ObjectIdentifierValue) self.ObjectMessageDigest = '' self.MetaObjectSize = os.stat(self.Cmets_objpath)[6] self.timestamp_utc = datetime.datetime.utcnow().replace( microsecond=0, tzinfo=pytz.utc) self.timestamp_dst = self.timestamp_utc.astimezone(self.tz) res, errno, why = 
ESSDB.DB().action( self.IngestTable, 'UPD', ('ObjectMessageDigestAlgorithm', self.ChecksumAlgorithm, 'ObjectMessageDigest', self.ObjectMessageDigest, 'StatusProcess', '49', 'StatusActivity', '0', 'MetaObjectSize', self.MetaObjectSize, 'LastEventDate', self.timestamp_utc.replace(tzinfo=None), 'linkingAgentIdentifierValue', AgentIdentifierValue, 'LocalDBdatetime', self.timestamp_utc.replace(tzinfo=None)), ('ObjectIdentifierValue', self.ObjectIdentifierValue)) if errno: logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) else: ESSPGM.Events().create('1040', '', 'ESSArch AIPChecksum', ProcVersion, '0', 'Skip creation of checksum', 2, self.ObjectIdentifierValue) if errno == 0 and ExtDBupdate: ext_res, ext_errno, ext_why = ESSMSSQL.DB().action( self.IngestTable, 'UPD', ('ObjectMessageDigestAlgorithm', self.ChecksumAlgorithm, 'ObjectMessageDigest', self.ObjectMessageDigest, 'StatusProcess', '49', 'StatusActivity', '0', 'MetaObjectSize', self.MetaObjectSize, 'LastEventDate', self.timestamp_dst.replace(tzinfo=None), 'linkingAgentIdentifierValue', AgentIdentifierValue), ('ObjectIdentifierValue', self.ObjectIdentifierValue)) if ext_errno: logging.error('Failed to update External DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(ext_why)) else: res, errno, why = ESSDB.DB().action( self.IngestTable, 'UPD', ('ExtDBdatetime', self.timestamp_utc.replace(tzinfo=None)), ('ObjectIdentifierValue', self.ObjectIdentifierValue)) if errno: logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) db.close_old_connections() self.mLock.release() time.sleep(int(self.Time)) self.mDieFlag = 0
def ThreadMain(self, ProcName):
    """Run the SIP format-validation worker loop until asked to die.

    Every cycle acquires ``self.mLock``, reads the ``Time`` and ``Run``
    columns for *ProcName* from the ``ESSProc`` table and then either

    * marks the process as stopped (``Run == '0'``) and waits 2 seconds, or
    * validates every object in the worklist and waits ``Time`` seconds.

    The loop ends when ``self.mDieFlag`` is 1; the flag is reset to 0 on exit.

    Args:
        ProcName: value of the ``Name`` column identifying this process in
            the ``ESSProc`` table; also used in the log messages.
    """
    logging.info('Starting ' + ProcName)
    while 1:
        if self.mDieFlag == 1:
            break  # Request for death
        stop_requested = False
        self.mLock.acquire()
        try:
            self.Time, self.Run = ESSDB.DB().action(
                'ESSProc', 'GET', ('Time', 'Run'), ('Name', ProcName))[0]
            if self.Run == '0':
                logging.info('Stopping ' + ProcName)
                ESSDB.DB().action(
                    'ESSProc', 'UPD',
                    ('Status', '0', 'Run', '0', 'PID', '0'),
                    ('Name', ProcName))
                self.RunFlag = 0
                stop_requested = True
            else:
                self._process_worklist(ProcName)
                db.close_old_connections()
        finally:
            # Always hand the lock back: an exception while processing must
            # not leave it held for the rest of the process lifetime.
            self.mLock.release()
        if stop_requested:
            time.sleep(2)
        else:
            time.sleep(int(self.Time))
    self.mDieFlag = 0


def _process_worklist(self, ProcName):
    """Fetch the worklist and validate each object, honouring stop/pause."""
    self.IngestTable = ESSDB.DB().action(
        'ESSConfig', 'GET', ('Value', ), ('Name', 'IngestTable'))[0][0]
    if ExtDBupdate:
        self.ext_IngestTable = self.IngestTable
    else:
        self.ext_IngestTable = ''
    # Worklist: StatusActivity 0 and StatusProcess between 24 and 26.
    self.dbget, errno, why = ESSDB.DB().action(
        self.IngestTable, 'GET4',
        ('ObjectIdentifierValue', 'ObjectUUID', 'PolicyId',
         'INFORMATIONCLASS'),
        ('StatusActivity', '=', '0', 'AND',
         'StatusProcess', 'BETWEEN', 24, 'AND', 26))
    if errno:
        logging.error('Failed to access Local DB, error: ' + str(why))
    # NOTE(review): "or ()" assumes a failed query may hand back no result
    # set at all -- confirm against ESSDB.DB().action.
    for self.obj in self.dbget or ():
        self.ProcDB = ESSDB.DB().action(
            'ESSProc', 'GET', ('Run', 'Pause'), ('Name', ProcName))[0]
        if self.ProcDB[0] == '0':
            logging.info('Stopping ' + ProcName)
            ESSDB.DB().action(
                'ESSProc', 'UPD',
                ('Status', '0', 'Run', '0', 'PID', '0'),
                ('Name', ProcName))
            thread.interrupt_main()
            time.sleep(5)
            break
        elif self.ProcDB[1] == 1:
            # Paused: poll once a minute until the Pause column changes.
            while 1:
                time.sleep(60)
                self.ProcDB = ESSDB.DB().action(
                    'ESSProc', 'GET', ('Run', 'Pause'),
                    ('Name', ProcName))[0]
                if self.ProcDB[1] == 1:
                    logging.info('Process is in pause state')
                else:
                    break
        self._validate_sip()


def _validate_sip(self):
    """Run every validation stage for ``self.obj`` and record the outcome.

    A stage only runs while ``self.ok`` is still true.  On success the
    object is set to status 29/0, on failure to 26/4.  An object whose
    policy is missing is logged and left untouched.
    """
    self.ok = 1
    self.ObjectIdentifierValue = ESSPGM.Check().str2unicode(self.obj[0])
    self.ObjectUUID = self.obj[1]
    self.PolicyId = self.obj[2]
    self.INFORMATIONCLASS = self.obj[3]
    logging.info('Start to validate format for SIP: %s',
                 self.ObjectIdentifierValue)
    self.ChecksumAlgorithm_CHOICES_dict = dict(ChecksumAlgorithm_CHOICES)
    self.ChecksumAlgorithm_CHOICES_invdict = ESSPGM.Check().invert_dict(
        self.ChecksumAlgorithm_CHOICES_dict)
    try:
        ArchivePolicy_obj = ArchivePolicy.objects.get(
            PolicyStat=1, PolicyID=self.PolicyId)
    except ArchivePolicy.DoesNotExist:
        # Without a policy none of the paths/modes below are known, so the
        # object is skipped instead of taking the whole worker down.
        logging.error('Missing PolicyID: %s in db' % str(self.PolicyId))
        return

    self._load_policy_settings(ArchivePolicy_obj)
    self._prepare_metadata()
    if self.ok:
        self._load_object_list()
    if self.ok:
        # Mark the object as "validation in progress" (25/5).
        self._set_aip_status(25, 5)
        logging.info('Format validate object: ' + self.ObjectIdentifierValue)
    if self.ok:
        self._validate_objects()
    if self.ok:
        self._check_unlisted_files()
    if self.ok:
        self._check_information_class()
    if self.ok:
        self.stopTime = self._time_of_day()
        self.MeasureTime = self.stopTime - self.startTime
        self.ObjectSizeMB = self.ObjectSize / 1048576
        if self.MeasureTime.seconds < 1:
            # Fix min time to 1 second if it is zero.
            self.MeasureTime = datetime.timedelta(seconds=1)
        self.VerMBperSEC = int(self.ObjectSizeMB) / int(
            self.MeasureTime.seconds)

    if self.ok:
        logging.info('Succeeded to validate SIP package: ' +
                     self.ObjectIdentifierValue + ' , ' +
                     str(self.VerMBperSEC) + ' MB/Sec and Time: ' +
                     str(self.MeasureTime))
        if not self._set_aip_status(29, 0):
            ESSPGM.Events().create('1025', '', 'ESSArch SIPValidateFormat',
                                   ProcVersion, '0', '', self.DBmode,
                                   self.ObjectIdentifierValue)
    elif not self._set_aip_status(26, 4):
        self._report_failure('Failed to validate SIP package: ' +
                             self.ObjectIdentifierValue)


def _load_policy_settings(self, policy):
    """Copy the settings this validation needs from the archive policy."""
    self.AIPpath = ESSPGM.Check().str2unicode(policy.AIPpath)
    self.metatype = policy.IngestMetadata
    self.Policy_INFORMATIONCLASS = policy.INFORMATIONCLASS
    self.ChecksumAlgorithm = policy.ChecksumAlgorithm
    self.ChecksumAlgorithm_name = self.ChecksumAlgorithm_CHOICES_dict[
        self.ChecksumAlgorithm]
    self.SIPpath = ESSPGM.Check().str2unicode(policy.IngestPath)
    self.DBmode = policy.Mode
    logging.debug('self.obj: %s', str(self.obj))
    logging.debug('self.ObjectIdentifierValue: %s',
                  self.ObjectIdentifierValue)
    logging.debug('Len self.ObjectIdentifierValue: %s',
                  len(self.ObjectIdentifierValue))
    logging.debug('self.SIPpath: %s', self.SIPpath)
    logging.debug('self.AIPpath: %s', self.AIPpath)


def _prepare_metadata(self):
    """Build the PREMIS file (metatype 1/2) or point SIPpath at the package
    directory (metatype 4)."""
    if self.metatype == 2:
        # Create PREMISfile from TIFFEdit.RES
        logging.info('Start to convert RESfile to PREMISfile for object: ' +
                     self.ObjectIdentifierValue)
        self.xml_PREMIS, self.errno, self.why = ESSMD.RES2PREMIS(
            os.path.join(self.SIPpath, self.ObjectIdentifierValue),
            AgentIdentifierValue[8:])
        res_errors = {
            10: 'Failed to parse RESfile',
            20: 'I/O error to access RESfile',
            30: 'Validation errors for PREMIS file',
            40: 'Problem to write PREMIS file',
        }
        if self.errno in res_errors:
            # Specific event for the known error codes; the generic
            # "errno > 1" event below is recorded as well.
            self._report_failure(
                '%s, error.num: %s error.det: %s' %
                (res_errors[self.errno], str(self.errno), str(self.why)),
                '1022', 'RES2PREMIS')
        if self.errno > 1:
            self._report_failure(
                'Problem to convert RES to PREMIS for SIP package: %s, error.num: %s error.desc: %s'
                % (self.ObjectIdentifierValue, str(self.errno),
                   str(self.why)), '1022', 'RES2PREMIS')
        elif self.errno == 1:
            # errno 1 is treated as a warning: validation carries on.
            self.event_info = 'Warning in convert RES to PREMIS for SIP package: %s, error.num: %s warning.desc: %s' % (
                self.ObjectIdentifierValue, str(self.errno), str(self.why))
            logging.warning(self.event_info)
            ESSPGM.Events().create('1022', 'RES2PREMIS',
                                   'ESSArch SIPValidateFormat', ProcVersion,
                                   '0', self.event_info, self.DBmode,
                                   self.ObjectIdentifierValue)
        else:
            ESSPGM.Events().create('1022', 'RES2PREMIS',
                                   'ESSArch SIPValidateFormat', ProcVersion,
                                   '0', '', self.DBmode,
                                   self.ObjectIdentifierValue)
        if self.ok:
            # Clean RES SIP from "junk" files
            self.errno, self.why = ESSPGM.Check().CleanRES_SIP(
                os.path.join(self.SIPpath, self.ObjectIdentifierValue))
            if self.errno:
                self._report_failure(
                    'Problem to clean RES SIP from "junk files" for SIP package: %s, error.num: %s error.desc: %s'
                    % (self.ObjectIdentifierValue, str(self.errno),
                       str(self.why)), '1022', 'CleanRES_SIP')
            else:
                ESSPGM.Events().create('1022', 'CleanRES_SIP',
                                       'ESSArch SIPValidateFormat',
                                       ProcVersion, '0', '', self.DBmode,
                                       self.ObjectIdentifierValue)
    elif self.metatype == 1:
        # Create PREMISfile from Content_METS
        res, errno, why = ESSMD.METS2PREMIS(self.SIPpath,
                                            self.ObjectIdentifierValue)
        if not errno:
            logging.info(
                'Succeeded to convert Content_METS to PREMISfile for information package: %s',
                self.ObjectIdentifierValue)
        else:
            # NOTE: this failure is only logged; no event is created for it.
            self.event_info = 'Problem to convert Content_METS to PREMISfile for information package: %s, errno: %s, detail: %s' % (
                self.ObjectIdentifierValue, str(errno), str(why))
            logging.error(self.event_info)
            self.ok = 0
    elif self.metatype in [4]:
        self.SIPpath = os.path.join(self.SIPpath, self.ObjectIdentifierValue)


def _load_object_list(self):
    """Fill ``self.object_list`` from the PREMIS file (metatype 1-3) or from
    the package METS file (metatype 4)."""
    if self.metatype in [1, 2, 3]:
        self.Premis_filepath = u'%s/%s/%s_PREMIS.xml' % (
            self.SIPpath, self.ObjectIdentifierValue,
            self.ObjectIdentifierValue)
        self.object_list, errno, why = ESSMD.getPremisObjects(
            FILENAME=self.Premis_filepath)
        if errno == 0:
            logging.info(
                'Succeeded to get object_list from premis for information package: %s',
                self.ObjectIdentifierValue)
        else:
            self._report_failure(
                'Problem to get object_list from premis for information package: %s, errno: %s, detail: %s'
                % (self.ObjectIdentifierValue, str(errno), str(why)))
    elif self.metatype in [4]:
        # The METS file is looked up as sip.xml first, then mets.xml.
        mets_file = ''
        self.SIPmets_objpath = ''
        for candidate in ('sip.xml', 'mets.xml'):
            candidate_path = os.path.join(self.SIPpath, candidate)
            if os.path.exists(candidate_path):
                mets_file = candidate
                self.SIPmets_objpath = candidate_path
                break
        if not self.SIPmets_objpath:
            self._report_failure(
                'Problem to find METS file for information package: %s in path: %s'
                % (self.ObjectIdentifierValue, self.SIPpath), db_mode=2)
            return
        self.object_list, errno, why = ESSMD.getAIPObjects(
            FILENAME=self.SIPmets_objpath)
        if errno == 0:
            logging.info(
                'Succeeded to get object_list from METS for information package: %s',
                self.ObjectIdentifierValue)
            # The METS file itself is added to the list so that it is
            # validated like any other entry.
            # NOTE(review): the errno of this checksum call is not checked.
            self.F_Checksum, errno, why = ESSPGM.Check().checksum(
                self.SIPmets_objpath, self.ChecksumAlgorithm)
            self.F_SIZE = os.stat(self.SIPmets_objpath)[6]
            self.object_list.append([
                mets_file, self.ChecksumAlgorithm_name, self.F_Checksum,
                self.F_SIZE, ''
            ])
        else:
            self._report_failure(
                'Problem to get object_list from METS for information package: %s, errno: %s, detail: %s'
                % (self.ObjectIdentifierValue, str(errno), str(why)),
                db_mode=2)


def _validate_objects(self):
    """Check every entry of ``self.object_list`` against the filesystem,
    stopping at the first entry that fails."""
    self.startTime = self._time_of_day()
    self.ObjectNumItems = 0
    self.ObjectSize = 0
    # The generated metadata files are not listed in the PREMIS file, so
    # they are appended here as 'ARCHMETAxmlWrap' entries.
    if self.metatype in [1, 2]:
        metadata_kinds = ['PREMIS']
    elif self.metatype == 3:
        metadata_kinds = ['ADDML', 'PREMIS']
    else:
        metadata_kinds = []
    for kind in metadata_kinds:
        self.tmp_object_id = u'%s/%s_%s.xml' % (
            self.ObjectIdentifierValue, self.ObjectIdentifierValue, kind)
        self.object_list.append(
            [self.tmp_object_id, '', '', '', '', 'ARCHMETAxmlWrap', kind])

    for self.object in self.object_list:
        self._check_current_object()
        if not self.ok:
            # Stop at the first failure so that no follow-up (and
            # misleading) events are produced for later entries.
            break
        self.ObjectNumItems += 1


def _check_current_object(self):
    """Validate ``self.object``: access rights, size and checksum."""
    logging.debug('variable self.SIPpath: %s, type: %s' %
                  (self.SIPpath, type(self.SIPpath)))
    logging.debug('variable self.object[0]: %s, type: %s' %
                  (self.object[0], type(self.object[0])))
    self.filepath = os.path.join(self.SIPpath, self.object[0])
    logging.debug('variable self.filepath: %s, type: %s' %
                  (self.filepath, type(self.filepath)))

    first_premis_entry = (self.metatype in [1, 2, 3]
                          and self.ObjectNumItems == 0)
    if first_premis_entry:
        if self.object[0] == self.ObjectIdentifierValue:
            logging.info('First premis object match information package: %s',
                         self.ObjectIdentifierValue)
        else:
            self._report_failure(
                'First premis object do not match information package: %s, premis_object: %s'
                % (self.ObjectIdentifierValue, self.object[0]))
            return

    # NOTE(review): every entry (not only directories) must be executable,
    # readable and writeable -- confirm that X_OK is intended for files.
    if not os.access(self.filepath, os.X_OK):
        self._report_failure(
            'Object path: %s do not exist or is not executable!' %
            self.filepath)
        return
    if not os.access(self.filepath, os.R_OK):
        self._report_failure(
            'Object path: %s do not exist or is not readable!' %
            self.filepath)
        return
    if not os.access(self.filepath, os.W_OK):
        self._report_failure(
            'Missing permission, Object path: %s is not writeable!' %
            self.filepath)
        return

    if self.metatype in [1, 2, 3]:
        if first_premis_entry:
            # The first entry (the package itself) has no size or checksum
            # check.
            return
        if self.object[5] in ('ARCHMETA', 'ARCHMETAxmlWrap'):
            # Metadata files only have to be non-empty.
            if not int(os.stat(self.filepath)[6]) > 0:
                self._report_failure(
                    'Filesize for object path: %s is 0 bytes. The size should be more then 0 bytes!'
                    % self.filepath)
        else:
            self._verify_size_and_checksum(4, 'premis')
    elif self.metatype in [4]:
        # Entry layout: [objectIdentifierValue, messageDigestAlgorithm,
        #                messageDigest, a_SIZE, a_MIMETYPE]
        self._verify_size_and_checksum(3, 'METS')


def _verify_size_and_checksum(self, size_index, metadata_name):
    """Compare size and checksum of ``self.filepath`` with ``self.object``.

    Args:
        size_index: index in ``self.object`` holding the expected size.
        metadata_name: 'premis' or 'METS'; only used in the error messages.
    """
    expected_size = self.object[size_index]
    actual_size = os.stat(self.filepath)[6]
    if int(actual_size) != int(expected_size):
        self._report_failure(
            'Filesize for object path: %s is %s and %s object size is %s. The sizes must match!'
            % (self.filepath, str(actual_size), metadata_name,
               str(expected_size)))
        return
    self.ObjectSize += int(expected_size)

    self.F_Checksum, errno, why = ESSPGM.Check().checksum(
        self.filepath, self.object[1])
    if errno:
        self._report_failure('Failed to get checksum for: %s, Error: %s' %
                             (self.filepath, str(why)))
        return
    if self.F_Checksum != self.object[2]:
        self._report_failure(
            'Checksum for object path: %s is %s and %s object checksum is %s. The checksum must match!'
            % (self.filepath, self.F_Checksum, metadata_name,
               self.object[2]))


def _check_unlisted_files(self):
    """Fail when the package directory holds files the metadata omits.

    Every unlisted file gets its own event; none of them stops the scan.
    """
    listed = set(entry[0] for entry in self.object_list)
    if self.metatype in [1, 2, 3]:
        package_path = os.path.join(self.SIPpath, self.ObjectIdentifierValue)
        for self.filesystem_object in ESSPGM.Check().GetFiletree(
                package_path):
            # PREMIS entries are relative to the directory above the package.
            listed_name = os.path.join(self.ObjectIdentifierValue,
                                       self.filesystem_object)
            self.missmatch_flag = 0 if listed_name in listed else 1
            if self.missmatch_flag:
                self.filesystempath = u'%s/%s/%s' % (
                    self.SIPpath, self.ObjectIdentifierValue,
                    self.filesystem_object)
                self._report_failure(
                    'Filesystem file: %s do not exist in metadatafile for object: %s'
                    % (self.filesystempath, package_path))
    if self.metatype in [4]:
        for self.filesystem_object in ESSPGM.Check().GetFiletree(
                self.SIPpath):
            self.missmatch_flag = (
                0 if self.filesystem_object in listed else 1)
            if self.missmatch_flag:
                self.filesystempath = u'%s/%s' % (self.SIPpath,
                                                  self.filesystem_object)
                self._report_failure(
                    'Filesystem file: %s do not exist in metadatafile for object: %s'
                    % (self.filesystempath, self.SIPpath))


def _check_information_class(self):
    """For metatype 4, require the object's INFORMATIONCLASS to equal the
    one defined in the policy."""
    if self.metatype not in [4]:
        return
    details = (self.ObjectIdentifierValue, self.INFORMATIONCLASS,
               self.Policy_INFORMATIONCLASS, self.PolicyId)
    if self.INFORMATIONCLASS == self.Policy_INFORMATIONCLASS:
        self.event_info = 'Object: %s InformationClass: %s match defined InformaionClass: %s in PolicyID: %s' % details
        logging.info(self.event_info)
    else:
        self._report_failure(
            'Object: %s InformationClass: %s do not match defined InformationClass: %s in PolicyID: %s'
            % details)


def _set_aip_status(self, status_process, status_activity):
    """Set the status of the current object; log and return errno."""
    errno, why = ESSPGM.DB().SetAIPstatus(
        self.IngestTable, self.ext_IngestTable, AgentIdentifierValue,
        self.ObjectUUID, status_process, status_activity)
    if errno:
        logging.error('Failed to update DB status for AIP: ' +
                      str(self.ObjectIdentifierValue) + ' error: ' +
                      str(why))
    return errno


def _report_failure(self, event_info, event_type='1025', event_detail='',
                    db_mode=None):
    """Log *event_info* as an error, record a failure event and clear
    ``self.ok``.

    Args:
        event_info: message stored in ``self.event_info``, logged and
            attached to the event.
        event_type: first positional argument of ``Events().create``.
        event_detail: second positional argument of ``Events().create``.
        db_mode: seventh positional argument of ``Events().create``;
            defaults to ``self.DBmode``.
    """
    if db_mode is None:
        db_mode = self.DBmode
    self.event_info = event_info
    logging.error(self.event_info)
    ESSPGM.Events().create(event_type, event_detail,
                           'ESSArch SIPValidateFormat', ProcVersion, '1',
                           self.event_info, db_mode,
                           self.ObjectIdentifierValue)
    self.ok = 0


def _time_of_day(self):
    """Return the local wall-clock time of day as a ``timedelta``.

    ``time.localtime()`` is read once so that hours, minutes and seconds
    all come from the same instant.
    """
    now = time.localtime()
    return datetime.timedelta(seconds=now[5], minutes=now[4], hours=now[3])
Web - http://www.essolutions.se Email - [email protected] ''' __majorversion__ = "2.5" __revision__ = "$Revision$" __date__ = "$Date$" __author__ = "$Author$" import re __version__ = '%s.%s' % (__majorversion__, re.sub('[\D]', '', __revision__)) import sys, ESSDB, ESSMSSQL, uuid, datetime, ESSMD, MySQLdb, ftplib, socket, os, pytz from lxml import etree from django.utils import timezone from essarch.models import ArchiveObjectMetadata, ArchiveObject MD_FTP_USER = ESSDB.DB().action('ESSConfig', 'GET', ('Value', ), ('Name', 'MD_FTP_USER'))[0][0] MD_FTP_PASS = ESSDB.DB().action('ESSConfig', 'GET', ('Value', ), ('Name', 'MD_FTP_PASS'))[0][0] MD_FTP_HOST = ESSDB.DB().action('ESSConfig', 'GET', ('Value', ), ('Name', 'MD_FTP_HOST'))[0][0] MD_FTP_PORT = ESSDB.DB().action('ESSConfig', 'GET', ('Value', ), ('Name', 'MD_FTP_PORT'))[0][0] MD_FTP_ROOT_PATH = ESSDB.DB().action('ESSConfig', 'GET', ('Value', ), ('Name', 'MD_FTP_ROOT_PATH'))[0][0] MD_FTP_ROOT_KEY = ESSDB.DB().action('ESSConfig', 'GET', ('Value', ), ('Name', 'MD_FTP_ROOT_KEY'))[0][0] class prod: TimeZone = timezone.get_default_timezone_name() tz = pytz.timezone(TimeZone)
def ObjectValidate(self, InTable):
    """Move objects that are waiting for approval to their next StatusProcess.

    Selects every row of ``InTable`` with StatusActivity '0' and a
    StatusProcess between 19 and 21.  For each row the active archive
    policy (PolicyStat=1) decides, through ``WaitProjectApproval``, how the
    object is approved:

    * ``1`` -- look the object up in ``ExtPrjDB``: no row sets
      StatusProcess 20, a row whose Status is not 1 sets 21, a row with
      Status 1 sets 24.
    * ``2`` -- look for an ``IngestQueue`` row with Status <= 2: when one
      exists the object is set to 24, an ingest-start event is written and
      the queue row is marked Status 5; otherwise the object is set to 21.
    * anything else -- the object is set to 24 without any check.

    Status changes go through ``ESSPGM.DB().SetAIPstatus``.  Events, the
    queue-row update and the "Change to StatusProcess" log line are only
    produced when that call reported success, so a failed update leaves
    the object (and its queue request) untouched for the next pass.

    Args:
        InTable: name of the ingest table to scan and update.

    Returns:
        None.  Results are visible through the database, the event log and
        the instance attributes set while processing (``IngestTable``,
        ``ext_IngestTable``, ``dbget``, ``obj``, ``ObjectUUID``,
        ``ObjectIdentifierValue``, ``StatusProcess``, ``extOBJ``,
        ``PrjAccepted``, ``PrjDBget`` and the ``PrjDB_*`` fields).
    """
    self.IngestTable = InTable
    # The external table is only passed on when ExtDBupdate is enabled.
    self.ext_IngestTable = self.IngestTable if ExtDBupdate else ''

    def set_status(status_process):
        """Set StatusProcess on the current object; return True on success."""
        errno, why = ESSPGM.DB().SetAIPstatus(
            self.IngestTable, self.ext_IngestTable, AgentIdentifierValue,
            self.ObjectUUID, status_process, 0)
        if errno:
            logging.error('Failed to update DB status for AIP: %s error: %s',
                          self.ObjectIdentifierValue, why)
            return False
        return True

    def log_change(status_process, prefix=''):
        """Log that the current object reached ``status_process``."""
        logging.info('%sChange to StatusProcess %s, ObjectIdentifierValue %s',
                     prefix, status_process, self.ObjectIdentifierValue)

    def move_to(status_process):
        """Set a non-approved status (20/21) and log it when it succeeded."""
        if set_status(status_process):
            log_change(status_process)

    def approve(event_text=''):
        """Set StatusProcess 24 and write the approval event.

        Returns True when the status update succeeded.
        """
        if not set_status(24):
            return False
        ESSPGM.Events().create('1020', '', 'ESSArch SIPValidateApproval',
                               ProcVersion, '0', event_text, 2,
                               self.ObjectIdentifierValue)
        return True

    # Objects waiting for approval in the local DB.
    self.dbget, errno, why = ESSDB.DB().action(
        self.IngestTable, 'GET4',
        ('ObjectUUID', 'ObjectIdentifierValue', 'PolicyId', 'StatusProcess'),
        ('StatusActivity', '=', '0', 'AND',
         'StatusProcess', 'BETWEEN', 19, 'AND', 21))
    if errno:
        # Nothing trustworthy to iterate over; try again on the next pass.
        logging.error('Failed to access Local DB, error: %s', why)
        return

    # The loop variable is an instance attribute on purpose: self.obj is
    # kept for any code that reads it after this method has run.
    for self.obj in self.dbget or ():
        self.ObjectUUID = self.obj[0]
        self.ObjectIdentifierValue = self.obj[1]
        PolicyID = self.obj[2]
        self.StatusProcess = self.obj[3]

        ArchivePolicy_objs = ArchivePolicy.objects.filter(
            PolicyStat=1, PolicyID=PolicyID)[:1]
        if not ArchivePolicy_objs:
            logging.error('Missing PolicyID: %s in db', PolicyID)
            continue
        ArchivePolicy_obj = ArchivePolicy_objs.get()
        approval_mode = ArchivePolicy_obj.WaitProjectApproval
        logging.info(
            'StatusProcess %s, ObjectIdentifierValue %s, WaitForApproval: %s',
            self.StatusProcess, self.ObjectIdentifierValue, approval_mode)

        if approval_mode == 1:
            ###############################################################
            # Approval is read from ExtPrjDB
            ###############################################################
            self.extOBJ = 0
            # Reset per object so a value from a previous row (or a missing
            # attribute) can never decide the outcome for this one.
            self.PrjAccepted = 0
            self.PrjDBget, errno, why = ESSDB.DB().action(
                'ExtPrjDB', 'GET3',
                ('DataObjectSize', 'DataObjectNumItems', 'Signature',
                 'Status'),
                ('DataObjectIdentifier', self.ObjectIdentifierValue))
            if errno:
                logging.error('Failed to access Local DB: %s error: %s',
                              self.ObjectIdentifierValue, why)
            if self.PrjDBget:
                prj_row = self.PrjDBget[0]
                self.extOBJ = 1
                self.PrjDB_DataObjectSize = prj_row[0]  # not in use
                self.PrjDB_DataObjectNumItems = prj_row[1]  # not in use
                self.PrjDB_Signature = prj_row[2]  # not in use
                self.PrjDB_Status = prj_row[3]
                # Only Status 1 counts as accepted; 0 and any unexpected
                # value are treated as "not accepted".
                self.PrjAccepted = 1 if self.PrjDB_Status == 1 else 0

            if not self.extOBJ:
                # Object does not exist in prjDB (20)
                move_to(20)
            elif not self.PrjAccepted:
                # Object is not accepted in prjDB (21)
                move_to(21)
            elif approve():
                # Object is accepted in prjDB (24) and RFNext and OK (0)
                log_change(24)

        elif approval_mode == 2:
            ###############################################################
            # Approval is an open request in IngestQueue (Status <= 2)
            ###############################################################
            DbRow = IngestQueue.objects.filter(
                ObjectIdentifierValue=self.ObjectIdentifierValue,
                Status__lte=2)[:1]
            if not DbRow:
                # Object is not accepted in ReqIngestQueue (21)
                move_to(21)
                continue
            DbRow = DbRow.get()
            # Object is accepted in ReqIngestQueue (24) and RFNext and OK (0).
            # The request is only consumed (Status 5) once the object really
            # moved to 24; otherwise it stays queued and is retried.
            if approve():
                event_info = 'Start to Ingest SIP with ObjectIdentifierValue: %s, ReqUUID: %s' % (
                    DbRow.ObjectIdentifierValue, DbRow.ReqUUID)
                logging.info(event_info)
                ESSPGM.Events().create('1302', DbRow.ReqPurpose,
                                       'ESSArch Ingest', ProcVersion, '0',
                                       event_info, 2,
                                       DbRow.ObjectIdentifierValue)
                DbRow.Status = 5
                DbRow.save()
                log_change(24)

        else:
            ###############################################################
            # Skip Object check in ReqIngestQueue (24) and RFNext and OK (0)
            ###############################################################
            if approve('Skip to check ReqIngestQueue'):
                log_change(24, 'Skip check, ')