def _checkValidParameters(self, schema, tempMetadataHolder, parameterNames):
    '''Report metadata fields that have no matching parameter name.

    Each entry yielded by iterating ``tempMetadataHolder`` is compared
    with the ``name`` attribute of the objects in ``parameterNames``.
    An entry without a match is reported under the key
    ``"<schema>:<entry>"``. The first time a key is seen it is recorded
    in ``self.unsupported_parametername``, a warning is logged and a
    ``RegistrationStatus`` with WARNING status is saved. Keys that are
    already recorded are skipped silently.
    '''
    known_names = set(entry.name for entry in parameterNames)

    for field_name in tempMetadataHolder:
        if field_name in known_names:
            continue

        schema_paramname = ":".join([str(schema), str(field_name)])
        if schema_paramname in self.unsupported_parametername:
            # already reported once
            continue

        self.unsupported_parametername[schema_paramname] = "1"
        fail_message = "Unsupported parameter name found: " + \
            schema_paramname
        logger.warning(fail_message)
        status_entry = RegistrationStatus(
            action=self.ingest_action,
            status=RegistrationStatus.WARNING,
            message=fail_message,
            experiment=self.modelExperiment)
        status_entry.save()
def _saveParameter(self, parameterTypeClass, parameterName,
                   parameterValue, parameterSet):
    '''Save the metadata field in the database.

    ``parameterTypeClass`` is the name of a class in ``models``; it is
    looked up with ``getattr`` and instantiated with the parameter set,
    the parameter name object and the value.

    When ``parameterName.isNumeric()`` is true the value is converted
    with ``float()`` and stored as ``numerical_value``. If the
    conversion raises ``ValueError`` nothing is stored; instead a
    warning is logged and a ``RegistrationStatus`` with WARNING status
    is saved. Otherwise the value is stored as ``string_value``.

    Only the ``float()`` conversion is guarded, so a ``ValueError``
    raised while building or saving the model propagates to the caller
    rather than being reported as an invalid numeric value.

    Reference:
    http://stackoverflow.com/questions/452969/does-python-have-an-equivalent-to-java-class-forname
    '''
    if parameterName.isNumeric():
        try:
            numerical_value = float(parameterValue)
        except ValueError:
            schema_paramname = str(parameterName.schema) + ":" + \
                parameterName.name
            # %-formatting so a non-str value cannot raise in here
            fail_message = "Invalid value for numeric parameter %s: %s" % (
                schema_paramname, parameterValue)
            logger.warning(fail_message)
            rs = RegistrationStatus(action=self.ingest_action,
                                    status=RegistrationStatus.WARNING,
                                    message=fail_message,
                                    experiment=self.modelExperiment)
            rs.save()
            return
        string_value = None
    else:
        numerical_value = None
        string_value = parameterValue

    parameter = getattr(models, parameterTypeClass)(
        parameterset=parameterSet,
        name=parameterName,
        string_value=string_value,
        numerical_value=numerical_value)
    parameter.save()
def add_status(status, message, exception=False):
    '''Save a RegistrationStatus entry and log the same message.

    status    -- one of RegistrationStatus.PASS, WARNING or ERROR; it
                 selects the logger method (info, warning, error).
    message   -- text of the status entry.
    exception -- when true, the traceback of the exception currently
                 being handled is appended to the message inside a
                 ``<div class="traceback">`` element.

    The entry is created with the surrounding ``current_action`` and
    ``experiment``. When ``experiment`` is set, the log line is prefixed
    with its id; otherwise the id of the saved entry is appended to
    ``idless_statuses``.

    Raises KeyError for any other ``status`` value; this happens after
    the entry has been saved.
    '''
    global idless_statuses

    if exception:
        import traceback
        # The traceback is embedded in markup, so escape it first:
        # frames such as "<module>" would otherwise be read as tags.
        # NOTE(review): assumes the message is rendered as HTML - confirm.
        trace = traceback.format_exc()
        trace = trace.replace("&", "&amp;")
        trace = trace.replace("<", "&lt;").replace(">", "&gt;")
        # the opening tag must not be self-closing; "</div>" closes it
        message = message + '<div class="traceback">' + \
            trace.replace("\n", "<br/>") + "</div>"

    rs = RegistrationStatus(action=current_action,
                            status=status,
                            message=message,
                            experiment=experiment)
    rs.save()

    appropriate_logger = {
        RegistrationStatus.PASS: logger.info,
        RegistrationStatus.WARNING: logger.warning,
        RegistrationStatus.ERROR: logger.error,
    }[status]

    if experiment:
        appropriate_logger("#%d: %s" % (experiment.id, message))
    else:
        appropriate_logger(message)
        # keep track of statuses before we got an experiment, for later
        idless_statuses.append(rs.id)
def endElementNS(self, name, qname):
    '''Handle the end of an element.

    ``name`` is indexed as ``name[1]`` to get the element name without
    the namespace; ``qname`` is not used.

    Depending on the element and the current parser state this either
    clears a state flag, saves the experiment or dataset gathered for
    a ``dmdSec``, forwards the event to the custom handler, or saves
    the parameters collected for the current xmlData child element.
    The order of the branches matters: the first matching one wins.
    '''
    # just get the element name without the namespace
    elName = name[1]

    if elName == 'dmdSec':
        self.inDmdSec = False
        if self.processExperimentStruct:
            self._saveExperimentModel()
        elif self.processDatasetStruct:
            self._saveDatasetModel()
        self.metsObject = None
        self.processExperimentStruct = False
        self.processDatasetStruct = False
    elif elName == 'title' and self.inDmdSec:
        self.grabTitle = False
    elif elName == 'startTime' and self.processExperimentStruct:
        self.grabStartTime = False
    elif elName == 'endTime' and self.processExperimentStruct:
        self.grabEndTime = False
    elif elName == 'url' and self.processExperimentStruct:
        self.grabExperimentUrl = False
    elif elName == 'abstract' and self.processExperimentStruct:
        self.grabAbstract = False
    elif elName == 'name' and self.processExperimentStruct:
        self.inName = False
    elif elName == 'namePart' and self.inName:
        self.grabMightBeAuthor = False
    elif elName == 'roleTerm' and self.inName:
        self.grabRoleTerm = False
        self.mightBeAuthor = None
    elif elName == 'name' and self.inInstitution:
        self.grabInstitution = False
    elif elName == 'agent':
        self.inInstitution = False
    elif elName == 'amdSec':
        # we're done processing the metadata entries
        self.inAmdSec = False
        # let's reset the cached experiment model object
        self.modelExperiment = None
    elif elName == 'techMD' and self.inAmdSec:
        self.inTechMd = False
        self.metadataId = None
        self.metsObject = None
        self.processMetadata = False
    elif elName == 'xmlData' and self.inTechMd:
        self.inXmlData = False
    elif elName != self.xmlDataChildElement and \
            self.customHandler is not None:
        self.customHandler.endElement(elName)
    elif elName == self.xmlDataChildElement and self.inXmlData:
        if self.customHandler is not None:
            self.tempMetadataHolder = self.customHandler.metadataDict
            self._saveCollectedMetadata()
        # reset the current xmlData child element so that if a new
        # parameter set is read, we can process it again
        self.xmlDataChildElement = None
        self.customHandler = None
    elif elName == self.parameterName and \
            self.xmlDataChildElement is not None:
        # reset self.parameterName to None so the next parameter can be
        # processed
        self.parameterName = None


def _saveExperimentModel(self):
    '''Save the experiment described by self.metsObject and its authors.'''
    # save the institution value before we finalise the experiment
    self.metsObject.institution = self.institution

    # reuse the existing experiment when an id was supplied
    if self.tardisExpId:
        self.modelExperiment = models.Experiment.objects.get(
            pk=self.tardisExpId)
    else:
        self.modelExperiment = models.Experiment()

    experiment = self.modelExperiment
    experiment.id = self.tardisExpId
    experiment.url = self.metsObject.url
    experiment.approved = True
    experiment.title = self.metsObject.title
    experiment.institution_name = self.metsObject.institution
    experiment.description = self.metsObject.description
    experiment.start_time = self.metsObject.start_time
    experiment.end_time = self.metsObject.end_time
    experiment.created_by = self.createdBy
    experiment.save()

    self.holder.experimentDatabaseId = experiment.id

    # authors keep the order in which they are listed
    for order, author in enumerate(self.metsObject.authors):
        author_experiment = models.Author_Experiment(
            experiment=experiment, author=author, order=order)
        author_experiment.save()


def _saveDatasetModel(self):
    '''Save the dataset described by self.metsObject and remember it.'''
    self.modelDataset = models.Dataset(
        experiment=self.modelExperiment,
        description=self.metsObject.title,
        immutable=settings.IMMUTABLE_METS_DATASETS)
    self.modelDataset.save()

    # keep the dataset in a dictionary so that it can be looked up
    # easily later on when the datafiles are processed
    self.datasetLookupDict[self.metsObject.id] = self.modelDataset


def _saveCollectedMetadata(self):
    '''Save self.tempMetadataHolder under the schema of the current element.

    When no schema matches ``self.elementNamespace`` a warning is logged
    on every call; the namespace is recorded in
    ``self.unsupported_schema`` and a RegistrationStatus is saved only
    the first time.
    '''
    try:
        schema = models.Schema.objects.get(
            namespace__exact=self.elementNamespace)

        # get the associated parameter names for the given schema
        parameterNames = models.ParameterName.objects.filter(
            schema__namespace__exact=schema.namespace).order_by('id')

        if self.metadataId in self.holder.metadataMap:
            self._saveParameterSets(schema, parameterNames)
    except models.Schema.DoesNotExist:
        fail_message = 'Unsupported schema found ' + \
            self.elementNamespace
        logger.warning(fail_message)
        if self.elementNamespace not in self.unsupported_schema:
            self.unsupported_schema[self.elementNamespace] = "1"
            rs = RegistrationStatus(action=self.ingest_action,
                                    status=RegistrationStatus.WARNING,
                                    message=fail_message,
                                    experiment=self.modelExperiment)
            rs.save()


def _saveParameterSets(self, schema, parameterNames):
    '''Create parameter sets for the objects mapped to self.metadataId.

    Objects are dispatched on their class name ('Experiment', 'Dataset'
    or 'Datafile'). For each kind only the first object visited gets a
    parameter set; a second 'Experiment' raises Exception.
    '''
    # trigger holder used to check if we still need to create another
    # parameterset entry in the DB
    createParamSetFlag = {'experiment': True,
                          'dataset': True,
                          'datafile': True}

    for metsObject in self.holder.metadataMap[self.metadataId]:
        self.metsObject = metsObject
        metsObjectClassName = metsObject.__class__.__name__

        if metsObjectClassName == 'Experiment':
            if not createParamSetFlag['experiment']:
                # this is not even allowed as there's only going
                # to be one experiment per METS file
                raise Exception('forbidden state!')
            parameterSet = models.ExperimentParameterSet(
                schema=schema, experiment=self.modelExperiment)
            parameterSet.save()
            self._saveSchemaParameters(
                'ExperimentParameter', schema, parameterNames,
                parameterSet)
            createParamSetFlag['experiment'] = False

        elif metsObjectClassName == 'Dataset':
            if createParamSetFlag['dataset']:
                dataset = self.datasetLookupDict[metsObject.id]
                datasetParameterSet = models.DatasetParameterSet(
                    schema=schema, dataset=dataset)
                datasetParameterSet.save()
                self._saveSchemaParameters(
                    'DatasetParameter', schema, parameterNames,
                    datasetParameterSet)
                # disable creation for the next visit
                createParamSetFlag['dataset'] = False

        elif metsObjectClassName == 'Datafile':
            # save the "hard" metadata of this datafile first so that
            # there is a DB entry to attach the "soft" metadata to
            self._attachModelDatafile(metsObject)

            # TODO: we need to note here that we are only creating a
            # datafile entry in the DB for files that have
            # corresponding metadata. if we are to create a file entry
            # for files with no metadata, we'll need to get the
            # unaccessed datafiles from datasetLookupDict.
            if createParamSetFlag['datafile']:
                datafileParameterSet = models.DatafileParameterSet(
                    schema=schema, dataset_file=self.modelDatafile)
                datafileParameterSet.save()
                self._saveSchemaParameters(
                    'DatafileParameter', schema, parameterNames,
                    datafileParameterSet)
                createParamSetFlag['datafile'] = False


def _saveSchemaParameters(self, parameterTypeClass, schema, parameterNames,
                          parameterSet):
    '''Check the collected names, then save each non-empty known value.'''
    self._checkValidParameters(schema, self.tempMetadataHolder,
                               parameterNames)

    for parameterName in parameterNames:
        if parameterName.name in self.tempMetadataHolder:
            parameterValue = self.tempMetadataHolder[parameterName.name]
            if parameterValue != '':
                self._saveParameter(parameterTypeClass, parameterName,
                                    parameterValue, parameterSet)


def _attachModelDatafile(self, metsDatafile):
    '''Set self.modelDatafile to the stored file, creating it if missing.'''
    from datetime import datetime

    # look up the dataset this file belongs to
    thisFilesDataset = self.datasetLookupDict[metsDatafile.dataset.id]

    # also check if the file already exists
    # NOTE(review): the lookup uses the raw size while a new entry
    # stores 0 for a missing size - confirm that this is intended.
    matches = thisFilesDataset.dataset_file_set.filter(
        filename=metsDatafile.name, size=metsDatafile.size)

    if matches.count() == 0:
        self.modelDatafile = models.Dataset_File(
            dataset=thisFilesDataset,
            filename=metsDatafile.name,
            url=metsDatafile.url,
            size=metsDatafile.size or 0,
            protocol=metsDatafile.url.split('://')[0],
            transfer_status='Waiting',
            transfer_status_timestamp=datetime.now())
        self.modelDatafile.save()
    else:
        # get() raises if more than one entry matches
        self.modelDatafile = matches.get()