Exemplo n.º 1
0
    def _checkValidParameters(self, schema, tempMetadataHolder,
                             parameterNames):
        '''Report metadata names from the xml that the schema does not define.

        Every name found while iterating ``tempMetadataHolder`` that does
        not match the ``name`` attribute of any entry in ``parameterNames``
        is logged as a warning and stored as a WARNING RegistrationStatus.

        Each "<schema>:<name>" combination is reported only once; the ones
        already reported are remembered in self.unsupported_parametername.
        '''
        # names the schema knows about
        knownNames = set()
        for knownParameter in parameterNames:
            knownNames.add(knownParameter.name)

        for candidateName in tempMetadataHolder:
            if candidateName in knownNames:
                continue

            qualifiedName = ":".join((str(schema), str(candidateName)))

            if qualifiedName in self.unsupported_parametername:
                # this one has been reported before, stay quiet
                continue

            self.unsupported_parametername[qualifiedName] = "1"

            fail_message = "Unsupported parameter name found: " + qualifiedName
            logger.warning(fail_message)

            RegistrationStatus(
                action=self.ingest_action,
                status=RegistrationStatus.WARNING,
                message=fail_message,
                experiment=self.modelExperiment).save()
Exemplo n.º 2
0
    def _saveParameter(self, parameterTypeClass, parameterName,
                       parameterValue, parameterSet):
        '''Store one metadata value as a parameter row.

        ``parameterTypeClass`` is the name of a class looked up on the
        ``models`` module. When ``parameterName.isNumeric()`` is true the
        value is converted with ``float()`` and stored as numerical_value;
        otherwise it is stored unchanged as string_value.

        A ValueError raised while building or saving a numeric parameter is
        not propagated: it is logged and recorded as a WARNING
        RegistrationStatus instead.

        Reference (looking a class up by its name):
        http://stackoverflow.com/questions/452969/does-python-have-an-equivalent-to-java-class-forname

        '''
        numeric = parameterName.isNumeric()
        parameterModel = getattr(models, parameterTypeClass)

        if not numeric:
            parameterModel(parameterset=parameterSet,
                           name=parameterName,
                           string_value=parameterValue,
                           numerical_value=None).save()
            return

        try:
            parameterModel(parameterset=parameterSet,
                           name=parameterName,
                           string_value=None,
                           numerical_value=float(parameterValue)).save()
        except ValueError:
            qualifiedName = (str(parameterName.schema) + ":" +
                             parameterName.name)

            fail_message = ("Invalid value for numeric parameter " +
                            qualifiedName + ": " + parameterValue)

            logger.warning(fail_message)

            RegistrationStatus(action=self.ingest_action,
                               status=RegistrationStatus.WARNING,
                               message=fail_message,
                               experiment=self.modelExperiment).save()
Exemplo n.º 3
0
def add_status(status, message, exception=False):
    '''Save a RegistrationStatus and echo the message to the matching logger.

    status    -- one of RegistrationStatus.PASS / WARNING / ERROR; it also
                 selects logger.info / logger.warning / logger.error.
                 Any other value raises KeyError after the status row has
                 already been saved.
    message   -- text stored in the status row and written to the log.
    exception -- when true, the current traceback (traceback.format_exc())
                 is appended to the message inside a <div class="traceback">
                 element, with newlines turned into <br/>.

    ``current_action``, ``experiment`` and ``idless_statuses`` are taken
    from the surrounding scope (not visible in this block). While there is
    no experiment yet, the id of the saved status is appended to
    ``idless_statuses`` so it can be dealt with later.
    '''
    global idless_statuses

    if exception:
        import traceback
        # The wrapper must be opened with a real start tag: the previous
        # '<div class="traceback"/>' is not self-closing in HTML, so the
        # trailing '</div>' produced malformed markup.
        # NOTE(review): the traceback text is not HTML-escaped here, so
        # frames such as "<module>" end up as raw markup - confirm whether
        # the consumer of this message escapes it.
        message = (message + '<div class="traceback">' +
                   traceback.format_exc().replace("\n", "<br/>") + "</div>")

    rs = RegistrationStatus(action=current_action,
                            status=status,
                            message=message,
                            experiment=experiment,
                            )
    rs.save()

    appropriate_logger = {
        RegistrationStatus.PASS: logger.info,
        RegistrationStatus.WARNING: logger.warning,
        RegistrationStatus.ERROR: logger.error,
    }[status]

    if experiment:
        appropriate_logger("#%d: %s" % (experiment.id, message,))
    else:
        appropriate_logger(message)
        # keep track of statuses before we got an experiment, for later
        idless_statuses.append(rs.id)
Exemplo n.º 4
0
    def endElementNS(self, name, qname):
        '''Handle the end of a namespaced element and update the parser state.

        Presumably this is the xml.sax ``ContentHandler.endElementNS``
        callback (TODO confirm against the class definition, which is not
        visible here). Only ``name[1]`` is used; ``qname`` is ignored.

        Depending on which element closed and on the current state flags:

        * ``dmdSec``: saves the experiment (plus its authors) or the dataset
          described by ``self.metsObject``, then clears the object and the
          struct flags.
        * ``title``, ``startTime``, ``endTime``, ``url``, ``abstract``,
          ``name``, ``namePart``, ``roleTerm``, ``agent``, ``techMD``,
          ``xmlData``: reset the matching flags/attributes.
        * ``amdSec``: also clears the cached experiment model.
        * any other element while a custom handler is set: forwarded to
          ``self.customHandler.endElement``.
        * the xmlData child element: stores the collected metadata as
          experiment/dataset/datafile parameter sets for the schema whose
          namespace equals ``self.elementNamespace``. An unknown schema is
          logged and recorded (once per namespace) as a WARNING
          RegistrationStatus.

        Raises Exception('forbidden state!') when a second Experiment object
        is found for the same metadata id.
        '''
        # just get the element name without the namespace
        elName = name[1]

        if elName == 'dmdSec':
            self.inDmdSec = False
            # if we are currently processing an experiment structure, let's
            # save the institution value before we finalise the experiment
            if self.processExperimentStruct:
                self.metsObject.institution = self.institution

                # let's save the experiment in the DB
                if self.tardisExpId:
                    self.modelExperiment = models.Experiment.objects.get(
                        pk=self.tardisExpId)
                else:
                    self.modelExperiment = models.Experiment()
                # NOTE(review): this assignment also runs for a freshly
                # created Experiment, where self.tardisExpId is falsy.
                self.modelExperiment.id = self.tardisExpId
                self.modelExperiment.url = self.metsObject.url
                self.modelExperiment.approved = True
                self.modelExperiment.title = self.metsObject.title
                self.modelExperiment.institution_name = \
                                            self.metsObject.institution
                self.modelExperiment.description = self.metsObject.description
                self.modelExperiment.start_time = self.metsObject.start_time
                self.modelExperiment.end_time = self.metsObject.end_time
                self.modelExperiment.created_by = self.createdBy

                self.modelExperiment.save()

                self.holder.experimentDatabaseId = self.modelExperiment.id

                # x is each author's position in self.metsObject.authors,
                # stored as the ``order`` of the Author_Experiment row
                x = 0
                for author in self.metsObject.authors:
                    author_experiment = models.Author_Experiment(
                        experiment=self.modelExperiment,
                        author=author, order=x)
                    author_experiment.save()
                    x = x + 1

            elif self.processDatasetStruct:
                # let's save the dataset in the DB
                self.modelDataset = models.Dataset(
                    experiment=self.modelExperiment,
                    description=self.metsObject.title,
                    immutable=settings.IMMUTABLE_METS_DATASETS)
                self.modelDataset.save()

                # let's also save the modelDataset in a dictionary so that we
                # can look it up easily later on when we start processing
                # the datafiles.
                self.datasetLookupDict[self.metsObject.id] = self.modelDataset

            # this dmdSec is finished: drop the parsed object and reset
            # both struct flags
            self.metsObject = None

            self.processExperimentStruct = False
            self.processDatasetStruct = False

        elif elName == 'title' and self.inDmdSec:
            self.grabTitle = False

        elif elName == 'startTime' and self.processExperimentStruct:
            self.grabStartTime = False

        elif elName == 'endTime' and self.processExperimentStruct:
            self.grabEndTime = False

        elif elName == 'url' and self.processExperimentStruct:
            self.grabExperimentUrl = False

        elif elName == 'abstract' and self.processExperimentStruct:
            self.grabAbstract = False

        elif elName == 'name' and self.processExperimentStruct:
            self.inName = False

        elif elName == 'namePart' and self.inName:
            self.grabMightBeAuthor = False

        elif elName == 'roleTerm' and self.inName:
            self.grabRoleTerm = False
            self.mightBeAuthor = None

        elif elName == 'name' and self.inInstitution:
            self.grabInstitution = False

        elif elName == 'agent':
            self.inInstitution = False

        elif elName == 'amdSec':
            # we're done processing the metadata entries
            self.inAmdSec = False

            # let's reset the cached experiment model object
            self.modelExperiment = None

        elif elName == 'techMD' and self.inAmdSec:
            self.inTechMd = False
            self.metadataId = None
            self.metsObject = None
            self.processMetadata = False

        elif elName == 'xmlData' and self.inTechMd:
            self.inXmlData = False

        # NOTE: this branch is tested before the xmlDataChildElement branch
        # below, so an active custom handler receives every closing element
        # not matched above, except the xmlData child element itself.
        elif elName != self.xmlDataChildElement and \
                self.customHandler is not None:
            self.customHandler.endElement(elName)

        elif elName == self.xmlDataChildElement and self.inXmlData:

            # a custom handler, when present, supplies the metadata dict
            if self.customHandler is not None:
                self.tempMetadataHolder = self.customHandler.metadataDict

            # only a missing Schema is handled by the except clause below;
            # any other exception raised in this block propagates
            try:
                schema = models.Schema.objects.get(
                    namespace__exact=self.elementNamespace)

                # get the associated parameter names for the given schema
                parameterNames = \
                    models.ParameterName.objects.filter(
                    schema__namespace__exact=schema.namespace).order_by('id')

                # let's create a trigger holder which we can use to check
                # if we still need to create another parameterset entry in the
                # DB
                createParamSetFlag = {'experiment': True, 'dataset': True,
                                      'datafile': True}
                datasetParameterSet = None
                datafileParameterSet = None

                # dispatch on the class name of every parsed object that is
                # mapped to the current metadata id
                if self.metadataId  in self.holder.metadataMap:
                    for metsObject in self.holder.metadataMap[self.metadataId]:
                        self.metsObject = metsObject

                        metsObjectClassName = self.metsObject.__class__.__name__

                        if metsObjectClassName == 'Experiment':
                            if createParamSetFlag['experiment']:
                                # create a new parameter set for the metadata
                                parameterSet = \
                                    models.ExperimentParameterSet(
                                    schema=schema,
                                    experiment=self.modelExperiment)
                                parameterSet.save()

                                self._checkValidParameters(schema,
                                                    self.tempMetadataHolder,
                                                    parameterNames)

                                # now let's process the experiment parameters
                                # (empty-string values are skipped)
                                for parameterName in parameterNames:
                                    if parameterName.name in \
                                            self.tempMetadataHolder:
                                        parameterValue = self.tempMetadataHolder[
                                            parameterName.name]
                                        if parameterValue != '':
                                            self._saveParameter('ExperimentParameter',
                                                parameterName, parameterValue,
                                                parameterSet)

                                createParamSetFlag['experiment'] = False

                            else:
                                # this is not even allowed as there's only going
                                # to be one experiment per METS file
                                raise Exception('forbidden state!')

                        elif metsObjectClassName == 'Dataset':
                            # only the first Dataset seen for this metadata
                            # id gets a parameter set; later ones are skipped
                            if createParamSetFlag['dataset']:
                                dataset = self.datasetLookupDict[
                                    self.metsObject.id]

                                # create a new parameter set for the
                                # dataset metadata
                                datasetParameterSet = \
                                    models.DatasetParameterSet(schema=schema,
                                    dataset=dataset)

                                datasetParameterSet.save()

                                self._checkValidParameters(schema,
                                                    self.tempMetadataHolder,
                                                    parameterNames)

                                # now let's process the dataset parameters
                                # (empty-string values are skipped)
                                for parameterName in parameterNames:
                                    if parameterName.name in \
                                            self.tempMetadataHolder:
                                        parameterValue = self.tempMetadataHolder[
                                            parameterName.name]
                                        if parameterValue != '':
                                            self._saveParameter('DatasetParameter',
                                                parameterName, parameterValue,
                                                datasetParameterSet)

                                # disable creation for the next visit
                                createParamSetFlag['dataset'] = False

                        elif metsObjectClassName == 'Datafile':
                            # this will be a good time to save the
                            # "hard" metadata of this datafile so that
                            # when we start adding "soft" metadata
                            # parameters to it, we already have an
                            # entry for it in the DB

                            # look up the dataset this file belongs to
                            thisFilesDataset = self.datasetLookupDict[
                                self.metsObject.dataset.id]

                            # also check if the file already exists
                            # NOTE(review): the same filename/size lookup is
                            # repeated with get() in the else branch below;
                            # presumably get() fails when more than one row
                            # matches - confirm.
                            datafile = thisFilesDataset.dataset_file_set.filter(
                                filename=self.metsObject.name, size=self.metsObject.size)

                            if datafile.count() == 0:
                                size = self.metsObject.size

                                # store 0 when the parsed size is empty/None
                                if not self.metsObject.size:
                                    size = 0

                                from datetime import datetime
                                # protocol is the part of the url that
                                # precedes '://'
                                self.modelDatafile = models.Dataset_File(
                                    dataset=thisFilesDataset,
                                    filename=self.metsObject.name,
                                    url=self.metsObject.url,
                                    size=size,
                                    protocol=self.metsObject.url.split('://')[0],
                                    transfer_status='Waiting',
                                    transfer_status_timestamp=datetime.now()
                                    )

                                self.modelDatafile.save()
                            else:
                                self.modelDatafile = thisFilesDataset.dataset_file_set.get(
                                    filename=self.metsObject.name, size=self.metsObject.size)
                            # TODO: we need to note here that we are
                            # only creating a datafile entry in the DB
                            # for files that have corresponding
                            # metadata. if we are to create a file
                            # entry for files with no metadata, we'll
                            # need to get the unaccessed datafiles
                            # from datasetLookupDict.

                            # only the first Datafile seen for this metadata
                            # id gets a parameter set
                            if createParamSetFlag['datafile']:
                                # create a new parameter set for the metadata
                                datafileParameterSet = \
                                    models.DatafileParameterSet(schema=schema,
                                    dataset_file=self.modelDatafile)
                                datafileParameterSet.save()

                                self._checkValidParameters(schema,
                                                    self.tempMetadataHolder,
                                                    parameterNames)

                                # now let's process the datafile parameters
                                # (empty-string values are skipped)
                                for parameterName in parameterNames:
                                    if parameterName.name in \
                                            self.tempMetadataHolder:
                                        parameterValue = self.tempMetadataHolder[
                                            parameterName.name]
                                        if parameterValue != '':
                                            self._saveParameter('DatafileParameter',
                                                parameterName, parameterValue,
                                                datafileParameterSet)
                                createParamSetFlag['datafile'] = False

            except models.Schema.DoesNotExist:
                fail_message = 'Unsupported schema found ' + \
                    self.elementNamespace

                # the warning is logged on every occurrence, but the
                # RegistrationStatus is only saved the first time a given
                # namespace is seen
                logger.warning(fail_message)

                if not self.elementNamespace in self.unsupported_schema:
                    self.unsupported_schema[self.elementNamespace] = "1"

                    rs = RegistrationStatus(action=self.ingest_action,
                                            status=RegistrationStatus.WARNING,
                                            message=fail_message,
                                            experiment=self.modelExperiment)

                    rs.save()

            # reset the current xmlData child element so that if a new
            # parameter set is read, we can process it again
            self.xmlDataChildElement = None
            self.customHandler = None

        elif elName == self.parameterName and \
                self.xmlDataChildElement is not None:

            # reset self.parameterName to None so the next parameter can be
            # processed
            self.parameterName = None