示例#1
0
 def add_if_missing(parameterset, name, string_value=None,
                    numerical_value=None, datetime_value=None):
     """Create the parameter ``name`` on ``parameterset`` unless it exists.

     When a parameter with that name is already attached to the parameter
     set nothing is written; existing values are never overwritten.

     :param parameterset: parameter set the new parameter is attached to;
         its ``schema`` is used to look up the parameter name.
     :param name: value matched against ``ParameterName.name``.
     :param string_value: stored in the new parameter's ``string_value``.
     :param numerical_value: stored in the new parameter's
         ``numerical_value``.
     :param datetime_value: stored in the new parameter's
         ``datetime_value``.
     :raises ParameterName.DoesNotExist: if the schema of ``parameterset``
         has no parameter name called ``name``.
     """
     # An existence query is used instead of ``get()``: ``get()`` raises
     # ``MultipleObjectsReturned`` when the parameter is present more than
     # once, although in that case it is certainly not missing.
     already_present = ExperimentParameter.objects.filter(
         name__name=name, parameterset=parameterset).exists()
     if already_present:
         return

     param_name = ParameterName.objects.get(
         name=name, schema=parameterset.schema)
     param = ExperimentParameter(name=param_name,
                                 parameterset=parameterset)
     param.string_value = string_value
     param.numerical_value = numerical_value
     param.datetime_value = datetime_value
     param.save()
示例#2
0
文件: tasks.py 项目: UWA-FoS/trudat
 def add_if_missing(parameterset,
                    name,
                    string_value=None,
                    numerical_value=None,
                    datetime_value=None):
     """Attach a new parameter ``name`` to ``parameterset`` if none exists.

     If the parameter set already carries a parameter with that name the
     function returns without writing anything, so stored values are never
     overwritten.

     :param parameterset: parameter set that receives the parameter; the
         parameter name is resolved against ``parameterset.schema``.
     :param name: value matched against ``ParameterName.name``.
     :param string_value: stored in the new parameter's ``string_value``.
     :param numerical_value: stored in the new parameter's
         ``numerical_value``.
     :param datetime_value: stored in the new parameter's
         ``datetime_value``.
     :raises ParameterName.DoesNotExist: if the schema of ``parameterset``
         has no parameter name called ``name``.
     """
     # ``exists()`` rather than ``get()``: with duplicate rows ``get()``
     # raises ``MultipleObjectsReturned`` even though the parameter is
     # obviously present.
     if ExperimentParameter.objects.filter(
             name__name=name, parameterset=parameterset).exists():
         return

     param_name = ParameterName.objects.get(name=name,
                                            schema=parameterset.schema)
     param = ExperimentParameter(name=param_name,
                                 parameterset=parameterset)
     param.string_value = string_value
     param.numerical_value = numerical_value
     param.datetime_value = datetime_value
     param.save()
示例#3
0
def populate_pdb_pub_records():
    """Refresh the PDB-derived metadata of published PDB publications.

    Selects every experiment that has both a PDB parameter set and a
    publication parameter set but no draft-publication parameter set. For
    each one whose ``pdb-last-sync`` parameter is missing, empty, or older
    than ``PDB_REFRESH_INTERVAL``, the PDB entry is loaded through
    ``PDBCifHelper`` and:

    * the standard PDB parameters are added where missing,
    * sequence parameter sets are created if the publication has none,
    * all citation parameter sets are deleted and recreated,
    * the ``pdb-embargo`` parameter of that publication is removed,
    * ``pdb-last-sync`` is set to the current time.

    A publication whose PDB entry raises ``CifFile.StarError`` is skipped
    without touching ``pdb-last-sync``, so it is picked up again on the
    next run.
    """

    def _setting(setting_name):
        # Project settings take precedence over the app's default_settings.
        return getattr(settings, setting_name,
                       getattr(default_settings, setting_name))

    # Settings do not change while the task runs, so read them once.
    PUB_SCHEMA = _setting('PUBLICATION_SCHEMA_ROOT')
    PUB_SCHEMA_DRAFT = _setting('PUBLICATION_DRAFT_SCHEMA')
    PDB_SCHEMA = _setting('PDB_PUBLICATION_SCHEMA_ROOT')
    SEQ_SCHEMA = _setting('PDB_SEQUENCE_PUBLICATION_SCHEMA')
    CIT_SCHEMA = _setting('PDB_CITATION_PUBLICATION_SCHEMA')
    refresh_interval = _setting('PDB_REFRESH_INTERVAL')

    publications = Experiment.objects \
        .filter(experimentparameterset__schema__namespace=PDB_SCHEMA) \
        .filter(experimentparameterset__schema__namespace=PUB_SCHEMA) \
        .exclude(experimentparameterset__schema__namespace=PUB_SCHEMA_DRAFT) \
        .distinct()

    last_update_parameter_name = ParameterName.objects.get(
        name='pdb-last-sync',
        schema__namespace=PUB_SCHEMA)

    def add_if_missing(parameterset, name, string_value=None,
                       numerical_value=None, datetime_value=None):
        """Create parameter ``name`` on ``parameterset`` unless it exists."""
        # ``exists()`` instead of ``get()``: ``get()`` raises
        # ``MultipleObjectsReturned`` when the parameter is present more
        # than once, although it is then clearly not missing.
        if ExperimentParameter.objects.filter(
                name__name=name, parameterset=parameterset).exists():
            return
        param_name = ParameterName.objects.get(
            name=name, schema=parameterset.schema)
        param = ExperimentParameter(name=param_name,
                                    parameterset=parameterset)
        param.string_value = string_value
        param.numerical_value = numerical_value
        param.datetime_value = datetime_value
        param.save()

    for pub in publications:
        try:
            # try to get the last update time for the PDB data
            pdb_last_update_parameter = ExperimentParameter.objects.get(
                parameterset__schema__namespace=PUB_SCHEMA,
                name=last_update_parameter_name,
                parameterset__experiment=pub)
        except ExperimentParameter.DoesNotExist:
            # if the PDB last update time parameter doesn't exist,
            # we definitely need to update the data and create a last
            # update entry
            pdb_last_update_parameter = None
            needs_update = True
        else:
            last_update = pdb_last_update_parameter.datetime_value
            # A parameter without a timestamp counts as "never synced";
            # adding the interval to None would raise TypeError.
            needs_update = (last_update is None or
                            last_update + refresh_interval < timezone.now())

        if not needs_update:
            continue

        # 1. get the PDB info
        pdb_parameter_set = ExperimentParameterSet.objects.get(
            schema__namespace=PDB_SCHEMA,
            experiment=pub)
        pdb_id_parameter = ExperimentParameter.objects.get(
            name__name='pdb-id',
            parameterset=pdb_parameter_set)
        pdb_id = pdb_id_parameter.string_value
        # 1a. cosmetic change of case for PDB ID, if entered incorrectly
        if pdb_id != pdb_id.upper():
            pdb_id_parameter.string_value = pdb_id.upper()
            pdb_id_parameter.save()

        try:
            # 2. fetch the info from pdb.org
            pdb = PDBCifHelper(pdb_id)

            # 3. insert all standard pdb parameters
            add_if_missing(pdb_parameter_set, 'title',
                           string_value=pdb.get_pdb_title())
            add_if_missing(pdb_parameter_set, 'url',
                           string_value=pdb.get_pdb_url())

            # A ValueError while setting a numeric field is logged and
            # the remaining fields are still processed.
            numeric_fields = (
                ('resolution', pdb.get_resolution),
                ('r-value', pdb.get_obs_r_value),
                ('r-free', pdb.get_free_r_value),
            )
            for field_name, get_value in numeric_fields:
                try:
                    add_if_missing(pdb_parameter_set, field_name,
                                   numerical_value=get_value())
                except ValueError:
                    logger.error(
                        'PDB field "%s" could not be set for '
                        'publication Id %i \n %s' %
                        (field_name, pub.id, traceback.format_exc()))

            add_if_missing(pdb_parameter_set, 'space-group',
                           string_value=pdb.get_spacegroup())
            add_if_missing(pdb_parameter_set, 'unit-cell',
                           string_value=pdb.get_unit_cell())

            # 4. insert sequence info (lazy checking): sequences are only
            # written when the publication has no sequence parameter set
            has_sequences = ExperimentParameterSet.objects.filter(
                schema__namespace=SEQ_SCHEMA,
                experiment=pub).exists()
            if not has_sequences:
                for seq in pdb.get_sequence_info():
                    seq_parameter_set = ExperimentParameterSet(
                        schema=Schema.objects.get(namespace=SEQ_SCHEMA),
                        experiment=pub)
                    seq_parameter_set.save()
                    add_if_missing(seq_parameter_set, 'organism',
                                   string_value=seq['organism'])
                    add_if_missing(seq_parameter_set, 'expression-system',
                                   string_value=seq['expression_system'])
                    add_if_missing(seq_parameter_set, 'sequence',
                                   string_value=seq['sequence'])

            # 5. insert/update citation info (aggressive): existing
            # citation parameter sets are dropped and rebuilt
            ExperimentParameterSet.objects.filter(
                schema__namespace=CIT_SCHEMA,
                experiment=pub).delete()
            for citation in pdb.get_citations():
                cit_parameter_set = ExperimentParameterSet(
                    schema=Schema.objects.get(namespace=CIT_SCHEMA),
                    experiment=pub)
                cit_parameter_set.save()
                add_if_missing(cit_parameter_set, 'title',
                               string_value=citation['title'])
                add_if_missing(cit_parameter_set, 'authors',
                               string_value='; '.join(citation['authors']))
                add_if_missing(cit_parameter_set, 'journal',
                               string_value=citation['journal'])
                add_if_missing(cit_parameter_set, 'volume',
                               string_value=citation['volume'])
                add_if_missing(cit_parameter_set, 'page-range',
                               string_value='-'.join(
                                   [citation['page_first'],
                                    citation['page_last']]))
                add_if_missing(cit_parameter_set, 'doi',
                               string_value='http://dx.doi.org/' +
                                            citation['doi'])

            # 6. Remove the PDB embargo if set, since the update has
            # occurred and therefore the PDB must have been released.
            # The query is restricted to this publication: an unscoped
            # get() would delete another publication's embargo, or raise
            # MultipleObjectsReturned when several are embargoed.
            embargoes = ExperimentParameter.objects.filter(
                name__name='pdb-embargo',
                parameterset__schema__namespace=PUB_SCHEMA,
                parameterset__experiment=pub)
            for embargo in embargoes:
                embargo.delete()

            # 7. Set the last update parameter to be now
            if pdb_last_update_parameter is None:
                # NOTE(review): the selection query above already requires
                # a PUB_SCHEMA parameter set on the publication, so this
                # adds a second one - confirm that this is intended.
                pub_parameter_set = ExperimentParameterSet(
                    schema=Schema.objects.get(namespace=PUB_SCHEMA),
                    experiment=pub)
                pub_parameter_set.save()
                pdb_last_update_parameter = ExperimentParameter(
                    name=last_update_parameter_name,
                    parameterset=pub_parameter_set,
                    datetime_value=timezone.now())
            else:
                pdb_last_update_parameter.datetime_value = timezone.now()
            pdb_last_update_parameter.save()

        except CifFile.StarError:
            # PDB is either unavailable or invalid
            # (maybe notify the user somehow?)
            continue
示例#4
0
文件: tasks.py 项目: UWA-FoS/trudat
def populate_pdb_pub_records():
    """Refresh the PDB-derived metadata of published PDB publications.

    Selects every experiment that has both a PDB parameter set and a
    publication parameter set but no draft-publication parameter set. For
    each one whose ``pdb-last-sync`` parameter is missing, empty, or older
    than ``PDB_REFRESH_INTERVAL``, the PDB entry is loaded through
    ``PDBCifHelper`` and:

    * the standard PDB parameters are added where missing,
    * sequence parameter sets are created if the publication has none,
    * all citation parameter sets are deleted and recreated,
    * the ``pdb-embargo`` parameter of that publication is removed,
    * ``pdb-last-sync`` is set to the current time.

    A publication whose PDB entry raises ``CifFile.StarError`` is skipped
    without touching ``pdb-last-sync``, so it is picked up again on the
    next run.
    """

    def _setting(setting_name):
        # Project settings take precedence over the app's default_settings.
        return getattr(settings, setting_name,
                       getattr(default_settings, setting_name))

    # Settings do not change while the task runs, so read them once.
    PUB_SCHEMA = _setting('PUBLICATION_SCHEMA_ROOT')
    PUB_SCHEMA_DRAFT = _setting('PUBLICATION_DRAFT_SCHEMA')
    PDB_SCHEMA = _setting('PDB_PUBLICATION_SCHEMA_ROOT')
    SEQ_SCHEMA = _setting('PDB_SEQUENCE_PUBLICATION_SCHEMA')
    CIT_SCHEMA = _setting('PDB_CITATION_PUBLICATION_SCHEMA')
    refresh_interval = _setting('PDB_REFRESH_INTERVAL')

    publications = Experiment.objects \
        .filter(experimentparameterset__schema__namespace=PDB_SCHEMA) \
        .filter(experimentparameterset__schema__namespace=PUB_SCHEMA) \
        .exclude(experimentparameterset__schema__namespace=PUB_SCHEMA_DRAFT) \
        .distinct()

    last_update_parameter_name = ParameterName.objects.get(
        name='pdb-last-sync', schema__namespace=PUB_SCHEMA)

    def add_if_missing(parameterset,
                       name,
                       string_value=None,
                       numerical_value=None,
                       datetime_value=None):
        """Create parameter ``name`` on ``parameterset`` unless it exists."""
        # ``exists()`` instead of ``get()``: ``get()`` raises
        # ``MultipleObjectsReturned`` when the parameter is present more
        # than once, although it is then clearly not missing.
        if ExperimentParameter.objects.filter(
                name__name=name, parameterset=parameterset).exists():
            return
        param_name = ParameterName.objects.get(name=name,
                                               schema=parameterset.schema)
        param = ExperimentParameter(name=param_name,
                                    parameterset=parameterset)
        param.string_value = string_value
        param.numerical_value = numerical_value
        param.datetime_value = datetime_value
        param.save()

    for pub in publications:
        try:
            # try to get the last update time for the PDB data
            pdb_last_update_parameter = ExperimentParameter.objects.get(
                parameterset__schema__namespace=PUB_SCHEMA,
                name=last_update_parameter_name,
                parameterset__experiment=pub)
        except ExperimentParameter.DoesNotExist:
            # if the PDB last update time parameter doesn't exist,
            # we definitely need to update the data and create a last
            # update entry
            pdb_last_update_parameter = None
            needs_update = True
        else:
            last_update = pdb_last_update_parameter.datetime_value
            # A parameter without a timestamp counts as "never synced";
            # adding the interval to None would raise TypeError.
            needs_update = (last_update is None or
                            last_update + refresh_interval < timezone.now())

        if not needs_update:
            continue

        # 1. get the PDB info
        pdb_parameter_set = ExperimentParameterSet.objects.get(
            schema__namespace=PDB_SCHEMA, experiment=pub)
        pdb_id_parameter = ExperimentParameter.objects.get(
            name__name='pdb-id', parameterset=pdb_parameter_set)
        pdb_id = pdb_id_parameter.string_value
        # 1a. cosmetic change of case for PDB ID, if entered incorrectly
        if pdb_id != pdb_id.upper():
            pdb_id_parameter.string_value = pdb_id.upper()
            pdb_id_parameter.save()

        try:
            # 2. fetch the info from pdb.org
            pdb = PDBCifHelper(pdb_id)

            # 3. insert all standard pdb parameters
            add_if_missing(pdb_parameter_set,
                           'title',
                           string_value=pdb.get_pdb_title())
            add_if_missing(pdb_parameter_set,
                           'url',
                           string_value=pdb.get_pdb_url())

            # A ValueError while setting a numeric field is logged and
            # the remaining fields are still processed.
            numeric_fields = (
                ('resolution', pdb.get_resolution),
                ('r-value', pdb.get_obs_r_value),
                ('r-free', pdb.get_free_r_value),
            )
            for field_name, get_value in numeric_fields:
                try:
                    add_if_missing(pdb_parameter_set,
                                   field_name,
                                   numerical_value=get_value())
                except ValueError:
                    logger.error('PDB field "%s" could not be set for '
                                 'publication Id %i \n %s' %
                                 (field_name, pub.id,
                                  traceback.format_exc()))

            add_if_missing(pdb_parameter_set,
                           'space-group',
                           string_value=pdb.get_spacegroup())
            add_if_missing(pdb_parameter_set,
                           'unit-cell',
                           string_value=pdb.get_unit_cell())

            # 4. insert sequence info (lazy checking): sequences are only
            # written when the publication has no sequence parameter set
            has_sequences = ExperimentParameterSet.objects.filter(
                schema__namespace=SEQ_SCHEMA, experiment=pub).exists()
            if not has_sequences:
                for seq in pdb.get_sequence_info():
                    seq_parameter_set = ExperimentParameterSet(
                        schema=Schema.objects.get(namespace=SEQ_SCHEMA),
                        experiment=pub)
                    seq_parameter_set.save()
                    add_if_missing(seq_parameter_set,
                                   'organism',
                                   string_value=seq['organism'])
                    add_if_missing(seq_parameter_set,
                                   'expression-system',
                                   string_value=seq['expression_system'])
                    add_if_missing(seq_parameter_set,
                                   'sequence',
                                   string_value=seq['sequence'])

            # 5. insert/update citation info (aggressive): existing
            # citation parameter sets are dropped and rebuilt
            ExperimentParameterSet.objects.filter(
                schema__namespace=CIT_SCHEMA, experiment=pub).delete()
            for citation in pdb.get_citations():
                cit_parameter_set = ExperimentParameterSet(
                    schema=Schema.objects.get(namespace=CIT_SCHEMA),
                    experiment=pub)
                cit_parameter_set.save()
                add_if_missing(cit_parameter_set,
                               'title',
                               string_value=citation['title'])
                add_if_missing(cit_parameter_set,
                               'authors',
                               string_value='; '.join(citation['authors']))
                add_if_missing(cit_parameter_set,
                               'journal',
                               string_value=citation['journal'])
                add_if_missing(cit_parameter_set,
                               'volume',
                               string_value=citation['volume'])
                add_if_missing(cit_parameter_set,
                               'page-range',
                               string_value='-'.join([
                                   citation['page_first'],
                                   citation['page_last']
                               ]))
                add_if_missing(cit_parameter_set,
                               'doi',
                               string_value='http://dx.doi.org/' +
                               citation['doi'])

            # 6. Remove the PDB embargo if set, since the update has
            # occurred and therefore the PDB must have been released.
            # The query is restricted to this publication: an unscoped
            # get() would delete another publication's embargo, or raise
            # MultipleObjectsReturned when several are embargoed.
            embargoes = ExperimentParameter.objects.filter(
                name__name='pdb-embargo',
                parameterset__schema__namespace=PUB_SCHEMA,
                parameterset__experiment=pub)
            for embargo in embargoes:
                embargo.delete()

            # 7. Set the last update parameter to be now
            if pdb_last_update_parameter is None:
                # NOTE(review): the selection query above already requires
                # a PUB_SCHEMA parameter set on the publication, so this
                # adds a second one - confirm that this is intended.
                pub_parameter_set = ExperimentParameterSet(
                    schema=Schema.objects.get(namespace=PUB_SCHEMA),
                    experiment=pub)
                pub_parameter_set.save()
                pdb_last_update_parameter = ExperimentParameter(
                    name=last_update_parameter_name,
                    parameterset=pub_parameter_set,
                    datetime_value=timezone.now())
            else:
                pdb_last_update_parameter.datetime_value = timezone.now()
            pdb_last_update_parameter.save()

        except CifFile.StarError:
            # PDB is either unavailable or invalid
            # (maybe notify the user somehow?)
            continue