Exemplo n.º 1
0
    def create_facets(self, queryset):
        """
        Build the list of facets (status, organism, divergence and, when a
        single species is involved, chromosomes) from the given queryset.

        Each facet is an OrderedDict with the keys 'name', 'label' and 'items'.

        Args:
            queryset: object providing species(), statuses(), divergences()
                and chromosomes(). Items from species() are indexed as
                [0] -> name and [1] -> label; divergences() is indexed at
                0, 1 and 2 for the identical / small / large counts.

        Returns:
            list: [statuses, organism, sequence], followed by the
            chromosomes facet when species() yields exactly one species.
        """
        statuses = OrderedDict([('name', 'status'), ('label', 'Status'),
                                ('items', [])])

        organism = OrderedDict([('name', 'organism'), ('label', 'Organism'),
                                ('items', [])])

        sequence = OrderedDict([('name', 'divergence'),
                                ('label', 'Divergence'), ('items', [])])

        species_set = queryset.species()
        organism["items"].extend(
            {"name": species[0], "label": species[1]}
            for species in species_set)

        for query_status in queryset.statuses():
            # resolve the status once and derive the label from it, instead
            # of performing the same lookup twice per status
            status_name = Mapping.status_type(query_status)
            statuses["items"].append({
                "name": status_name,
                "label": status_name.replace("_", " ").capitalize()
            })

        # only divergence categories with a non-zero count become facet items
        differences = queryset.divergences()
        for position, category in enumerate(("identical", "small", "large")):
            count = differences[position]
            if count:
                sequence["items"].append({
                    "label": category,
                    "name": category,
                    "count": count
                })

        facets = [statuses, organism, sequence]

        # the chromosome facet is only offered for a single species
        if len(species_set) == 1:
            chromosomes = OrderedDict([('name', 'chromosomes'),
                                       ('label', 'Chromosomes'),
                                       ('items', [])])
            chromosomes["items"].extend(
                {'name': chromosome.lower(), 'label': chromosome.upper()}
                for chromosome in queryset.chromosomes())
            facets.append(chromosomes)

        return facets
Exemplo n.º 2
0
def get_status(mapping):
    """
    Return Mapping.status_type() for the mapping's status.

    If the lookup raises IndexError or CvUeStatus.DoesNotExist, the anomaly
    is logged as a warning (with traceback) and None is returned instead.
    """
    # local import so this function does not depend on a module-level import
    import logging

    try:
        return Mapping.status_type(mapping.status)
    except (IndexError, CvUeStatus.DoesNotExist):
        logging.getLogger(__name__).warning(
            "Unable to resolve status type for mapping %r",
            mapping,
            exc_info=True)
        return None
Exemplo n.º 3
0
    def create_facets(self, queryset):
        """
        Build the list of facets (status, organism and sequence) from the
        given queryset.

        Each facet is an OrderedDict with the keys 'name', 'label' and 'items'.

        Args:
            queryset: object providing species(), statuses() and
                divergences(). Items from species() are indexed as
                [0] -> name and [1] -> label; divergences() is indexed at
                0, 1 and 2 for the identical / small / large counts.

        Returns:
            list: [statuses, organism, sequence]
        """
        statuses = OrderedDict([('name', 'status'), ('label', 'Status'),
                                ('items', [])])
        organism = OrderedDict([('name', 'organism'), ('label', 'Organism'),
                                ('items', [])])
        sequence = OrderedDict([('name', 'sequence'), ('label', 'Sequence'),
                                ('items', [])])
        # TODO: add a 'mappings' facet (name 'mappings', label 'Type') and
        # return it after the sequence facet once it can be populated.

        organism["items"].extend(
            {"name": species[0], "label": species[1]}
            for species in queryset.species())

        for status in queryset.statuses():
            # resolve the status once and derive the label from it, instead
            # of performing the same lookup twice per status
            status_name = Mapping.status_type(status)
            statuses["items"].append({
                "name": status_name,
                "label": status_name.replace("_", " ").capitalize()
            })

        # only divergence categories with a non-zero count become facet items
        differences = queryset.divergences()
        for position, category in enumerate(("identical", "small", "large")):
            count = differences[position]
            if count:
                sequence["items"].append({
                    "label": category,
                    "name": category,
                    "count": count
                })

        return [statuses, organism, sequence]
Exemplo n.º 4
0
    def get(self, request):
        """
        Compute the mapping statistics and return them in a Response.

        The payload, serialized with MappingStatsSerializer, contains:
          - 'mapping': total number of mappings plus UniProt- and
            Ensembl-specific counts;
          - 'status': number of mappings per status;
          - 'label': number of mapping labels per label description.
        """

        #
        # Mapping stats: general and Uniprot/Ensembl specific
        #
        mappings_count = Mapping.objects.count()  # tot mappings

        # tot mapped uniprot entries
        uniprot_mapped_count = Mapping.objects.values(
            'uniprot').distinct().count()

        # tot non mapped Swiss-Prot entries,
        # NOTE: NO WAY TO GET IT AT THE MOMENT
        uniprot_not_mapped_sp_count = None

        #
        # NOTE:
        #   The number of mapped Swiss-Prot entries is not requested and is
        #   therefore not computed (it would require filtering the mappings
        #   on mapping_history__entry_type for the Swiss-Prot entry types).
        #
        #   As discussed with UniProt, genes not mapped to Swiss-Prot entries
        #   are counted among the non mapped genes, which coincides with
        #   counting the non-mapped genes in general, as done below.
        #
        # fetch only the ids rather than instantiating every gene
        gene_ids = set(EnsemblGene.objects.values_list('gene_id', flat=True))
        gene_mapped_ids = set(
            Mapping.objects.values_list('transcript__gene',
                                        flat=True).distinct())
        gene_mapped_count = len(gene_mapped_ids)  # tot mapped Ensembl genes
        gene_not_mapped_sp_count = len(gene_ids.difference(gene_mapped_ids))

        # tot mapped Ensembl transcripts
        transcript_mapped_count = Mapping.objects.values(
            'transcript').distinct().count()

        #
        # Stats relative to mapping labels
        #
        label_counts = [{
            'label': label.description,
            'count': UeMappingLabel.objects.filter(label=label).count()
        } for label in CvUeLabel.objects.all()]

        #
        # Stats for mapping status
        #
        status_totals = Mapping.objects.values('status').annotate(
            total=Count('status'))
        status_counts = [{
            'status': Mapping.status_type(status_count['status']),
            'count': status_count['total']
        } for status_count in status_totals]

        serializer = MappingStatsSerializer({
            'mapping': {
                'total': mappings_count,
                'uniprot': {
                    'mapped': uniprot_mapped_count,
                    'not_mapped_sp': uniprot_not_mapped_sp_count
                },
                'ensembl': {
                    'gene_mapped': gene_mapped_count,
                    'gene_not_mapped_sp': gene_not_mapped_sp_count,
                    'transcript_mapped': transcript_mapped_count
                }
            },
            'status': status_counts,
            'label': label_counts
        })

        return Response(serializer.data)
Exemplo n.º 5
0
    def build_mapping(cls, mapping, fetch_sequence=False, authenticated=False):
        """
        Build the dictionary representation of a mapping.

        Release information is taken from the mapping history record with
        the latest mapping_history_id.

        Args:
            mapping: the mapping to represent.
            fetch_sequence: when True, try to fetch the transcript sequence
                with ensembl_sequence(); a failure is logged and the
                sequence is left as None.
            authenticated: passed as `usernames` to mapping.statuses().

        Returns:
            dict: mapping id, release information, the 'uniprotEntry' and
            'ensemblTranscript' sub-dictionaries, the alignment difference,
            the status and the status history.
        """
        # local import so this method does not depend on a module-level import
        import logging

        mapping_history = mapping.mapping_history.select_related(
            'release_mapping_history',
            'release_mapping_history__ensembl_species_history',
        ).latest('mapping_history_id')
        release_mapping_history = mapping_history.release_mapping_history
        ensembl_history = release_mapping_history.ensembl_species_history

        status = mapping.status.id
        uniprot = mapping.uniprot
        transcript = mapping.transcript
        gene = transcript.gene

        sequence = None
        if fetch_sequence:
            try:
                sequence = ensembl_sequence(transcript.enst_id,
                                            ensembl_history.ensembl_release)
            except Exception:
                # best effort: the mapping is still returned, without sequence
                logging.getLogger(__name__).warning(
                    "Unable to fetch sequence for transcript %s",
                    transcript.enst_id,
                    exc_info=True)

        mapping_obj = {
            'mappingId': mapping.mapping_id,
            'timeMapped': release_mapping_history.time_mapped,
            'ensemblRelease': ensembl_history.ensembl_release,
            'uniprotRelease': release_mapping_history.uniprot_release,
            'uniprotEntry': {
                'uniprotAccession': uniprot.uniprot_acc,
                'entryType': Mapping.entry_type(mapping_history.entry_type_id),
                'sequenceVersion': uniprot.sequence_version,
                'upi': uniprot.upi,
                'md5': uniprot.md5,
                # NOTE(review): True when canonical_uniprot_id is set, while
                # build_related_unmapped_entries_data uses the negation for
                # the same key - confirm which polarity is intended.
                'isCanonical': bool(uniprot.canonical_uniprot_id),
                'alias': uniprot.alias,
                'ensemblDerived': uniprot.ensembl_derived,
                'gene_symbol': uniprot.gene_symbol,
                'gene_accession': uniprot.gene_accession,
                'length': uniprot.length
            },
            'ensemblTranscript': {
                'enstId': transcript.enst_id,
                'enstVersion': transcript.enst_version,
                'upi': transcript.uniparc_accession,
                'biotype': transcript.biotype,
                'deleted': transcript.deleted,
                'chromosome': gene.chromosome,
                'seqRegionStart': transcript.seq_region_start,
                'seqRegionEnd': transcript.seq_region_end,
                'seqRegionStrand': gene.seq_region_strand,
                'ensgId': gene.ensg_id,
                'ensgName': gene.gene_name,
                'ensgSymbol': gene.gene_symbol,
                'ensgAccession': gene.gene_accession,
                'sequence': sequence,
                'enspId': transcript.ensp_id,
                'enspLen': transcript.ensp_len,
                'select': transcript.select
            },
            'alignment_difference': mapping.alignment_difference,
            'status': Mapping.status_type(status),
            'status_history': mapping.statuses(usernames=authenticated)
        }

        return mapping_obj
Exemplo n.º 6
0
def build_related_unmapped_entries_data(mapping):
    """
    Return the unmapped entries related to the mapping (via grouping_id).

    The reference mapping history is the one with the latest
    release_mapping_history time_mapped; related entries share its
    grouping_id and its release (UniProt release for the UniProt entries,
    Ensembl species history for the transcripts).

    Returns:
        dict: {'ensembl': [transcript dicts], 'uniprot': [entry dicts]}
    """

    mapping_mh = mapping.mapping_history.latest(
        'release_mapping_history__time_mapped')
    mapping_mh_rmh = mapping_mh.release_mapping_history
    mapping_grouping_id = mapping_mh.grouping_id

    # select_related: fetch the related entry together with its history row
    # instead of issuing one extra query per history record
    related_unmapped_ue_histories = UniprotEntryHistory.objects.filter(
        release_version=mapping_mh_rmh.uniprot_release,
        grouping_id=mapping_grouping_id).select_related('uniprot')

    related_unmapped_ue_entries = []
    for ueh in related_unmapped_ue_histories:
        up_entry = ueh.uniprot
        related_unmapped_ue_entries.append({
            'uniprot_id': up_entry.uniprot_id,
            'uniprotAccession': up_entry.uniprot_acc,
            'entryType': Mapping.entry_type(up_entry.entry_type_id),
            'sequenceVersion': up_entry.sequence_version,
            'upi': up_entry.upi,
            'md5': up_entry.md5,
            # NOTE(review): True when canonical_uniprot_id is NOT set, while
            # build_mapping reports the opposite polarity for the same key -
            # confirm which one is intended.
            'isCanonical': not up_entry.canonical_uniprot_id,
            'alias': up_entry.alias,
            'ensemblDerived': up_entry.ensembl_derived,
            'gene_symbol': up_entry.gene_symbol,
            # NOTE(review): 'gene_accession' is read from chromosome_line -
            # confirm this is the intended source attribute.
            'gene_accession': up_entry.chromosome_line,
            'length': up_entry.length,
            'protein_existence_id': up_entry.protein_existence_id
        })

    # the transcript and its gene are both read below: join them up front
    related_unmapped_transcript_histories = TranscriptHistory.objects.filter(
        ensembl_species_history=mapping_mh_rmh.ensembl_species_history,
        grouping_id=mapping_grouping_id).select_related('transcript__gene')

    related_unmapped_transcripts = []
    for t_hist in related_unmapped_transcript_histories:
        transcript = t_hist.transcript
        gene = transcript.gene
        related_unmapped_transcripts.append({
            'transcript_id': transcript.transcript_id,
            'enstId': transcript.enst_id,
            'enstVersion': transcript.enst_version,
            'upi': transcript.uniparc_accession,
            'biotype': transcript.biotype,
            'deleted': transcript.deleted,
            'chromosome': gene.chromosome,
            'regionAccession': gene.region_accession,
            'seqRegionStart': transcript.seq_region_start,
            'seqRegionEnd': transcript.seq_region_end,
            'seqRegionStrand': gene.seq_region_strand,
            'ensgId': gene.ensg_id,
            'ensgName': gene.gene_name,
            'ensgSymbol': gene.gene_symbol,
            'ensgAccession': gene.gene_accession,
            'ensgRegionAccession': gene.region_accession,
            # the sequence is not fetched for related unmapped transcripts
            'sequence': None,
            'enspId': transcript.ensp_id,
            'enspLen': transcript.ensp_len,
            'source': transcript.source,
            'select': transcript.select
        })

    return {
        'ensembl': related_unmapped_transcripts,
        'uniprot': related_unmapped_ue_entries
    }