Пример #1
0
def fetch_pairwise(mapping):
    """
    Collect the displayable pairwise alignments of a mapping.

    Walks ``mapping.alignments.all()`` and builds one dictionary per usable
    alignment:

    * an alignment whose run has ``score1_type == 'identity'`` is rebuilt
      with ``pairwise_alignment`` from its cigarplus/mdz strings, and the
      walk stops right after it;
    * an alignment whose run has ``score1_type == 'perfect_match'`` and
      whose ``score1`` equals 1 is reported as the Ensembl sequence aligned
      against itself.

    Every other alignment is skipped.

    Parameters
    ----------
    mapping
        Object exposing ``mapping_id``, ``transcript.enst_id``,
        ``uniprot.uniprot_acc`` and ``alignments.all()``.

    Returns
    -------
    dict
        ``{'mapping_id': <mapping.mapping_id>, 'alignments': [<dict>, ...]}``
    """
    md_prefix = 'MD:Z:'
    transcript_id = mapping.transcript.enst_id
    accession = mapping.uniprot.uniprot_acc
    collected = []

    for aln in mapping.alignments.all():
        run = aln.alignment_run
        score_type = run.score1_type

        if score_type == 'identity':
            cigar = aln.pairwise.cigarplus
            md_tag = aln.pairwise.mdz

            # Drop the tag prefix when it is present.
            if md_tag.startswith(md_prefix):
                md_tag = md_tag[len(md_prefix):]

            release = run.ensembl_release
            protein_id = ensembl_protein(transcript_id, release)
            protein_seq = ensembl_sequence(protein_id, release)

            uniprot_row, match_row, ensembl_row = pairwise_alignment(
                protein_seq, cigar, md_tag)

            collected.append({
                'uniprot_alignment': uniprot_row,
                'ensembl_alignment': ensembl_row,
                'match_str': match_row,
                'alignment_id': aln.alignment_id,
                'ensembl_release': release,
                'ensembl_id': protein_id,
                'uniprot_id': accession,
                'alignment_type': 'identity'
            })

            # The first identity alignment ends the walk.
            break

        # score1 is only inspected for perfect_match runs.
        if not (score_type == 'perfect_match' and aln.score1 == 1):
            continue

        release = run.ensembl_release
        protein_id = ensembl_protein(transcript_id, release)
        protein_seq = ensembl_sequence(protein_id, release)

        collected.append({
            'uniprot_alignment': protein_seq,
            'ensembl_alignment': protein_seq,
            'match_str': '|' * len(protein_seq),
            'alignment_id': aln.alignment_id,
            'ensembl_release': release,
            'ensembl_id': protein_id,
            'uniprot_id': accession,
            'alignment_type': 'perfect_match'
        })

    return {
        'mapping_id': mapping.mapping_id,
        'alignments': collected
    }
Пример #2
0
def _fetch_alignment(alignment, enst, uniprot_id):
    """
    Build the pairwise-alignment dictionary for one alignment record.

    The Ensembl protein for ``enst`` is resolved with ``ensembl_protein``
    and its sequence fetched with ``ensembl_sequence`` for the release of
    the alignment run. Unless the run's ``score1_type`` is ``'identity'``
    the sequence is reported aligned against itself and labelled
    ``'perfect_match'``; for ``'identity'`` runs the alignment rows come
    from ``pairwise_alignment`` using the stored cigarplus/mdz strings.

    Parameters
    ----------
    alignment
        Record exposing ``alignment_run``, ``pairwise`` and ``alignment_id``.
    enst       : str
    uniprot_id : str

    Returns
    -------
    pw_alignment : dict
        Alignment object
    """
    md_prefix = 'MD:Z:'
    release = alignment.alignment_run.ensembl_release

    protein_id = ensembl_protein(enst, release)
    protein_seq = ensembl_sequence(protein_id, release)

    # Defaults: the sequence matched against itself.
    first_row = third_row = protein_seq
    match_row = '|' * len(protein_seq)
    kind = 'perfect_match'

    if alignment.alignment_run.score1_type == 'identity':
        cigar = alignment.pairwise.cigarplus
        md_tag = alignment.pairwise.mdz

        # Drop the tag prefix when it is present.
        if md_tag.startswith(md_prefix):
            md_tag = md_tag[len(md_prefix):]

        first_row, match_row, third_row = pairwise_alignment(
            protein_seq, cigar, md_tag)
        kind = 'identity'

    # NOTE(review): the first value returned by pairwise_alignment is stored
    # under 'ensembl_alignment' and the third under 'uniprot_alignment' --
    # confirm this matches the helper's return order.
    return {
        'uniprot_alignment': third_row,
        'ensembl_alignment': first_row,
        'match_str': match_row,
        'alignment_id': alignment.alignment_id,
        'ensembl_release': release,
        'ensembl_id': protein_id,
        'uniprot_id': uniprot_id,
        'alignment_type': kind
    }
Пример #3
0
    def build_mapping(cls, mapping, fetch_sequence=False, authenticated=False):
        """
        Serialise a mapping into a nested dictionary.

        Release information is read from the mapping history entry with the
        latest ``mapping_history_id``.

        Parameters
        ----------
        mapping
            Mapping record; its ``uniprot``, ``transcript``, ``status`` and
            ``mapping_history`` relations are read.
        fetch_sequence : bool
            When true, ``ensembl_sequence`` is called for the transcript; any
            exception it raises is printed and the sequence stays ``None``.
        authenticated : bool
            Passed to ``mapping.statuses`` as ``usernames``.

        Returns
        -------
        dict
            Top-level mapping fields plus the ``'uniprotEntry'`` and
            ``'ensemblTranscript'`` sub-dictionaries.
        """
        history_qs = mapping.mapping_history.select_related(
            'release_mapping_history')
        history_qs = history_qs.select_related(
            'release_mapping_history__ensembl_species_history')
        latest_history = history_qs.latest('mapping_history_id')

        release_history = latest_history.release_mapping_history
        species_history = release_history.ensembl_species_history

        status_id = mapping.status.id

        transcript_sequence = None
        if fetch_sequence:
            try:
                transcript_sequence = ensembl_sequence(
                    mapping.transcript.enst_id,
                    species_history.ensembl_release)
            except Exception as err:
                print(err)  # TODO: log
                transcript_sequence = None

        uniprot = mapping.uniprot
        transcript = mapping.transcript
        gene = transcript.gene

        # NOTE(review): isCanonical is True when canonical_uniprot_id is
        # truthy -- confirm the intended polarity.
        uniprot_entry = {
            'uniprotAccession': uniprot.uniprot_acc,
            'entryType': Mapping.entry_type(latest_history.entry_type_id),
            'sequenceVersion': uniprot.sequence_version,
            'upi': uniprot.upi,
            'md5': uniprot.md5,
            'isCanonical': bool(uniprot.canonical_uniprot_id),
            'alias': uniprot.alias,
            'ensemblDerived': uniprot.ensembl_derived,
            'gene_symbol': uniprot.gene_symbol,
            'gene_accession': uniprot.gene_accession,
            'length': uniprot.length
        }

        # Chromosome and strand come from the gene; start/end from the
        # transcript itself.
        ensembl_transcript = {
            'enstId': transcript.enst_id,
            'enstVersion': transcript.enst_version,
            'upi': transcript.uniparc_accession,
            'biotype': transcript.biotype,
            'deleted': transcript.deleted,
            'chromosome': gene.chromosome,
            'seqRegionStart': transcript.seq_region_start,
            'seqRegionEnd': transcript.seq_region_end,
            'seqRegionStrand': gene.seq_region_strand,
            'ensgId': gene.ensg_id,
            'ensgName': gene.gene_name,
            'ensgSymbol': gene.gene_symbol,
            'ensgAccession': gene.gene_accession,
            'sequence': transcript_sequence,
            'enspId': transcript.ensp_id,
            'enspLen': transcript.ensp_len,
            'select': transcript.select
        }

        return {
            'mappingId': mapping.mapping_id,
            'timeMapped': release_history.time_mapped,
            'ensemblRelease': species_history.ensembl_release,
            'uniprotRelease': release_history.uniprot_release,
            'uniprotEntry': uniprot_entry,
            'ensemblTranscript': ensembl_transcript,
            'alignment_difference': mapping.alignment_difference,
            'status': Mapping.status_type(status_id),
            'status_history': mapping.statuses(usernames=authenticated)
        }
Пример #4
0
 def test_ensembl_sequence(self):
     """
     The sequence returned by ``external.ensembl_sequence`` for
     ENST00000382038 at release 95 must start with 'GCTTGCC'.
     """
     sequence = external.ensembl_sequence('ENST00000382038', 95)
     leading = sequence[:7]
     self.assertEqual(leading, 'GCTTGCC')
Пример #5
0
    def build_mapping(cls,
                      mapping_view,
                      fetch_sequence=False,
                      authenticated=False):
        """
        Serialise a mapping-view row into a nested dictionary.

        Parameters
        ----------
        mapping_view
            Flat record; every reported field is read directly from it.
        fetch_sequence : bool
            When true, ``ensembl_sequence`` is called with the row's
            ``enst_id`` and ``ensembl_release``; any exception it raises is
            printed and the sequence stays ``None``.
        authenticated : bool
            Passed to ``mapping_view.statuses`` as ``usernames``.

        Returns
        -------
        dict
            Top-level mapping fields plus the ``'uniprotEntry'`` and
            ``'ensemblTranscript'`` sub-dictionaries.
        """
        view = mapping_view
        status_code = view.status

        transcript_sequence = None
        if fetch_sequence:
            try:
                transcript_sequence = ensembl_sequence(view.enst_id,
                                                       view.ensembl_release)
            except Exception as err:
                print(err)
                transcript_sequence = None

        # isCanonical is the negation of canonical_uniprot_id's truthiness.
        # NOTE(review): 'gene_accession' is filled from chromosome_line --
        # confirm this is the intended source field.
        uniprot_entry = {
            'uniprot_id': view.uniprot_id,
            'uniprotAccession': view.uniprot_acc,
            'entryType': MappingView.entry_description(view.entry_type),
            'sequenceVersion': view.sequence_version,
            'upi': view.upi,
            'md5': view.md5,
            'isCanonical': not view.canonical_uniprot_id,
            'alias': view.alias,
            'ensemblDerived': view.ensembl_derived,
            'gene_symbol': view.gene_symbol_up,
            'gene_accession': view.chromosome_line,
            'length': view.length,
            'protein_existence_id': view.protein_existence_id
        }

        # 'regionAccession' and 'ensgRegionAccession' both carry
        # region_accession.
        ensembl_transcript = {
            'transcript_id': view.transcript_id,
            'enstId': view.enst_id,
            'enstVersion': view.enst_version,
            'upi': view.uniparc_accession,
            'biotype': view.biotype,
            'deleted': view.deleted,
            'chromosome': view.chromosome,
            'regionAccession': view.region_accession,
            'seqRegionStart': view.seq_region_start,
            'seqRegionEnd': view.seq_region_end,
            'seqRegionStrand': view.seq_region_strand,
            'ensgId': view.ensg_id,
            'ensgName': view.gene_name,
            'ensgSymbol': view.gene_symbol_eg,
            'ensgAccession': view.gene_accession,
            'ensgRegionAccession': view.region_accession,
            'sequence': transcript_sequence,
            'enspId': view.ensp_id,
            'enspLen': view.ensp_len,
            'source': view.source,
            'select': view.select
        }

        return {
            'id': view.id,
            'mappingId': view.mapping_id,
            'groupingId': view.grouping_id,
            'timeMapped': view.time_mapped,
            'ensemblRelease': view.ensembl_release,
            'uniprotRelease': view.uniprot_release,
            'uniprotEntry': uniprot_entry,
            'ensemblTranscript': ensembl_transcript,
            'alignment_difference': view.alignment_difference,
            'status': MappingView.status_description(status_code),
            'status_history': view.statuses(usernames=authenticated)
        }