Пример #1
0
    def get(self, request, slug=None, segments=None):
        """Return the aligned wild-type sequences of a protein family.

        Args:
            request: The incoming request (not read here).
            slug: Family slug prefix. Wild-type proteins with source id 1
                whose family slug starts with this value are aligned.
            segments: Optional comma-separated segment slugs. When omitted,
                every non-partial segment is used.

        Returns:
            A ``Response`` wrapping a dict that maps each FASTA header
            (without the leading ``>``) to the row that follows it. Falls
            through and returns ``None`` when ``slug`` is ``None``.
        """
        if slug is not None:
            ps = Protein.objects.filter(sequence_type__slug='wt', source__id=1, family__slug__startswith=slug)

            if segments is not None:
                segment_list = segments.split(",")
                ss = ProteinSegment.objects.filter(slug__in=segment_list, partial=False)
            else:
                ss = ProteinSegment.objects.filter(partial=False)

            # create an alignment object
            a = Alignment()
            a.show_padding = False

            # load data from selection into the alignment
            a.load_proteins(ps)
            a.load_segments(ss)

            # build the alignment data matrix
            a.build_alignment()

            # render the fasta template as string
            response = render_to_string('alignment/alignment_fasta.html', {'a': a}).split("\n")

            # convert the list to a dict
            ali_dict = {}
            # k holds the header waiting for its sequence row; False means
            # "no header pending". Rows seen while nothing is pending (text
            # before the first header, blank lines) are skipped rather than
            # raising NameError or being stored under the key False.
            k = False
            for row in response:
                if row.startswith(">"):
                    k = row[1:]
                elif k:
                    ali_dict[k] = row
                    k = False

            return Response(ali_dict)
Пример #2
0
    def get(self, request, proteins=None, segments=None):
        """Align the requested wild-type proteins and return them as a dict.

        Args:
            request: The incoming request (not read here).
            proteins: Comma-separated protein entry names.
            segments: Optional comma-separated segment slugs. When omitted,
                every non-partial segment is used.

        Returns:
            A ``Response`` wrapping a dict that maps each FASTA header
            (without the leading ``>``) to the row that follows it, or
            ``None`` when ``proteins`` is ``None``.
        """
        if proteins is None:
            return None

        entry_names = proteins.split(",")
        selected_proteins = Protein.objects.filter(
            sequence_type__slug='wt', entry_name__in=entry_names)

        if segments is None:
            selected_segments = ProteinSegment.objects.filter(partial=False)
        else:
            wanted_slugs = segments.split(",")
            selected_segments = ProteinSegment.objects.filter(
                slug__in=wanted_slugs, partial=False)

        # set up the alignment and feed it the selection
        alignment = Alignment()
        alignment.show_padding = False
        alignment.load_proteins(selected_proteins)
        alignment.load_segments(selected_segments)

        # compute the alignment matrix
        alignment.build_alignment()

        # the fasta template is rendered to text and processed line by line
        fasta_text = render_to_string('alignment/alignment_fasta.html', {'a': alignment})
        fasta_lines = fasta_text.split("\n")

        # pair every header with the first row that follows it
        sequences_by_header = {}
        pending_header = False
        for line in fasta_lines:
            if line.startswith(">"):
                pending_header = line[1:]
                continue
            if pending_header:
                sequences_by_header[pending_header] = line
                pending_header = False

        return Response(sequences_by_header)
Пример #3
0
    def get(self, request, proteins=None, segments=None):
        """Render an HTML alignment of the requested proteins.

        Args:
            request: The incoming request; passed on to ``render``.
            proteins: Comma-separated protein entry names to align.
            segments: Optional comma-separated segment slugs. When omitted,
                every non-partial segment is used.

        Returns:
            The response produced by rendering
            ``alignment/alignment_ws.html``, with an ``X-Frame-Options``
            header set on it. Falls through and returns ``None`` when
            ``proteins`` is ``None``.
        """
        if proteins is not None:
            protein_list = proteins.split(",")
            ps = Protein.objects.filter(entry_name__in=protein_list)

            if segments is not None:
                segment_list = segments.split(",")
                ss = ProteinSegment.objects.filter(slug__in=segment_list, partial=False)
            else:
                ss = ProteinSegment.objects.filter(partial=False)

            # create an alignment object
            a = Alignment()
            a.show_padding = False

            # load data from selection into the alignment
            a.load_proteins(ps)
            a.load_segments(ss)

            # build the alignment data matrix
            a.build_alignment()

            # sizes handed to the template next to the alignment itself
            num_of_sequences = len(a.proteins)
            num_residue_columns = len(a.positions) + len(a.segments)

            response = render(request, 'alignment/alignment_ws.html', {
                'a': a,
                'num_of_sequences': num_of_sequences,
                'num_residue_columns': num_residue_columns,
            })
            # NOTE(review): "ALLOWALL" is not one of the standard values of
            # this header (DENY / SAMEORIGIN) — confirm that embedding in
            # frames from any origin is what is wanted here.
            response['X-Frame-Options'] = "ALLOWALL"
            return response
Пример #4
0
    def get(self, request, proteins=None, segments=None, statistics=False):
        """Align wild-type proteins, optionally adding per-position statistics.

        Args:
            request: The incoming request (not read here).
            proteins: Comma-separated protein entry names. The first one
                supplies the residue numbering scheme.
            segments: Optional comma-separated list. Entries that match a
                non-partial segment slug select that segment; every other
                entry is looked up as a generic number label in the scheme
                of the first protein. When omitted, every non-partial
                segment is used.
            statistics: When equal to ``True``, statistics are calculated
                and added under the ``"statistics"`` key.

        Returns:
            A ``Response`` wrapping a dict of FASTA header -> aligned row
            (plus ``"statistics"`` when requested), or ``None`` when
            ``proteins`` is ``None``.
        """
        if proteins is None:
            return None

        entry_names = proteins.split(",")
        selected_proteins = Protein.objects.filter(
            sequence_type__slug='wt', entry_name__in=entry_names)

        # the first listed protein decides the numbering scheme
        scheme_id = Protein.objects.get(
            entry_name=entry_names[0]).residue_numbering_scheme_id

        generic_numbers = []
        if segments is None:
            selected_segments = ProteinSegment.objects.filter(partial=False)
        else:
            requested = segments.split(",")
            # slugs of all complete segments, used to classify the input
            known_slugs = ProteinSegment.objects.filter(
                partial=False).values_list('slug', flat=True)
            segment_slugs = []
            for label in requested:
                if label in known_slugs:
                    segment_slugs.append(label)
                    continue
                # not a segment slug: resolve it as a generic number
                generic_number = ResidueGenericNumberEquivalent.objects.get(
                    label=label, scheme__id=scheme_id)
                generic_number.properties = {}
                generic_numbers.append(generic_number)

            selected_segments = ProteinSegment.objects.filter(
                slug__in=segment_slugs, partial=False)

        alignment = Alignment()
        alignment.show_padding = False
        alignment.load_proteins(selected_proteins)

        # generic numbers (if any) are loaded before the segments
        if generic_numbers:
            alignment.load_segments(generic_numbers)
        alignment.load_segments(selected_segments)

        alignment.build_alignment()

        with_statistics = statistics == True
        if with_statistics:
            alignment.calculate_statistics()

        # the fasta template is rendered to text and processed line by line
        fasta_lines = render_to_string(
            'alignment/alignment_fasta.html', {'a': alignment}).split("\n")

        # pair every header with the first row that follows it
        ali_dict = {}
        pending_header = False
        for line in fasta_lines:
            if line.startswith(">"):
                pending_header = line[1:]
                continue
            if pending_header:
                ali_dict[pending_header] = line
                pending_header = False

        if with_statistics:
            def leading_values(grouped_stats):
                # keep the first element of every entry, flattened over groups
                return [entry[0] for group in grouped_stats for entry in group]

            feat = {}
            for index, feature in enumerate(AMINO_ACID_GROUPS):
                feat[feature] = leading_values(alignment.feature_stats[index])
            for index, amino_acid in enumerate(AMINO_ACIDS):
                feat[amino_acid] = leading_values(alignment.amino_acid_stats[index])

            ali_dict["statistics"] = feat

        return Response(ali_dict)
Пример #5
0
    def get(self, request, entry_name=None, segments=None):
        """Return the entry name of the structure template picked for a protein.

        Args:
            request: The incoming request (not read here).
            entry_name: Entry name of the wild-type reference protein.
            segments: Optional comma-separated segment slugs. When omitted,
                the non-partial segments of category ``helix`` are used.

        Returns:
            A ``Response`` wrapping the entry name of the protein at index 1
            of the alignment after similarity calculation, or ``None`` when
            ``entry_name`` is ``None``.
        """
        if entry_name is None:
            return None

        reference = Protein.objects.get(sequence_type__slug='wt',
                                        entry_name=entry_name)

        # one structure per (parent protein, state), ordered by resolution
        group_fields = ('protein_conformation__protein__parent', 'state')
        structures = Structure.objects.order_by(
            *group_fields, 'resolution').distinct(*group_fields)

        candidates = [
            structure.protein_conformation.protein.parent
            for structure in structures
        ]

        if segments is None:
            selected_segments = ProteinSegment.objects.filter(
                partial=False, category='helix')
        else:
            selected_segments = ProteinSegment.objects.filter(
                slug__in=segments.split(","), partial=False)

        alignment = Alignment()
        alignment.show_padding = False

        # reference first, then the candidate proteins and the segments
        alignment.load_reference_protein(reference)
        alignment.load_proteins(candidates)
        alignment.load_segments(selected_segments)

        alignment.build_alignment()

        # identity and similarity of each row compared to the reference
        alignment.calculate_similarity()

        # the entry at index 1 is reported as the closest template
        closest = alignment.proteins[1]
        return Response(closest.protein.entry_name)
Пример #6
0
    def find_segment_template(self, pconf, sconfs, segments):
        """Align ``sconfs`` against ``pconf.protein`` and return the pick.

        Args:
            pconf: Object whose ``protein`` attribute is loaded as the
                alignment reference.
            sconfs: Proteins loaded into the alignment next to the reference.
            segments: Segments the alignment is restricted to.

        Returns:
            The entry at index 1 of the alignment's ``proteins`` after the
            similarity calculation.
        """
        alignment = Alignment()

        # the reference goes in first, then candidates and segments
        alignment.load_reference_protein(pconf.protein)
        alignment.load_proteins(sconfs)
        alignment.load_segments(segments)

        alignment.build_alignment()
        alignment.calculate_similarity()

        selected = alignment.proteins[1]
        return selected
Пример #7
0
    def get(self, request, proteins=None, segments=None):
        """Align proteins against a reference and report identity/similarity.

        Args:
            request: The incoming request (not read here).
            proteins: Comma-separated protein entry names. The first name is
                the reference; the remaining ones are aligned against it.
            segments: Optional comma-separated list. Entries that match a
                non-partial segment slug select that segment; every other
                entry is looked up as a generic number label in the scheme
                of the reference. When omitted, every non-partial segment is
                used.

        Returns:
            A ``Response`` wrapping an ``OrderedDict`` keyed by FASTA header
            and sorted by descending similarity. Each value holds
            ``similarity``, ``identity`` and ``AA`` (the aligned row), in
            that order. Falls through and returns ``None`` when ``proteins``
            is ``None``.
        """
        if proteins is not None:
            protein_list = proteins.split(",")
            # first in API should be reference
            ps = Protein.objects.filter(sequence_type__slug='wt',
                                        entry_name__in=protein_list[1:])
            # NOTE(review): this is a QuerySet, not a single Protein — confirm
            # that load_reference_protein accepts it.
            reference = Protein.objects.filter(
                sequence_type__slug='wt', entry_name__in=[protein_list[0]])

            # take the numbering scheme from the first protein
            s_slug = Protein.objects.get(
                entry_name=protein_list[0]).residue_numbering_scheme_id

            # bound up front so that a request without `segments` does not
            # hit unbound locals when the alignment is loaded below
            gen_list = []
            segment_list = []
            if segments is not None:
                input_list = segments.split(",")
                # fetch a list of all segments
                protein_segments = ProteinSegment.objects.filter(
                    partial=False).values_list('slug', flat=True)
                for s in input_list:
                    # add to segment list
                    if s in protein_segments:
                        segment_list.append(s)
                    # get generic numbering object for generic positions
                    else:
                        # make sure the query works for all positions
                        gen_object = ResidueGenericNumberEquivalent.objects.get(
                            label=s, scheme__id=s_slug)
                        gen_object.properties = {}
                        gen_list.append(gen_object)

                # fetch all complete protein_segments
                ss = ProteinSegment.objects.filter(slug__in=segment_list,
                                                   partial=False)
            else:
                # no selection given: fall back to every complete segment
                ss = ProteinSegment.objects.filter(partial=False)

            # create an alignment object
            a = Alignment()
            a.show_padding = False

            # load data from API into the alignment
            a.load_reference_protein(reference)
            a.load_proteins(ps)

            # load generic numbers and TMs separately
            a.load_segments(gen_list)
            a.load_segments(ss)

            # build the alignment data matrix
            a.build_alignment()

            # calculate identity and similarity of each row compared to the reference
            a.calculate_similarity()

            # render the fasta template as string
            response = render_to_string('alignment/alignment_fasta.html', {
                'a': a
            }).split("\n")

            # convert the list to a dict
            ali_dict = {}
            k = False
            # index into a.proteins of the sequence row being processed
            num = 0
            for row in response:
                if row.startswith(">"):
                    k = row[1:]
                elif k:
                    protein = a.proteins[num]
                    # add the query as 100 identical/similar to the beginning (like on the website)
                    if num == 0:
                        protein.identity = 100
                        protein.similarity = 100
                    # spaces are stripped from the values before int()
                    identity = int(str(protein.identity).replace(" ", ""))
                    similarity = int(str(protein.similarity).replace(" ", ""))
                    # fixed key order: similarity, identity, AA
                    ali_dict[k] = OrderedDict([
                        ("similarity", similarity),
                        ("identity", identity),
                        ("AA", row),
                    ])
                    num += 1
                    k = False
            ali_dict_ordered = OrderedDict(
                sorted(ali_dict.items(),
                       key=lambda x: x[1]['similarity'],
                       reverse=True))
            return Response(ali_dict_ordered)
Пример #8
0
    def get(self, request, entry_name=None, segments=None):
        """Return the entry name of the structure template picked for a protein.

        Args:
            request: The incoming request (not read here).
            entry_name: Entry name of the wild-type reference protein.
            segments: Optional comma-separated segment slugs. When omitted,
                the non-partial segments of category ``helix`` are used.

        Returns:
            A ``Response`` wrapping the entry name of the protein at index 1
            of the alignment after similarity calculation, or ``None`` when
            ``entry_name`` is ``None``.
        """
        if entry_name is None:
            return None

        reference_protein = Protein.objects.get(sequence_type__slug='wt', entry_name=entry_name)

        # one structure per (parent protein, state), ordered by resolution
        distinct_on = ('protein_conformation__protein__parent', 'state')
        structure_qs = Structure.objects.order_by(*distinct_on, 'resolution')
        structure_qs = structure_qs.distinct(*distinct_on)

        template_proteins = [s.protein_conformation.protein.parent for s in structure_qs]

        if segments is None:
            segment_qs = ProteinSegment.objects.filter(partial=False, category='helix')
        else:
            segment_qs = ProteinSegment.objects.filter(slug__in=segments.split(","), partial=False)

        alignment = Alignment()
        alignment.show_padding = False

        # reference first, then the template proteins and the segments
        alignment.load_reference_protein(reference_protein)
        alignment.load_proteins(template_proteins)
        alignment.load_segments(segment_qs)

        alignment.build_alignment()

        # identity and similarity of each row compared to the reference
        alignment.calculate_similarity()

        # the entry at index 1 is reported as the closest template
        return Response(alignment.proteins[1].protein.entry_name)
Пример #9
0
def get_segment_template(protein, segments=None, state=None):
    """Align structure proteins against ``protein`` and return the pick.

    Args:
        protein: Reference protein; its ``id`` is also used to exclude
            structures from the candidate set.
        segments: Segment slugs to align on. Defaults to ``TM1`` .. ``TM7``
            when omitted.
        state: Optional conformation state added to the exclusion filter.

    Returns:
        The entry at index 1 of the alignment's ``proteins`` after the
        similarity calculation.
    """
    # a None sentinel avoids sharing one mutable default list across calls
    if segments is None:
        segments = ['TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TM7']

    a = Alignment()
    a.load_reference_protein(protein)

    exclusion = {'protein_conformation__protein': protein.id}
    if state:
        # NOTE(review): both conditions go into a single exclude() call, so
        # only structures matching the protein AND the state are dropped —
        # confirm that this is the intended selection.
        exclusion['protein_conformation__state'] = state

    structures = Structure.objects.order_by(
        'protein_conformation__protein__parent',
        'resolution').exclude(**exclusion)
    a.load_proteins([x.protein_conformation.protein.parent for x in structures])

    a.load_segments(ProteinSegment.objects.filter(slug__in=segments))
    a.build_alignment()
    a.calculate_similarity()

    return a.proteins[1]
Пример #10
0
    def receptor_mammal_representatives(self):
        """Label unrefined structures as mammal and as closest to human.

        Every structure with ``refined=False`` gets its ``mammal`` attribute
        set from ``self.check_uniprot_if_mammal`` (looked up once per species
        common name) and is saved. The structures are then grouped by
        (family slug, state slug). Within each group one species is chosen
        and ``closest_to_human`` is saved as ``True`` on the structures of
        that species and ``False`` on the others:

        * ``'Human'`` when it is present in the group,
        * the only species when there is just one,
        * otherwise the species of the entry at index 1 of a TM1-TM7
          alignment against the human wild-type protein of the family.
        """
        structures = Structure.objects.filter(refined=False).prefetch_related(
            "pdb_code", "state",
            "protein_conformation__protein__parent__family",
            "protein_conformation__protein__species")

        # (family slug, state slug) -> [[pdb, species, protein, structure], ...]
        # A tuple key avoids re-splitting a joined "slug-state" string, which
        # would break as soon as either part contained a "-".
        structures_by_conformation = {}
        # species common name -> result of check_uniprot_if_mammal
        is_mammal = {}

        ## Go through all structures and deduce if mammal and prepare receptor/state sets to find most "human"
        for s in structures:
            pdb = s.pdb_code.index
            state = s.state.slug
            slug = s.protein_conformation.protein.parent.family.slug
            species = s.protein_conformation.protein.species.common_name
            protein = s.protein_conformation.protein.parent

            # the check runs once per species and is reused afterwards
            if species not in is_mammal:
                is_mammal[species] = self.check_uniprot_if_mammal(protein)

            s.mammal = is_mammal[species]
            s.save()

            structures_by_conformation.setdefault((slug, state), []).append(
                [pdb, species, protein, s])

        print("DEBUG", is_mammal)

        for (p_slug, _state), pdbs in structures_by_conformation.items():
            distinct_species = {x[1] for x in pdbs}
            candidate_proteins = {x[2] for x in pdbs}

            if 'Human' in distinct_species:
                # Human always best..
                best_species = 'Human'
            elif len(distinct_species) == 1:
                # If only one type.. then it must be the best match
                best_species = next(iter(distinct_species))
            else:
                # There are more than 1 species, and human is not in it.. do similarity
                a = Alignment()
                ref_p = Protein.objects.get(family__slug=p_slug,
                                            species__common_name='Human',
                                            sequence_type__slug='wt')
                a.load_reference_protein(ref_p)
                a.load_proteins(candidate_proteins)
                a.load_segments(
                    ProteinSegment.objects.filter(slug__in=[
                        'TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TM7'
                    ]))
                a.build_alignment()
                a.calculate_similarity()
                best_species = a.proteins[1].protein.species.common_name

            ## Now that we know which species to label as most "human" go through structures and label
            for _pdb, species, _protein, structure in pdbs:
                structure.closest_to_human = species == best_species
                structure.save()
Пример #11
0
    def get(self, request, proteins=None, segments=None, statistics=False):
        """Align wild-type proteins, optionally adding per-position statistics.

        Args:
            request: The incoming request (not read here).
            proteins: Comma-separated protein entry names. The first one
                supplies the residue numbering scheme.
            segments: Optional comma-separated list. Entries that match a
                non-partial segment slug select that segment; every other
                entry is looked up as a generic number label in the scheme
                of the first protein. When omitted, every non-partial
                segment is used.
            statistics: When equal to ``True``, statistics are calculated
                and added under the ``"statistics"`` key.

        Returns:
            A ``Response`` wrapping a dict of FASTA header -> aligned row
            (plus ``"statistics"`` when requested), or ``None`` when
            ``proteins`` is ``None``.
        """
        if proteins is None:
            return None

        names = proteins.split(",")
        protein_qs = Protein.objects.filter(sequence_type__slug='wt', entry_name__in=names)

        # the first listed protein decides the numbering scheme
        numbering_scheme_id = Protein.objects.get(entry_name=names[0]).residue_numbering_scheme_id

        generic_positions = []
        if segments is None:
            segment_qs = ProteinSegment.objects.filter(partial=False)
        else:
            tokens = segments.split(",")
            # slugs of all complete segments, used to classify the input
            complete_slugs = ProteinSegment.objects.filter(partial=False).values_list('slug', flat=True)
            chosen_slugs = []
            for token in tokens:
                if token in complete_slugs:
                    chosen_slugs.append(token)
                    continue
                # not a segment slug: resolve it as a generic number
                position = ResidueGenericNumberEquivalent.objects.get(label=token, scheme__id=numbering_scheme_id)
                position.properties = {}
                generic_positions.append(position)

            segment_qs = ProteinSegment.objects.filter(slug__in=chosen_slugs, partial=False)

        alignment = Alignment()
        alignment.show_padding = False
        alignment.load_proteins(protein_qs)

        # generic numbers (if any) are loaded before the segments
        if generic_positions:
            alignment.load_segments(generic_positions)
        alignment.load_segments(segment_qs)

        alignment.build_alignment()

        include_statistics = statistics == True
        if include_statistics:
            alignment.calculate_statistics()

        # the fasta template is rendered to text and processed line by line
        rendered = render_to_string('alignment/alignment_fasta.html', {'a': alignment})

        # pair every header with the first row that follows it
        ali_dict = {}
        header = False
        for line in rendered.split("\n"):
            if line.startswith(">"):
                header = line[1:]
                continue
            if header:
                ali_dict[header] = line
                header = False

        if include_statistics:
            def flatten_first(stats):
                # keep the first element of every entry, flattened over groups
                return [entry[0] for group in stats for entry in group]

            feat = {}
            for position, feature in enumerate(AMINO_ACID_GROUPS):
                feat[feature] = flatten_first(alignment.feature_stats[position])
            for position, residue in enumerate(AMINO_ACIDS):
                feat[residue] = flatten_first(alignment.amino_acid_stats[position])

            ali_dict["statistics"] = feat

        return Response(ali_dict)
Пример #12
0
    def find_segment_template(self, pconf, sconfs, segments):
        """Run a similarity alignment and return its entry at index 1.

        Args:
            pconf: Object whose ``protein`` attribute is loaded as the
                alignment reference.
            sconfs: Proteins loaded into the alignment next to the reference.
            segments: Segments the alignment is restricted to.

        Returns:
            ``proteins[1]`` of the alignment once similarity is calculated.
        """
        template_alignment = Alignment()

        # inputs: reference, candidates, segments — loaded in that order
        template_alignment.load_reference_protein(pconf.protein)
        template_alignment.load_proteins(sconfs)
        template_alignment.load_segments(segments)

        # compute the matrix, then the similarity scores
        template_alignment.build_alignment()
        template_alignment.calculate_similarity()

        chosen = template_alignment.proteins[1]
        return chosen
Пример #13
0
def get_segment_template(protein, segments=None, state=None):
    """Align structure proteins against ``protein`` and return the pick.

    Args:
        protein: Reference protein; its ``id`` is also used to exclude
            structures from the candidate set.
        segments: Segment slugs to align on. Defaults to ``TM1`` .. ``TM7``
            when omitted.
        state: Optional conformation state added to the exclusion filter.

    Returns:
        The entry at index 1 of the alignment's ``proteins`` after the
        similarity calculation.
    """
    # a None sentinel avoids sharing one mutable default list across calls
    if segments is None:
        segments = ['TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TM7']

    a = Alignment()
    a.load_reference_protein(protein)

    exclusion = {'protein_conformation__protein': protein.id}
    if state:
        # NOTE(review): both conditions go into a single exclude() call, so
        # only structures matching the protein AND the state are dropped —
        # confirm that this is the intended selection.
        exclusion['protein_conformation__state'] = state

    structures = Structure.objects.order_by(
        'protein_conformation__protein__parent',
        'resolution').exclude(**exclusion)
    a.load_proteins([x.protein_conformation.protein.parent for x in structures])

    a.load_segments(ProteinSegment.objects.filter(slug__in=segments))
    a.build_alignment()
    a.calculate_similarity()

    return a.proteins[1]
Пример #14
0
    def main_func(self, positions, iteration, count, lock):
        """Build, store and materialise a consensus for each protein family.

        Families are claimed one at a time through the shared ``count``
        index. For every family with more than one wild-type protein an
        alignment is built and pickled into ``AlignmentConsensus``, a
        consensus protein record is created from the forced consensus, and
        its residues are created per segment.

        Args:
            positions: Not read by this method.
            iteration: Not read by this method.
            count: Shared counter object; ``count.value`` is the index of
                the next family to process.
            lock: Context manager guarding reads and updates of ``count``.
        """
        if self.signprot:
            signprot_fam = ProteinFamily.objects.get(name=self.signprot)
            families = ProteinFamily.objects.filter(slug__startswith=signprot_fam.slug+'_').all() # The '_' at the end is needed to skip the Alpha and Arrestin consensus sequences
            self.segments = ProteinSegment.objects.filter(partial=False, proteinfamily=self.signprot)
        else:
            families = self.families

        if self.input_slug:
            families = ProteinFamily.objects.filter(slug__startswith=self.input_slug)

        while True:
            # The bound check and the claim happen under the same lock, so
            # another holder of the lock cannot advance the counter between
            # the two and push the index past the end of `families`.
            with lock:
                if count.value >= len(families):
                    break
                family = families[count.value]
                count.value += 1

            # get proteins in this family
            proteins = Protein.objects.filter(family__slug__startswith=family.slug, sequence_type__slug='wt',
                species__common_name="Human").prefetch_related('species', 'residue_numbering_scheme')

            # if family does not have human equivalents, like Class D1
            if len(proteins)==0:
                proteins = Protein.objects.filter(family__slug__startswith=family.slug, sequence_type__slug='wt',).prefetch_related('species', 'residue_numbering_scheme')

            if proteins.count() <= 1:
                continue
            self.logger.info('Building alignment for {}'.format(family))
            # create alignment
            a = Alignment()
            a.load_proteins(proteins)
            a.load_segments(self.segments)
            a.build_alignment()
            a.calculate_statistics()

            try:
                # Save alignment
                AlignmentConsensus.objects.create(slug=family.slug, alignment=pickle.dumps(a))

                # Load alignment to ensure it works (round-trips the data
                # that was just written above)
                a = pickle.loads(AlignmentConsensus.objects.get(slug=family.slug).alignment)
                self.logger.info('Succesfully pickled {}'.format(family))
            except Exception:
                # best effort: the failure is logged and processing goes on
                # with the alignment already held in memory
                self.logger.error('Failed pickle for {}'.format(family))

            self.logger.info('Completed building alignment for {}'.format(family))

            # get (forced) consensus sequence from alignment object
            family_consensus = "".join(
                aa for s in a.forced_consensus.values() for aa in s.values())

            # create sequence type 'consensus'
            sequence_type, created = ProteinSequenceType.objects.get_or_create(slug='consensus',
                defaults={'name': 'Consensus',})
            if created:
                self.logger.info('Created protein sequence type {}'.format(sequence_type.name))

            # create a protein record
            consensus_name = family.name + " consensus"
            residue_numbering_scheme = proteins[0].residue_numbering_scheme
            up = dict()
            up['entry_name'] = slugify(consensus_name)
            # an already taken entry name gets the first slug part appended
            if Protein.objects.filter(entry_name=up['entry_name']).exists():
                up['entry_name'] += "-" + family.slug.split('_')[0]
            up['source'] = "OTHER"
            up['species_latin_name'] = proteins[0].species.latin_name
            up['species_common_name'] = proteins[0].species.common_name
            up['sequence'] = family_consensus

            # note: both keys refer to the same list object
            up['names'] = up['genes'] = []
            self.create_protein(consensus_name, family, sequence_type, residue_numbering_scheme, False, up)

            # get protein anomalies in family
            all_constrictions = []
            constriction_freq = dict()
            consensus_pas = dict() # a constriction has to be in all sequences to be included in the consensus
            pcs = ProteinConformation.objects.filter(protein__in=proteins,
                state__slug=settings.DEFAULT_PROTEIN_STATE).prefetch_related('protein_anomalies')
            for pc in pcs:
                pas = pc.protein_anomalies.all().prefetch_related('generic_number__protein_segment', 'anomaly_type')
                for pa in pas:
                    pa_label = pa.generic_number.label
                    pa_type = pa.anomaly_type.slug
                    pa_segment_slug = pa.generic_number.protein_segment.slug

                    # bulges are directly added to the consensus list
                    if pa_type == 'bulge':
                        segment_pas = consensus_pas.setdefault(pa_segment_slug, [])
                        if pa not in segment_pas:
                            segment_pas.append(pa)

                    # a constriction's frequency is counted
                    else:
                        if pa not in all_constrictions:
                            all_constrictions.append(pa)
                        constriction_freq[pa_label] = constriction_freq.get(pa_label, 0) + 1

            # go through constrictions to see which ones should be included in the consensus
            for pa in all_constrictions:
                pa_label = pa.generic_number.label
                pa_segment_slug = pa.generic_number.protein_segment.slug
                freq = constriction_freq[pa_label]

                # is the constriction in all sequences?
                # NOTE(review): freq is compared with the number of distinct
                # constrictions collected above, not with the number of
                # conformations in `pcs` — confirm this is the intended test.
                if freq == len(all_constrictions):
                    consensus_pas.setdefault(pa_segment_slug, []).append(pa)

            # create residues
            pc = ProteinConformation.objects.get(protein__entry_name=up['entry_name'],
                state__slug=settings.DEFAULT_PROTEIN_STATE)
            segment_info = self.get_segment_residue_information(a.forced_consensus)
            ref_positions, segment_starts, segment_aligned_starts, segment_ends, segment_aligned_ends = segment_info
            for segment_slug, s in a.forced_consensus.items():
                if self.signprot:
                    segment = ProteinSegment.objects.get(slug=segment_slug, proteinfamily=self.signprot)
                else:
                    segment = ProteinSegment.objects.get(slug=segment_slug)
                protein_anomalies = consensus_pas.get(segment_slug, [])
                if segment_slug in segment_starts:
                    # the signal protein is passed as one extra trailing
                    # positional argument when it is set
                    extra_args = (self.signprot,) if self.signprot else ()
                    create_or_update_residues_in_segment(pc, segment, segment_starts[segment_slug],
                        segment_aligned_starts[segment_slug], segment_ends[segment_slug],
                        segment_aligned_ends[segment_slug], self.schemes, ref_positions, protein_anomalies, True,
                        *extra_args)
Пример #15
0
    def get(self,
            request,
            slug=None,
            segments=None,
            latin_name=None,
            statistics=False):
        """Align the wild-type proteins of a family and return them as a dict.

        Args:
            request: Incoming request (not read by this method).
            slug: Family slug prefix. Wild-type proteins with ``source__id=1``
                whose family slug starts with this value are aligned.
            segments: Optional comma-separated string mixing protein segment
                slugs and generic number labels. When omitted, every
                non-partial segment is used.
            latin_name: Optional species latin name restricting the proteins.
            statistics: When equal to ``True``, feature and amino acid
                statistics are added under the ``"statistics"`` key.

        Returns:
            ``Response`` wrapping an ``OrderedDict`` that maps every FASTA
            header to its sequence row, followed by a ``"CONSENSUS"`` entry
            (and ``"statistics"`` when requested). ``None`` is returned when
            ``slug`` is ``None``.

        Raises:
            IndexError: If no protein matches the filters (``ps[0]``).
            ResidueGenericNumberEquivalent.DoesNotExist: If an entry of
                ``segments`` is neither a known segment slug nor a generic
                number label of the family's numbering scheme.
        """
        if slug is None:
            return None

        def flatten_stats(stats_per_segment):
            # keep only the first item of every entry (the frequencies are
            # dropped) and flatten the per-segment lists into a single list
            return [
                entry[0]
                for segment_stats in stats_per_segment
                for entry in segment_stats
            ]

        protein_filter = {
            'sequence_type__slug': 'wt',
            'source__id': 1,
            'family__slug__startswith': slug,
        }
        # Check for specific species
        if latin_name is not None:
            protein_filter['species__latin_name'] = latin_name
        ps = Protein.objects.filter(**protein_filter)

        # take the numbering scheme and the family prefix from the first
        # protein (fetched once instead of once per attribute)
        first_protein = ps[0]
        s_slug = first_protein.residue_numbering_scheme_id
        protein_family = first_protein.family.slug[:3]

        gen_list = []
        if segments is not None:
            # slugs of all complete segments, as a set for cheap membership
            known_slugs = set(
                ProteinSegment.objects.filter(partial=False).values_list(
                    'slug', flat=True))
            segment_list = []
            for s in segments.split(","):
                if s in known_slugs:
                    # add to segment list
                    segment_list.append(s)
                else:
                    # anything else is looked up as a generic number label
                    gen_object = ResidueGenericNumberEquivalent.objects.get(
                        label=s, scheme__id=s_slug)
                    gen_object.properties = {}
                    gen_list.append(gen_object)

            # fetch all complete protein_segments
            ss = ProteinSegment.objects.filter(slug__in=segment_list,
                                               partial=False)
        else:
            ss = ProteinSegment.objects.filter(partial=False)

        # restrict the segments to the protein class encoded in the first
        # three characters of the family slug
        if int(protein_family) < 100:
            ss = [s for s in ss if s.proteinfamily == 'GPCR']
        elif protein_family == "100":
            ss = [s for s in ss if s.proteinfamily == 'Gprotein']
        elif protein_family == "200":
            ss = [s for s in ss if s.proteinfamily == 'Arrestin']

        # create an alignment object
        a = Alignment()
        a.show_padding = False

        # load data from selection into the alignment
        a.load_proteins(ps)

        # load generic numbers and TMs separately
        if gen_list:
            a.load_segments(gen_list)
        a.load_segments(ss)

        # build the alignment data matrix
        a.build_alignment()

        a.calculate_statistics()

        consensus = ''.join(aa.amino_acid for aa in a.full_consensus)

        # render the fasta template as string
        response = render_to_string('alignment/alignment_fasta.html', {
            'a': a
        }).split("\n")

        # convert the list to a dict: a row is stored only when it directly
        # follows a ">" header, so blank or stray rows no longer end up under
        # a bogus ``False`` key (and cannot hit an unbound ``k``)
        ali_dict = OrderedDict()
        k = None
        for row in response:
            if row.startswith(">"):
                k = row[1:]
            elif k is not None:
                ali_dict[k] = row
                k = None
        ali_dict['CONSENSUS'] = consensus

        # render statistics for output
        # Kept as an equality test on purpose: a truthy value that does not
        # equal True (e.g. a non-empty string) must not enable statistics.
        if statistics == True:
            feat = {}
            for i, feature in enumerate(AMINO_ACID_GROUPS):
                feat[feature] = flatten_stats(a.feature_stats[i])

            for i, AA in enumerate(AMINO_ACIDS):
                feat[AA] = flatten_stats(a.amino_acid_stats[i])

            ali_dict["statistics"] = feat

        return Response(ali_dict)
Пример #16
0
    def get(self, request, slug=None, segments=None, latin_name=None, statistics=False):
        """Return the alignment of a protein family as a header -> sequence dict.

        Args:
            request: Incoming request (not read by this method).
            slug: Family slug prefix; wild-type proteins with ``source__id=1``
                whose family slug starts with it are aligned.
            segments: Optional comma-separated string of protein segment slugs
                and/or generic number labels. Defaults to all non-partial
                segments.
            latin_name: Optional species latin name used to narrow the proteins.
            statistics: Adds a ``"statistics"`` entry when equal to ``True``.

        Returns:
            ``Response`` around an ``OrderedDict`` mapping each FASTA header to
            its sequence row, plus ``"CONSENSUS"`` (and ``"statistics"`` when
            requested). Returns ``None`` when ``slug`` is ``None``.

        Raises:
            IndexError: If the protein query is empty (``ps[0]``).
            ResidueGenericNumberEquivalent.DoesNotExist: If an item of
                ``segments`` is neither a segment slug nor a generic number
                label of the numbering scheme in use.
        """
        if slug is None:
            return None

        def without_frequencies(stats_per_segment):
            # each entry keeps only its first item; the nested per-segment
            # lists are flattened into one list
            return [entry[0] for segment_stats in stats_per_segment for entry in segment_stats]

        ps = Protein.objects.filter(sequence_type__slug='wt', source__id=1, family__slug__startswith=slug)
        # Check for specific species
        if latin_name is not None:
            ps = ps.filter(species__latin_name=latin_name)

        # take the numbering scheme and family prefix from the first protein,
        # evaluating ps[0] only once
        first_protein = ps[0]
        s_slug = first_protein.residue_numbering_scheme_id
        protein_family = first_protein.family.slug[:3]

        gen_list = []
        if segments is None:
            ss = ProteinSegment.objects.filter(partial=False)
        else:
            # all complete segment slugs, held in a set for O(1) membership tests
            complete_slugs = set(ProteinSegment.objects.filter(partial=False).values_list('slug', flat=True))
            segment_list = []
            for s in segments.split(","):
                if s in complete_slugs:
                    segment_list.append(s)
                    continue
                # not a segment slug: resolve it as a generic number label
                gen_object = ResidueGenericNumberEquivalent.objects.get(label=s, scheme__id=s_slug)
                gen_object.properties = {}
                gen_list.append(gen_object)

            # fetch all complete protein_segments
            ss = ProteinSegment.objects.filter(slug__in=segment_list, partial=False)

        # keep only segments of the protein class given by the slug prefix
        if int(protein_family) < 100:
            ss = [s for s in ss if s.proteinfamily == 'GPCR']
        elif protein_family == "100":
            ss = [s for s in ss if s.proteinfamily == 'Gprotein']
        elif protein_family == "200":
            ss = [s for s in ss if s.proteinfamily == 'Arrestin']

        # create an alignment object
        a = Alignment()
        a.show_padding = False

        # load data from selection into the alignment
        a.load_proteins(ps)

        # load generic numbers and TMs separately
        if gen_list:
            a.load_segments(gen_list)
        a.load_segments(ss)

        # build the alignment data matrix
        a.build_alignment()

        a.calculate_statistics()

        consensus = ''.join(aa.amino_acid for aa in a.full_consensus)

        # render the fasta template as string
        response = render_to_string('alignment/alignment_fasta.html', {'a': a}).split("\n")

        # convert the list to a dict; only the row right after a ">" header is
        # stored, so blank rows are no longer written under the key ``False``
        # and a leading non-header row cannot reference an unbound ``k``
        ali_dict = OrderedDict()
        k = None
        for row in response:
            if row.startswith(">"):
                k = row[1:]
            elif k is not None:
                ali_dict[k] = row
                k = None
        ali_dict['CONSENSUS'] = consensus

        # render statistics for output
        # Equality (not truthiness) is kept deliberately so that truthy values
        # other than True, such as a non-empty string, do not switch this on.
        if statistics == True:
            feat = {}
            for i, feature in enumerate(AMINO_ACID_GROUPS):
                feat[feature] = without_frequencies(a.feature_stats[i])
            for i, AA in enumerate(AMINO_ACIDS):
                feat[AA] = without_frequencies(a.amino_acid_stats[i])
            ali_dict["statistics"] = feat

        return Response(ali_dict)
Пример #17
0
    def main_func(self, positions, iteration):
        """Create a consensus protein for each family in a slice of ``self.families``.

        For every family with more than one matching wild-type protein
        (``species__id=1``) an alignment is built, its forced consensus
        sequence is stored through ``self.create_protein`` with sequence type
        ``consensus``, and the residues of that protein are created segment by
        segment, carrying over the family's protein anomalies.

        Args:
            positions: Two-item sequence ``(start, end)`` used to slice
                ``self.families``; a falsy ``end`` means "up to the end".
            iteration: Not used by this method.

        Returns:
            None.
        """
        # families: a falsy end position selects everything from the start on
        families = self.families[positions[0]:positions[1] or None]

        for family in families:
            # get proteins in this family
            proteins = Protein.objects.filter(family__slug__startswith=family.slug, sequence_type__slug='wt',
                species__id=1).prefetch_related('species', 'residue_numbering_scheme')

            # families with fewer than two proteins are skipped
            if proteins.count() <= 1:
                continue
            self.logger.info('Building alignment for {}'.format(family))
            # create alignment
            a = Alignment()
            a.load_proteins(proteins)
            a.load_segments(self.segments)
            a.build_alignment()
            a.calculate_statistics()
            self.logger.info('Completed building alignment for {}'.format(family))

            # get (forced) consensus sequence from alignment object
            family_consensus = ''.join(aa for s in a.forced_consensus.values() for aa in s.values())

            # create sequence type 'consensus'
            sequence_type, created = ProteinSequenceType.objects.get_or_create(slug='consensus',
                defaults={'name': 'Consensus',})
            if created:
                self.logger.info('Created protein sequence type {}'.format(sequence_type.name))

            # create a protein record; species and numbering scheme are taken
            # from the first protein of the family (fetched once)
            first_protein = proteins[0]
            consensus_name = family.name + " consensus"
            residue_numbering_scheme = first_protein.residue_numbering_scheme
            up = dict()
            up['entry_name'] = slugify(consensus_name)
            if Protein.objects.filter(entry_name=up['entry_name']).exists():
                # entry name already taken: disambiguate with the first part of the family slug
                up['entry_name'] += "-" + family.slug.split('_')[0]
            up['source'] = "OTHER"
            up['species_latin_name'] = first_protein.species.latin_name
            up['species_common_name'] = first_protein.species.common_name
            up['sequence'] = family_consensus

            # two separate lists, so that filling one can never alter the other
            up['names'] = []
            up['genes'] = []
            self.create_protein(consensus_name, family, sequence_type, residue_numbering_scheme, False, up)

            # get protein anomalies in family
            all_constrictions = []
            constriction_freq = dict()
            consensus_pas = dict() # a constriction has to be in all sequences to be included in the consensus
            family_pcs = ProteinConformation.objects.filter(protein__in=proteins,
                state__slug=settings.DEFAULT_PROTEIN_STATE).prefetch_related('protein_anomalies')
            num_conformations = 0
            for family_pc in family_pcs:
                num_conformations += 1
                pas = family_pc.protein_anomalies.all().prefetch_related('generic_number__protein_segment',
                    'anomaly_type')
                for pa in pas:
                    pa_label = pa.generic_number.label
                    pa_type = pa.anomaly_type.slug
                    pa_segment_slug = pa.generic_number.protein_segment.slug

                    # bulges are directly added to the consensus list
                    if pa_type == 'bulge':
                        segment_pas = consensus_pas.setdefault(pa_segment_slug, [])
                        if pa not in segment_pas:
                            segment_pas.append(pa)

                    # a constriction's frequency is counted
                    else:
                        if pa not in all_constrictions:
                            all_constrictions.append(pa)
                        constriction_freq[pa_label] = constriction_freq.get(pa_label, 0) + 1

            # go through constrictions to see which ones should be included in the consensus
            for pa in all_constrictions:
                pa_label = pa.generic_number.label
                pa_segment_slug = pa.generic_number.protein_segment.slug

                # is the constriction in all sequences? The frequency is
                # compared with the number of conformations examined; the
                # previous comparison against len(all_constrictions) used the
                # number of distinct constrictions, which is unrelated.
                if constriction_freq[pa_label] == num_conformations:
                    consensus_pas.setdefault(pa_segment_slug, []).append(pa)

            # create residues
            pc = ProteinConformation.objects.get(protein__entry_name=up['entry_name'],
                state__slug=settings.DEFAULT_PROTEIN_STATE)
            segment_info = self.get_segment_residue_information(a.forced_consensus)
            ref_positions, segment_starts, segment_aligned_starts, segment_ends, segment_aligned_ends = segment_info
            for segment_slug in a.forced_consensus:
                segment = ProteinSegment.objects.get(slug=segment_slug)
                protein_anomalies = consensus_pas.get(segment_slug, [])
                # segments without residue information are left out
                if segment_slug in segment_starts:
                    create_or_update_residues_in_segment(pc, segment, segment_starts[segment_slug],
                        segment_aligned_starts[segment_slug], segment_ends[segment_slug],
                        segment_aligned_ends[segment_slug], self.schemes, ref_positions, protein_anomalies, True)
Пример #18
0
 def run_build(self):
     """Store one ``StructureModelRMSD`` record per data row of ``self.data``.

     The first entry of ``self.data`` is skipped (presumably a header row;
     verify against the loader). Every remaining row must hold exactly 14
     cells: target PDB code, main template PDB code, version, ten RMSD
     columns and notes. Sequence identity and similarity between the target
     and the template are taken from an alignment of their parent proteins.
     """
     def parse_cell(cell):
         # '-' stands for a missing value; a three-character string that
         # contains a dot is turned into a float; anything else is kept as is
         if cell == '-':
             return None
         if '.' in cell and len(cell) == 3:
             return float(cell)
         return cell

     for raw_row in self.data[1:]:
         (pdb, main_temp, version, overall_all, overall_backbone, TM_all,
          TM_backbone, H8, ICL1, ECL1, ICL2, ECL2, ECL3,
          notes) = [parse_cell(cell) for cell in raw_row]

         target_structure = Structure.objects.get(pdb_code__index=pdb.upper())
         main_template = Structure.objects.get(
             pdb_code__index=main_temp.upper())

         # align the template against the target, which acts as reference
         alignment = Alignment()
         target_protein = target_structure.protein_conformation.protein.parent
         alignment.load_reference_protein(target_protein)
         alignment.load_proteins(
             [main_template.protein_conformation.protein.parent])

         # one value per protein segment that has residues in the target
         segment_ids = Residue.objects.filter(
             protein_conformation__protein=target_protein)
         segment_ids = segment_ids.order_by('protein_segment__id')
         segment_ids = segment_ids.distinct('protein_segment__id')
         segment_ids = segment_ids.values_list('protein_segment', flat=True)
         alignment.load_segments(
             ProteinSegment.objects.filter(id__in=segment_ids))

         alignment.build_alignment()
         alignment.remove_non_generic_numbers_from_alignment()
         alignment.calculate_similarity()

         # index 1 is read for the template's scores (the reference is
         # presumably at index 0 - confirm against Alignment)
         template_entry = alignment.proteins[1]
         seq_sim = template_entry.similarity
         seq_id = template_entry.identity

         # the version string is rearranged from its last four characters,
         # characters 3-4 and its first two characters
         formatted_version = '{}-{}-{}'.format(version[-4:], version[3:5],
                                               version[:2])
         StructureModelRMSD.objects.get_or_create(
             target_structure=target_structure,
             main_template=main_template,
             version=formatted_version,
             seq_id=seq_id,
             seq_sim=seq_sim,
             overall_all=overall_all,
             overall_backbone=overall_backbone,
             TM_all=TM_all,
             TM_backbone=TM_backbone,
             H8=H8,
             ICL1=ICL1,
             ECL1=ECL1,
             ICL2=ICL2,
             ECL2=ECL2,
             ECL3=ECL3,
             notes=notes)
Пример #19
0
    def main_func(self, positions, iteration):
        """Create a consensus protein for each family in a slice of ``self.families``.

        For every family with more than one matching human wild-type protein
        an alignment is built, its forced consensus sequence is stored through
        ``self.create_protein`` with sequence type ``consensus``, and the
        residues of that protein are created segment by segment, carrying
        over the family's protein anomalies.

        Args:
            positions: Two-item sequence ``(start, end)`` used to slice
                ``self.families``; a falsy ``end`` means "up to the end".
            iteration: Not used by this method.

        Returns:
            None.
        """
        # families: a falsy end position selects everything from the start on
        families = self.families[positions[0]:positions[1] or None]

        for family in families:
            # get proteins in this family
            proteins = Protein.objects.filter(
                family__slug__startswith=family.slug,
                sequence_type__slug='wt',
                species__common_name="Human").prefetch_related(
                    'species', 'residue_numbering_scheme')

            # families with fewer than two proteins are skipped
            if proteins.count() <= 1:
                continue
            self.logger.info('Building alignment for {}'.format(family))
            # create alignment
            a = Alignment()
            a.load_proteins(proteins)
            a.load_segments(self.segments)
            a.build_alignment()
            a.calculate_statistics()
            self.logger.info(
                'Completed building alignment for {}'.format(family))

            # get (forced) consensus sequence from alignment object
            family_consensus = ''.join(
                aa for s in a.forced_consensus.values() for aa in s.values())

            # create sequence type 'consensus'
            sequence_type, created = ProteinSequenceType.objects.get_or_create(
                slug='consensus', defaults={
                    'name': 'Consensus',
                })
            if created:
                self.logger.info('Created protein sequence type {}'.format(
                    sequence_type.name))

            # create a protein record; species and numbering scheme are taken
            # from the first protein of the family (fetched once)
            first_protein = proteins[0]
            consensus_name = family.name + " consensus"
            residue_numbering_scheme = first_protein.residue_numbering_scheme
            up = dict()
            up['entry_name'] = slugify(consensus_name)
            if Protein.objects.filter(entry_name=up['entry_name']).exists():
                # entry name already taken: disambiguate with the first part
                # of the family slug
                up['entry_name'] += "-" + family.slug.split('_')[0]
            up['source'] = "OTHER"
            up['species_latin_name'] = first_protein.species.latin_name
            up['species_common_name'] = first_protein.species.common_name
            up['sequence'] = family_consensus

            # two separate lists, so that filling one can never alter the other
            up['names'] = []
            up['genes'] = []
            self.create_protein(consensus_name, family, sequence_type,
                                residue_numbering_scheme, False, up)

            # get protein anomalies in family
            all_constrictions = []
            constriction_freq = dict()
            # a constriction has to be in all sequences to be included in the
            # consensus
            consensus_pas = dict()
            family_pcs = ProteinConformation.objects.filter(
                protein__in=proteins,
                state__slug=settings.DEFAULT_PROTEIN_STATE).prefetch_related(
                    'protein_anomalies')
            num_conformations = 0
            for family_pc in family_pcs:
                num_conformations += 1
                pas = family_pc.protein_anomalies.all().prefetch_related(
                    'generic_number__protein_segment', 'anomaly_type')
                for pa in pas:
                    pa_label = pa.generic_number.label
                    pa_type = pa.anomaly_type.slug
                    pa_segment_slug = pa.generic_number.protein_segment.slug

                    # bulges are directly added to the consensus list
                    if pa_type == 'bulge':
                        segment_pas = consensus_pas.setdefault(
                            pa_segment_slug, [])
                        if pa not in segment_pas:
                            segment_pas.append(pa)

                    # a constriction's frequency is counted
                    else:
                        if pa not in all_constrictions:
                            all_constrictions.append(pa)
                        constriction_freq[pa_label] = (
                            constriction_freq.get(pa_label, 0) + 1)

            # go through constrictions to see which ones should be included in the consensus
            for pa in all_constrictions:
                pa_label = pa.generic_number.label
                pa_segment_slug = pa.generic_number.protein_segment.slug

                # is the constriction in all sequences? The frequency is
                # compared with the number of conformations examined; the
                # previous comparison against len(all_constrictions) used the
                # number of distinct constrictions, which is unrelated.
                if constriction_freq[pa_label] == num_conformations:
                    consensus_pas.setdefault(pa_segment_slug, []).append(pa)

            # create residues
            pc = ProteinConformation.objects.get(
                protein__entry_name=up['entry_name'],
                state__slug=settings.DEFAULT_PROTEIN_STATE)
            segment_info = self.get_segment_residue_information(
                a.forced_consensus)
            ref_positions, segment_starts, segment_aligned_starts, segment_ends, segment_aligned_ends = segment_info
            for segment_slug in a.forced_consensus:
                segment = ProteinSegment.objects.get(slug=segment_slug)
                protein_anomalies = consensus_pas.get(segment_slug, [])
                # segments without residue information are left out
                if segment_slug in segment_starts:
                    create_or_update_residues_in_segment(
                        pc, segment, segment_starts[segment_slug],
                        segment_aligned_starts[segment_slug],
                        segment_ends[segment_slug],
                        segment_aligned_ends[segment_slug], self.schemes,
                        ref_positions, protein_anomalies, True)
Пример #20
0
    def get(self, request, proteins=None, segments=None):
        """Align proteins against a reference and report identity/similarity.

        Args:
            request: Incoming request (not read by this method).
            proteins: Comma-separated entry names; the first one is the
                reference, the remaining ones are compared against it.
            segments: Optional comma-separated string of protein segment slugs
                and/or generic number labels. When omitted, all non-partial
                segments are aligned (previously this case raised
                ``NameError`` because ``gen_list`` and ``ss`` were unbound).

        Returns:
            ``Response`` wrapping an ``OrderedDict`` keyed by FASTA header and
            sorted by descending similarity. Each value is an ``OrderedDict``
            with the keys ``similarity``, ``identity`` and ``AA`` in that
            order. ``None`` is returned when ``proteins`` is ``None``.

        Raises:
            Protein.DoesNotExist: If the reference entry name is unknown.
            ResidueGenericNumberEquivalent.DoesNotExist: If an item of
                ``segments`` is neither a segment slug nor a generic number
                label of the reference's numbering scheme.
        """
        if proteins is None:
            return None

        protein_list = proteins.split(",")
        # first in API should be reference
        ps = Protein.objects.filter(sequence_type__slug='wt', entry_name__in=protein_list[1:])
        reference = Protein.objects.filter(sequence_type__slug='wt', entry_name__in=[protein_list[0]])

        # take the numbering scheme from the first protein
        s_slug = Protein.objects.get(entry_name=protein_list[0]).residue_numbering_scheme_id

        gen_list = []
        if segments is not None:
            # slugs of all complete segments, as a set for cheap membership tests
            complete_slugs = set(ProteinSegment.objects.filter(partial=False).values_list('slug', flat=True))
            segment_list = []
            for s in segments.split(","):
                if s in complete_slugs:
                    # add to segment list
                    segment_list.append(s)
                else:
                    # anything else is looked up as a generic number label
                    gen_object = ResidueGenericNumberEquivalent.objects.get(label=s, scheme__id=s_slug)
                    gen_object.properties = {}
                    gen_list.append(gen_object)

            # fetch all complete protein_segments
            ss = ProteinSegment.objects.filter(slug__in=segment_list, partial=False)
        else:
            # no selection given: align every complete segment
            ss = ProteinSegment.objects.filter(partial=False)

        # create an alignment object
        a = Alignment()
        a.show_padding = False

        # load data from API into the alignment
        # NOTE(review): ``reference`` is a queryset, not a single Protein -
        # confirm that Alignment.load_reference_protein accepts that.
        a.load_reference_protein(reference)
        a.load_proteins(ps)

        # load generic numbers and TMs separately
        a.load_segments(gen_list)
        a.load_segments(ss)

        # build the alignment data matrix
        a.build_alignment()

        # calculate identity and similarity of each row compared to the reference
        a.calculate_similarity()

        # render the fasta template as string
        response = render_to_string('alignment/alignment_fasta.html', {'a': a}).split("\n")

        # convert the list to a dict; the n-th sequence row is paired with
        # a.proteins[n]
        ali_dict = {}
        k = False
        num = 0
        for row in response:
            if row.startswith(">"):
                k = row[1:]
            elif k:
                aligned_protein = a.proteins[num]
                # add the query as 100 identical/similar to the beginning (like on the website)
                if num == 0:
                    aligned_protein.identity = 100
                    aligned_protein.similarity = 100
                identity = int(str(aligned_protein.identity).replace(" ", ""))
                similarity = int(str(aligned_protein.similarity).replace(" ", ""))
                # built directly in the output key order
                ali_dict[k] = OrderedDict([
                    ("similarity", similarity),
                    ("identity", identity),
                    ("AA", row),
                ])
                num += 1
                k = False

        ali_dict_ordered = OrderedDict(sorted(ali_dict.items(), key=lambda x: x[1]['similarity'], reverse=True))
        return Response(ali_dict_ordered)