def get(self, request, entry_name=None, segments=None):
    """Respond with the entry name of the closest structure template.

    The wild-type protein named ``entry_name`` is used as the alignment
    reference. Candidates are the parent proteins of structures, taking one
    structure per (parent protein, state) pair in ``resolution`` order.

    Args:
        request: Incoming request object (not read here).
        entry_name: Entry name of the reference protein. When ``None`` no
            lookup is performed and ``None`` is returned.
        segments: Optional comma-separated segment slugs. When omitted, all
            non-partial segments of category ``'helix'`` are aligned.

    Returns:
        A ``Response`` wrapping the entry name of ``proteins[1]`` of the
        alignment after similarity has been calculated, or ``None`` when
        ``entry_name`` is ``None``.
    """
    if entry_name is None:
        return None

    reference = Protein.objects.get(sequence_type__slug='wt', entry_name=entry_name)

    # DISTINCT on (parent, state) keeps the first row of each group under
    # the (parent, state, resolution) ordering.
    template_structures = Structure.objects.order_by(
        'protein_conformation__protein__parent', 'state', 'resolution'
    ).distinct('protein_conformation__protein__parent', 'state')
    candidates = [
        structure.protein_conformation.protein.parent
        for structure in template_structures
    ]

    if segments is None:
        selected_segments = ProteinSegment.objects.filter(partial=False, category='helix')
    else:
        selected_segments = ProteinSegment.objects.filter(
            slug__in=segments.split(","), partial=False)

    alignment = Alignment()
    alignment.show_padding = False

    # Feed reference, candidates and segments into the alignment.
    alignment.load_reference_protein(reference)
    alignment.load_proteins(candidates)
    alignment.load_segments(selected_segments)

    # Build the data matrix, then score every row against the reference.
    alignment.build_alignment()
    alignment.calculate_similarity()

    # Row 1 is reported as the closest template
    # (row 0 is presumably the reference itself -- confirm against Alignment).
    closest = alignment.proteins[1]
    return Response(closest.protein.entry_name)
def find_segment_template(self, pconf, sconfs, segments):
    """Align ``sconfs`` against ``pconf.protein`` and return the second row.

    Args:
        pconf: Object whose ``protein`` attribute is used as the alignment
            reference.
        sconfs: Candidates handed to ``Alignment.load_proteins``.
        segments: Segments handed to ``Alignment.load_segments``.

    Returns:
        ``proteins[1]`` of the alignment once similarity has been calculated
        (presumably the best-scoring candidate -- confirm against Alignment).
    """
    alignment = Alignment()

    # Load the inputs: reference first, then candidates, then segments.
    alignment.load_reference_protein(pconf.protein)
    alignment.load_proteins(sconfs)
    alignment.load_segments(segments)

    # Build the matrix and score each row against the reference.
    alignment.build_alignment()
    alignment.calculate_similarity()

    rows = alignment.proteins
    return rows[1]
def get_segment_template(protein, segments=['TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TM7'], state=None):
    """Return the alignment row at index 1 for ``protein`` against all structures.

    Candidate proteins are the parents of every structure (ordered by parent
    protein, then resolution) that is not excluded.

    Args:
        protein: Reference protein; its ``id`` is used in the exclusion.
        segments: Segment slugs to align over (defaults to TM1-TM7). The
            default list is never mutated here.
        state: Optional state. When truthy, only structures matching *both*
            the protein and this state are excluded; otherwise structures
            matching the protein are excluded.

    Returns:
        ``proteins[1]`` of the alignment after similarity calculation.
    """
    alignment = Alignment()
    alignment.load_reference_protein(protein)

    # Build the exclusion once instead of duplicating the whole query.
    exclusion = {'protein_conformation__protein': protein.id}
    if state:
        exclusion['protein_conformation__state'] = state

    candidate_structures = Structure.objects.order_by(
        'protein_conformation__protein__parent', 'resolution'
    ).exclude(**exclusion)
    alignment.load_proteins([
        structure.protein_conformation.protein.parent
        for structure in candidate_structures
    ])

    alignment.load_segments(ProteinSegment.objects.filter(slug__in=segments))
    alignment.build_alignment()
    alignment.calculate_similarity()
    return alignment.proteins[1]
def get_segment_template(protein, segments=['TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TM7'], state=None):
    """Align ``protein`` against structure-derived proteins; return row 1.

    Args:
        protein: Reference protein loaded into the alignment; ``protein.id``
            drives which structures are left out of the candidate set.
        segments: Slugs of the segments to align (TM1-TM7 by default; the
            default list is only read).
        state: If truthy, the exclusion additionally requires this state, so
            only structures matching protein *and* state are dropped.

    Returns:
        The entry at index 1 of ``Alignment.proteins`` once similarity has
        been calculated.
    """
    aln = Alignment()
    aln.load_reference_protein(protein)

    ordered = Structure.objects.order_by('protein_conformation__protein__parent', 'resolution')
    if state:
        remaining = ordered.exclude(protein_conformation__protein=protein.id,
                                    protein_conformation__state=state)
    else:
        remaining = ordered.exclude(protein_conformation__protein=protein.id)

    # Each remaining structure contributes the parent of its protein.
    parents = []
    for structure in remaining:
        parents.append(structure.protein_conformation.protein.parent)
    aln.load_proteins(parents)

    wanted_segments = ProteinSegment.objects.filter(slug__in=segments)
    aln.load_segments(wanted_segments)

    aln.build_alignment()
    aln.calculate_similarity()
    return aln.proteins[1]
def run_build(self):
    """Create ``StructureModelRMSD`` records from the rows of ``self.data``.

    The first row of ``self.data`` is skipped. Every other row must hold
    exactly 14 cells: pdb, main template, version, eleven RMSD-style columns
    and notes. Cells are normalised before use: ``'-'`` becomes ``None``, a
    three-character string containing ``'.'`` becomes a float, anything else
    is kept as is.

    For each row the target and main-template structures are looked up by
    upper-cased PDB code, their parent proteins are aligned over the
    segments that have residues on the target's parent protein, and the
    resulting similarity/identity are stored with the row's values via
    ``get_or_create``.
    """
    for raw_row in self.data[1:]:
        cells = []
        for cell in raw_row:
            if cell == '-':
                cells.append(None)
            elif '.' in cell and len(cell) == 3:
                cells.append(float(cell))
            else:
                cells.append(cell)

        (pdb, main_temp, version, overall_all, overall_backbone, TM_all,
         TM_backbone, H8, ICL1, ECL1, ICL2, ECL2, ECL3, notes) = cells

        target_structure = Structure.objects.get(pdb_code__index=pdb.upper())
        main_template = Structure.objects.get(pdb_code__index=main_temp.upper())

        alignment = Alignment()
        alignment.load_reference_protein(
            target_structure.protein_conformation.protein.parent)
        alignment.load_proteins(
            [main_template.protein_conformation.protein.parent])

        # Ids of the distinct segments that have residues on the target's parent protein.
        residues = Residue.objects.filter(
            protein_conformation__protein=target_structure.protein_conformation.protein.parent)
        segment_ids = residues.order_by('protein_segment__id').distinct(
            'protein_segment__id').values_list('protein_segment', flat=True)
        alignment.load_segments(ProteinSegment.objects.filter(id__in=segment_ids))

        alignment.build_alignment()
        alignment.remove_non_generic_numbers_from_alignment()
        alignment.calculate_similarity()

        template_row = alignment.proteins[1]
        seq_sim = template_row.similarity
        seq_id = template_row.identity

        # Re-arrange the version slices as last-4 / chars 3-4 / first-2
        # (looks like DD?MM?YYYY -> YYYY-MM-DD; confirm against the data file).
        version_label = '{}-{}-{}'.format(version[-4:], version[3:5], version[:2])

        _record, _created = StructureModelRMSD.objects.get_or_create(
            target_structure=target_structure,
            main_template=main_template,
            version=version_label,
            seq_id=seq_id,
            seq_sim=seq_sim,
            overall_all=overall_all,
            overall_backbone=overall_backbone,
            TM_all=TM_all,
            TM_backbone=TM_backbone,
            H8=H8,
            ICL1=ICL1,
            ECL1=ECL1,
            ICL2=ICL2,
            ECL2=ECL2,
            ECL3=ECL3,
            notes=notes)
def get(self, request, entry_name=None, segments=None):
    """Look up the closest structure template for a wild-type protein.

    Args:
        request: Incoming request object (unused by this handler).
        entry_name: Entry name of the reference protein. If ``None``, the
            handler does nothing and returns ``None``.
        segments: Optional comma-separated list of segment slugs; without it
            the alignment covers every non-partial ``'helix'`` segment.

    Returns:
        ``Response`` containing the entry name of the alignment row at
        index 1, or ``None`` when no ``entry_name`` was given.
    """
    if entry_name is None:
        return None

    ref_protein = Protein.objects.get(sequence_type__slug='wt', entry_name=entry_name)

    # First structure per (parent protein, state) under resolution ordering.
    per_state_structures = Structure.objects.order_by(
        'protein_conformation__protein__parent', 'state', 'resolution'
    ).distinct('protein_conformation__protein__parent', 'state')
    template_proteins = [s.protein_conformation.protein.parent for s in per_state_structures]

    # Only complete segments are aligned; narrow by slug or by category.
    segment_filter = {'partial': False}
    if segments is not None:
        segment_filter['slug__in'] = segments.split(",")
    else:
        segment_filter['category'] = 'helix'
    aligned_segments = ProteinSegment.objects.filter(**segment_filter)

    aln = Alignment()
    aln.show_padding = False

    aln.load_reference_protein(ref_protein)
    aln.load_proteins(template_proteins)
    aln.load_segments(aligned_segments)

    # Matrix first, then identity/similarity of each row versus the reference.
    aln.build_alignment()
    aln.calculate_similarity()

    # The row at index 1 is reported as the closest template.
    return Response(aln.proteins[1].protein.entry_name)
def get(self, request, proteins=None, segments=None):
    """Align proteins against a reference and return per-protein similarity.

    Args:
        request: Incoming request object (not read here).
        proteins: Comma-separated entry names; the first one is the
            reference, the rest are aligned against it. When ``None`` the
            handler returns ``None`` without building a response.
        segments: Optional comma-separated list mixing segment slugs and
            generic-number labels. Items that are not slugs of a complete
            segment are resolved as generic numbers in the reference
            protein's numbering scheme. When ``None``, all complete helix
            segments are aligned.

    Returns:
        ``Response`` wrapping an ``OrderedDict`` keyed by FASTA header and
        ordered by descending similarity; each value is an ``OrderedDict``
        with keys ``similarity``, ``identity`` and ``AA`` (in that order).
    """
    if proteins is None:
        return None

    protein_list = proteins.split(",")
    reference_name = protein_list[0]

    # First name in the API call is the reference.
    ps = Protein.objects.filter(sequence_type__slug='wt', entry_name__in=protein_list[1:])
    reference = Protein.objects.filter(sequence_type__slug='wt', entry_name__in=[reference_name])

    # Generic numbers are resolved in the reference protein's scheme.
    s_slug = Protein.objects.get(entry_name=reference_name).residue_numbering_scheme_id

    # Always bound, so a missing ``segments`` no longer hits unassigned names.
    gen_list = []
    if segments is not None:
        # Materialise the slugs once for O(1) membership tests.
        complete_slugs = set(
            ProteinSegment.objects.filter(partial=False).values_list('slug', flat=True))
        segment_list = []
        for s in segments.split(","):
            if s in complete_slugs:
                segment_list.append(s)
            else:
                # Not a segment slug: treat it as a generic-number label.
                gen_object = ResidueGenericNumberEquivalent.objects.get(
                    label=s, scheme__id=s_slug)
                gen_object.properties = {}
                gen_list.append(gen_object)
        ss = ProteinSegment.objects.filter(slug__in=segment_list, partial=False)
    else:
        # NOTE(review): default chosen to match the helix fallback used by
        # the template-lookup view; confirm this is the desired default.
        ss = ProteinSegment.objects.filter(partial=False, category='helix')

    a = Alignment()
    a.show_padding = False

    a.load_reference_protein(reference)
    a.load_proteins(ps)

    # Generic numbers and segments are loaded separately.
    a.load_segments(gen_list)
    a.load_segments(ss)

    a.build_alignment()
    a.calculate_similarity()

    fasta_lines = render_to_string('alignment/alignment_fasta.html', {'a': a}).split("\n")

    # A ">" line names the record; the next non-header line is its sequence.
    # Records are matched to ``a.proteins`` by position.
    ali_dict = {}
    header = False
    num = 0
    for row in fasta_lines:
        if row.startswith(">"):
            header = row[1:]
        elif header:
            entry = a.proteins[num]
            if num == 0:
                # The query itself is reported as 100% identical/similar.
                entry.identity = 100
                entry.similarity = 100
            ali_dict[header] = OrderedDict([
                ("similarity", int(str(entry.similarity).replace(" ", ""))),
                ("identity", int(str(entry.identity).replace(" ", ""))),
                ("AA", row),
            ])
            num += 1
            header = False

    ali_dict_ordered = OrderedDict(
        sorted(ali_dict.items(), key=lambda item: item[1]['similarity'], reverse=True))
    return Response(ali_dict_ordered)
def get(self, request, proteins=None, segments=None):
    """Return similarity, identity and aligned sequence for each protein.

    Args:
        request: Incoming request object (unused).
        proteins: Comma-separated entry names, reference first. ``None``
            makes the handler return ``None`` without building a response.
        segments: Optional comma-separated segment slugs and/or
            generic-number labels. Anything that is not the slug of a
            complete segment is looked up as a generic number in the
            reference protein's numbering scheme. ``None`` selects all
            complete helix segments.

    Returns:
        ``Response`` around an ``OrderedDict`` mapping FASTA header to an
        ``OrderedDict`` with keys ``similarity``, ``identity``, ``AA``,
        sorted by similarity, highest first.
    """
    if proteins is None:
        return None

    protein_list = proteins.split(",")
    query_name = protein_list[0]

    # The first name is the reference; the remainder are aligned against it.
    ps = Protein.objects.filter(sequence_type__slug='wt', entry_name__in=protein_list[1:])
    reference = Protein.objects.filter(sequence_type__slug='wt', entry_name__in=[query_name])

    # Numbering scheme of the reference, used to resolve generic numbers.
    s_slug = Protein.objects.get(entry_name=query_name).residue_numbering_scheme_id

    # Bound up front: previously these names only existed when ``segments``
    # was supplied, so the later ``load_segments`` calls could not work
    # without it.
    gen_list = []
    segment_list = []
    if segments is None:
        # NOTE(review): helix fallback mirrors the template-lookup view;
        # confirm it is the intended default for this endpoint.
        ss = ProteinSegment.objects.filter(partial=False, category='helix')
    else:
        # Evaluate the slug query once; set membership is O(1) per item.
        known_slugs = set(
            ProteinSegment.objects.filter(partial=False).values_list('slug', flat=True))
        for s in segments.split(","):
            if s in known_slugs:
                segment_list.append(s)
                continue
            # Otherwise the item is a generic-number label.
            gen_object = ResidueGenericNumberEquivalent.objects.get(label=s, scheme__id=s_slug)
            gen_object.properties = {}
            gen_list.append(gen_object)
        ss = ProteinSegment.objects.filter(slug__in=segment_list, partial=False)

    a = Alignment()
    a.show_padding = False

    a.load_reference_protein(reference)
    a.load_proteins(ps)

    # Generic numbers and segments go in as two separate loads.
    a.load_segments(gen_list)
    a.load_segments(ss)

    a.build_alignment()
    a.calculate_similarity()

    rendered = render_to_string('alignment/alignment_fasta.html', {'a': a})

    # Walk the FASTA text: a ">" line sets the pending header, the following
    # non-header line is its sequence. The n-th record uses ``a.proteins[n]``.
    ali_dict = {}
    pending = False
    num = 0
    for row in rendered.split("\n"):
        if row.startswith(">"):
            pending = row[1:]
            continue
        if not pending:
            continue
        aligned = a.proteins[num]
        if num == 0:
            # Report the query as 100% identical/similar to itself.
            aligned.identity = 100
            aligned.similarity = 100
        ali_dict[pending] = OrderedDict([
            ("similarity", int(str(aligned.similarity).replace(" ", ""))),
            ("identity", int(str(aligned.identity).replace(" ", ""))),
            ("AA", row),
        ])
        num += 1
        pending = False

    ali_dict_ordered = OrderedDict(
        sorted(ali_dict.items(), key=lambda kv: kv[1]['similarity'], reverse=True))
    return Response(ali_dict_ordered)
def receptor_mammal_representatives(self):
    """Flag unrefined structures as mammal and as closest-to-human.

    Pass 1: for every structure with ``refined=False`` the ``mammal`` flag
    is set from ``self.check_uniprot_if_mammal`` (called once per species
    common name, using the first parent protein seen for that species) and
    the structure is saved. Structures are grouped by
    (parent family slug, state slug).

    Pass 2: per group a "best" species is chosen and every structure in the
    group gets ``closest_to_human`` set to whether its species matches:

    * ``'Human'`` if present in the group;
    * otherwise the only species, if there is just one;
    * otherwise the species of alignment row 1 when the group's proteins
      are aligned over TM1-TM7 against the human wild-type protein of the
      family.
    """
    structures = Structure.objects.filter(refined=False).prefetch_related(
        "pdb_code", "state",
        "protein_conformation__protein__parent__family",
        "protein_conformation__protein__species")

    # (family slug, state slug) -> [(species, parent protein, structure), ...]
    # A tuple key is used instead of a '-'-joined string so that slugs
    # containing '-' cannot break the later unpacking.
    groups = {}
    is_mammal = {}

    for s in structures:
        state = s.state.slug
        protein = s.protein_conformation.protein.parent
        slug = protein.family.slug
        species = s.protein_conformation.protein.species.common_name

        # The mammal check is cached per species.
        if species not in is_mammal:
            is_mammal[species] = self.check_uniprot_if_mammal(protein)
        s.mammal = is_mammal[species]
        s.save()

        groups.setdefault((slug, state), []).append((species, protein, s))

    print("DEBUG", is_mammal)

    tm_slugs = ['TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TM7']
    for (p_slug, _state), members in groups.items():
        distinct_species = set(species for species, _, _ in members)

        if 'Human' in distinct_species:
            # Human always wins.
            best_species = 'Human'
        elif len(distinct_species) == 1:
            # Only one species available, so it is the best match.
            best_species = next(iter(distinct_species))
        else:
            # Several non-human species: rank them by similarity to human.
            group_proteins = set(protein for _, protein, _ in members)
            human_reference = Protein.objects.get(
                family__slug=p_slug, species__common_name='Human',
                sequence_type__slug='wt')

            a = Alignment()
            a.load_reference_protein(human_reference)
            a.load_proteins(group_proteins)
            a.load_segments(ProteinSegment.objects.filter(slug__in=tm_slugs))
            a.build_alignment()
            a.calculate_similarity()
            best_species = a.proteins[1].protein.species.common_name

        # Label every structure of the group against the chosen species.
        for species, _protein, structure in members:
            structure.closest_to_human = species == best_species
            structure.save()