示例#1
0
def pfam_annotate(data):
    """Annotate each row of *data* with the Pfam domains covering its residue.

    For every row, the value at column position 18 is split on "," into
    accessions (presumably UniProt IDs -- confirm against the caller) and
    the value at column position 12 is read as an integer residue position.
    Each accession other than "-" is looked up with ``prody.searchPfam``;
    every family whose first reported location contains the residue
    position is recorded as ``"<id>:<start>-<end>"``.

    Two columns are added (or overwritten) in place:

    * ``PFAM_domains`` -- comma-joined matches, or "-" when there are none.
    * ``Link_to_pfam`` -- Pfam protein URL of the last accession queried
      for the row when at least one match was found, otherwise "-".

    Lookups are best-effort: any exception raised while querying or
    parsing one accession is swallowed and the next accession is tried.

    Args:
        data: pandas DataFrame with at least 19 columns laid out as
            described above.

    Returns:
        The same DataFrame object, mutated in place.
    """
    pfam_url = "https://pfam.xfam.org/protein/"

    # Create the output columns before iterating, as the original did, so
    # the row layout seen inside the loop is unchanged.
    data["PFAM_domains"] = "-"
    data["Link_to_pfam"] = "-"

    # Results are collected per row and assigned by position afterwards.
    # Writing with ``data.loc[counter, ...]`` would target the wrong rows
    # (or append new ones) whenever the frame's index is not 0..n-1.
    domain_values = []
    link_values = []
    for _, row in data.iterrows():
        pfam_domains = []
        full_url = "-"
        # ``.iloc`` makes the positional access explicit; integer keys on a
        # labelled Series are otherwise treated as labels by newer pandas.
        uniprot_ids = row.iloc[18].split(",")
        aa_pos = int(row.iloc[12])
        for seq in uniprot_ids:
            if seq == "-":
                continue
            # A URL is not a filesystem path, so concatenate directly.
            full_url = pfam_url + seq
            try:
                families = prody.searchPfam(seq)
                for family in families.values():
                    # NOTE(review): only the first location of each family
                    # is checked, as in the original -- confirm whether
                    # repeated domains should be considered too.
                    first_location = family["locations"][0]
                    start_pos = int(first_location["start"])
                    end_pos = int(first_location["end"])
                    if start_pos <= aa_pos <= end_pos:
                        pfam_domains.append(
                            str(family["id"]) + ":"
                            + str(start_pos) + "-" + str(end_pos)
                        )
            except Exception:
                # Deliberate best-effort: a failed lookup for one accession
                # must not abort the annotation of the remaining ones.
                continue
        if pfam_domains:
            domain_values.append(",".join(pfam_domains))
            link_values.append(full_url)
        else:
            domain_values.append("-")
            link_values.append("-")

    data["PFAM_domains"] = domain_values
    data["Link_to_pfam"] = link_values
    return data
示例#2
0
文件: Uniprot.py 项目: yaz62/rhapsody
 def _searchPfam(self, refresh=False, **kwargs):
     assert type(refresh) is bool
     if refresh is True or self.Pfam is None:
         try:
             self.Pfam = searchPfam(self.uniq_acc, **kwargs)
         except:
             self.Pfam = {}
             raise
     return self.Pfam
示例#3
0
def evol_search(query, **kwargs):
    """Search Pfam for *query* and write the matching families as a table.

    All keyword arguments are forwarded unchanged to ``prody.searchPfam``.
    Two of them are also read here:

    * ``outname`` -- path of the output file; when missing or empty the
      table is written to standard output.
    * ``delimiter`` -- column separator, tab by default.

    One header line is written, then one line per family with its
    accession, id, type and the lowest e-value among its locations (an
    empty field when no location carries an e-value).

    Returns ``None``; nothing is written when the search returns ``None``.
    """

    import sys

    import prody
    from os.path import join, split

    pfam_results = prody.searchPfam(query, **kwargs)
    if pfam_results is None:
        return
    outname = kwargs.get('outname', None)
    delimiter = kwargs.get('delimiter', '\t')
    filepath = None
    if outname:
        folder, outname = split(outname)
        filepath = join(prody.utilities.makePath(folder), outname)
        # Text mode: every write below passes str, which a binary ('wb')
        # handle rejects on Python 3.
        out = open(filepath, 'w')
    else:
        out = sys.stdout
    try:
        title = delimiter.join(['acc', 'id', 'type', 'e-value']) + '\n'
        out.write(title)
        for key in pfam_results:
            val = pfam_results[key]
            # Collect only the e-values that are present, then take the
            # minimum. Tracking the minimum by location index compared a
            # float against '' when the first location had no e-value.
            evalues = [
                float(location['evalue'])
                for location in val.get('locations', [])
                if location.get('evalue', None) not in (None, '')
            ]
            evalue = min(evalues) if evalues else ''
            output = delimiter.join([
                val.get('accession', '    '),
                val.get('id', '    '),
                val.get('type', '    '),
                str(evalue)
            ]) + '\n'
            out.write(output)
    finally:
        # Close only a file opened here (never sys.stdout), including when
        # a write fails part-way through.
        if filepath is not None:
            out.close()
    if filepath is not None:
        prody.LOGGER.info('Search results written in {0}.'.format(filepath))
示例#4
0
def evol_search(query, **kwargs):
    """Write the Pfam families matching *query* as delimited text.

    The search itself is done by ``prody.searchPfam``, which receives
    *query* and every keyword argument as given. This function additionally
    reads ``outname`` (output file path; standard output is used when it is
    absent or empty) and ``delimiter`` (field separator, default tab).

    Output is a header row followed by one row per family: accession, id,
    type, and the smallest e-value found among the family's locations. The
    e-value field is empty if none of the locations provides one.

    Returns ``None``. If the search returns ``None`` nothing is written.
    """

    import sys

    import prody
    from os.path import join, split

    pfam_results = prody.searchPfam(query, **kwargs)
    if pfam_results is None:
        return
    outname = kwargs.get('outname', None)
    delimiter = kwargs.get('delimiter', '\t')
    filepath = None
    if outname:
        folder, outname = split(outname)
        filepath = join(prody.utilities.makePath(folder), outname)
        # Opened in text mode because only str is written; the former 'wb'
        # mode raises TypeError on Python 3.
        out = open(filepath, 'w')
    else:
        out = sys.stdout
    try:
        title = delimiter.join(['acc', 'id', 'type', 'e-value']) + '\n'
        out.write(title)
        for key in pfam_results:
            val = pfam_results[key]
            evalue = ''
            for location in val.get('locations', []):
                temp = location.get('evalue', None)
                if temp is None or temp == '':
                    continue
                temp = float(temp)
                # The first e-value seen seeds the minimum, whichever
                # location it comes from; this avoids comparing a float
                # with the '' placeholder.
                if evalue == '' or temp < evalue:
                    evalue = temp
            output = delimiter.join([val.get('accession', '    '),
                                     val.get('id', '    '),
                                     val.get('type', '    '),
                                     str(evalue)]) + '\n'
            out.write(output)
    finally:
        # Release the file handle even if a write raised; stdout is left
        # open.
        if filepath is not None:
            out.close()
    if filepath is not None:
        prody.LOGGER.info('Search results written in {0}.'.format(filepath))
示例#5
0
def evol_search(query, **kwargs):
    """Run a Pfam search for *query* and emit the hits as a delimited table.

    ``prody.searchPfam`` is called with *query* and all keyword arguments.
    Of those keyword arguments, this function itself uses:

    * ``outname`` -- where to write the table; standard output when the
      key is missing or the value is empty.
    * ``delimiter`` -- separator placed between fields (default: tab).

    The table has a header line and then, per family, the accession, id,
    type and the minimum e-value over the family's locations. When no
    location has an e-value the last field is left empty.

    Returns ``None``; returns early without output if the search result is
    ``None``.
    """

    import sys

    import prody
    from os.path import join, split

    pfam_results = prody.searchPfam(query, **kwargs)
    if pfam_results is None:
        return
    outname = kwargs.get("outname", None)
    delimiter = kwargs.get("delimiter", "\t")
    filepath = None
    if outname:
        folder, outname = split(outname)
        filepath = join(prody.utilities.makePath(folder), outname)
        # "w" rather than "wb": the rows are str, and writing str to a
        # binary handle fails on Python 3.
        out = open(filepath, "w")
    else:
        out = sys.stdout
    try:
        out.write(delimiter.join(["acc", "id", "type", "e-value"]) + "\n")
        for key in pfam_results:
            val = pfam_results[key]
            # Minimum over the e-values actually present. Keying the
            # "first value" on the location index broke when location 0
            # had no e-value but a later one did.
            present = (location.get("evalue", None) for location in val.get("locations", []))
            evalues = [float(raw) for raw in present if raw is not None and raw != ""]
            evalue = min(evalues) if evalues else ""
            fields = [val.get("accession", "    "), val.get("id", "    "), val.get("type", "    "), str(evalue)]
            out.write(delimiter.join(fields) + "\n")
    finally:
        # Only a file opened by this function is closed, and it is closed
        # on failure as well as on success.
        if filepath is not None:
            out.close()
    if filepath is not None:
        prody.LOGGER.info("Search results written in {0}.".format(filepath))
    if (len(uniprot_code)):
        ### get the uniprot canonical sequence from the database (full kinase sequence)
        raw_sequence = o3i.getCanonicalSequenceFromUniprotCode(uniprot_code)
        uniprot_sequence = Seq(raw_sequence, generic_protein)

        seq_id = o3i.getCanonicalSeqIdFromUniprotCode(uniprot_code)

        # get a list of start and end positions for a given annotation on the sequence
        # if none is returned, then the annotation is not found for this particular sequence
        cur_domain_idx = 1

        #first let's get all domain annotations from PFAM, if available
        #
        #print(uniprot_code)
        try:
            pfam_annotations = prody.searchPfam(uniprot_code)
        except Exception:
            continue
        pfam_families = pfam_annotations.keys()
        #for pfam_family in pfam_families:
        #        if pfam_family in expected_pfam_families:
        #                print(pfam_annotations[pfam_family])

        for family in expected_pfam_families:
            if (family in pfam_annotations):
                subSequence = ""
                if (len(pfam_annotations[family]["locations"]) > 1):
                    #                                print(uniprot_code,pfam_annotations[family]["locations"])
                    for loc_idx, location in enumerate(
                            pfam_annotations[family]["locations"]):