Пример #1
0
    def __get_uniprot_reactions(self):
        """
        Purpose: Converts the KEGG reactions in `self.kegg_reactions` to
        UniProt reactions.

        Each reaction is read positionally as (hsa id, hsa id, reaction type,
        ...). Both hsa ids are looked up in `self.hsa_to_uprot`. A reaction is
        skipped when either id has no entry in that mapping, and it is not
        kept when both ids map to the same value (self interaction).

        @return: Set of 3-tuples with the form (p1, p2, reaction label), where
            each value is read back from the `PPI` object built for the
            reaction (`ppi.p1`, `ppi.p2`, `ppi.get_reaction_type_string()`).
        """
        if self.__verbose:
            print("Converting KEGG reactions to UniProt reactions...")

        # PPI objects are collected first so that any de-duplication defined
        # by PPI itself is applied before the conversion to plain tuples.
        unique_ppis = set()
        for reaction in self.kegg_reactions:
            hsa1, hsa2, reaction_type = reaction[0], reaction[1], reaction[2]

            # Membership is tested on the mapping itself rather than on
            # `.keys()`, which avoids building/scanning a key list per test.
            # NOTE(review): assumes hsa_to_uprot is a dict-like mapping.
            if hsa1 not in self.hsa_to_uprot or hsa2 not in self.hsa_to_uprot:
                continue

            u_prot_1 = self.hsa_to_uprot[hsa1]
            u_prot_2 = self.hsa_to_uprot[hsa2]
            ppi = PPI(u_prot_1, u_prot_2, [reaction_type])

            # Self interactions are not kept.
            if u_prot_1 != u_prot_2:
                unique_ppis.add(ppi)

            # Printed for every mapped reaction, including self interactions
            # that were not added above.
            if self.__verbose:
                print("\tReaction: {}\t{}\t{}".format(
                    ppi.p1, ppi.p2, ppi.get_reaction_type_string()
                ))

        return {
            (ppi.p1, ppi.p2, ppi.get_reaction_type_string())
            for ppi in unique_ppis
        }
Пример #2
0
        ppi.p1 = sorted(xrefs[ppi.p1].swissprot_id, cmp=s_cmp)[0]
        ppi.p2 = sorted(xrefs[ppi.p2].swissprot_id, cmp=s_cmp)[0]

    for ppi in unknown_ppis:
        if ppi.p1 != '-':
            ppi.p1 = sorted(xrefs[ppi.p1].swissprot_id, cmp=s_cmp)[0]
        if ppi.p2 != '-':
            ppi.p2 = sorted(xrefs[ppi.p2].swissprot_id, cmp=s_cmp)[0]

    # Filter out small samples, self interactions and interactions with missing p2/p1
    ppis = filter(lambda ppi: ppi.p1 != '-' and ppi.p2 != '-', ppis)
    ppis = filter(lambda ppi: ppi.p1 != ppi.p2, ppis)
    ppis = [ppi.split() for ppi in ppis]
    ppis = set([ppi for sl in ppis for ppi in sl])

    types = [ppi.get_reaction_type_string() for ppi in ppis]
    type_counts = {k:0 for k in types}
    for t in types:
        type_counts[t] += 1
    ppis = [ppi for ppi in ppis if type_counts[ppi.get_reaction_type_string()] >= 5]

    fp1 = open('tmp/hprd_training_ppi.tsv', 'w')
    fp2 = open('tmp/hprd_testing_ppi.tsv', 'w')
    fp3 = open('tmp/unknown_hprd_ppi.tsv', 'w')
    fp1.write("uniprot_a\tuniprot_a\tlabel\n")
    fp2.write("uniprot_a\tuniprot_a\tlabel\n")
    fp3.write("uniprot_a\tuniprot_a\tlabel\n")

    test_type = [
        'phosphorylation',
        'dephosphorylation'