示例#1
0
class PosSelClusterTest(unittest.TestCase):
    def setUp(self,):
        session = Session()
        # family_id  = 19187
        # protein_id = 1151960
        # domain_id  = 1302435
        family_id = 18883
        protein_id = 1063014
        domain_id = 1212855

        self.family = session.query(Family).get(family_id)
        self.protein = session.query(Protein).get(protein_id)
        self.domain = session.query(Domain).get(domain_id)

        self.ps_sites = get_possel_sites(self.family)
        (self.pdb_id, self.pdb_chain, self.struct) = get_domain_struct(self.domain)

        print "Testing on Family {0}, Protein {1}, Domain {2}, Structure {3}, PDB {4}{5}".format(
            family_id, protein_id, domain_id, self.struct.id, self.pdb_id, self.pdb_chain
        )
        print "Sites of +Sel for family: ", self.ps_sites

        self.pdb_struct = BioPDBStruct(self.pdb_id, self.pdb_chain, debug=True)
        self.cluster_id_str = "Family {0}, Protein {1}, Domain {2}, Structure {3}".format(
            family_id, protein_id, domain_id, self.struct.id
        )

    def testRunClusterAnalysis(self,):
        domain_sites = get_domain_sites(self.family, self.protein, self.domain, self.ps_sites)
        print "Local sites for domain {0}: ".format(self.domain.id), domain_sites

        self.pdb_struct.cluster_analysis(
            domain_sites, sample_size=10, store_file="ps-cluster-test.pkl", tag=self.cluster_id_str, report=True
        )
示例#2
0
def cluster_driver(family_id):
    session = Session()
    family = session.query(Family).get(family_id)
    if family == None:
        raise Exception("Family {0} could not be fetched from the database".format(family_id))

    # Get sites for family (+Sel and TODO: firedb)
    ps_sites = get_possel_sites(family)
    # DEBUG
    print "Start cluster analysis for family {0}".format(family.id)
    print "Family {0} sites of +sel: ".format(family.id), ps_sites

    # Get repr protein from family (first one)
    protein = family.proteins[0]

    # Attempt clustering on structures for all domains in protein
    for domain in protein.domains:
        try:
            (pdb_id, pdb_chain, struct) = get_domain_struct(domain)
        except Exception as e:
            print e
            print "Domain {0} (Family {1}, Protein {2}) has no valid structure. Skipping..".format(
                domain.id, family.id, protein.id
            )
            continue

        # Get domain-specific sites
        domain_sites = get_domain_sites(family, protein, domain, ps_sites)
        # DEBUG
        print "Family {0}, Protein {1}, Domain {2} local sites: ".format(family.id, protein.id, domain.id), domain_sites

        # Create BioPDBStruct object to call cluster analysis on
        pdb_struct = BioPDBStruct(pdb_id, pdb_chain, debug=True)
        cluster_id_str = "Family {0}, Protein {1}, Domain {2}, Structure {3}".format(
            family.id, protein.id, domain.id, struct.id
        )
        try:
            pdb_struct.cluster_analysis(
                domain_sites, sample_size=samples, store_file=results_file, tag=cluster_id_str, report=True
            )
        except Exception as e:
            print e
            print "Can not complete clustering analysis on domain {0}. Skipping..".format(domain.id)
            continue

    # DEBUG
    print "Clustering for domain/structs in Family {0} (Protein {1}) complete".format(family.id, protein.id)