class PosSelClusterTest(unittest.TestCase): def setUp(self,): session = Session() # family_id = 19187 # protein_id = 1151960 # domain_id = 1302435 family_id = 18883 protein_id = 1063014 domain_id = 1212855 self.family = session.query(Family).get(family_id) self.protein = session.query(Protein).get(protein_id) self.domain = session.query(Domain).get(domain_id) self.ps_sites = get_possel_sites(self.family) (self.pdb_id, self.pdb_chain, self.struct) = get_domain_struct(self.domain) print "Testing on Family {0}, Protein {1}, Domain {2}, Structure {3}, PDB {4}{5}".format( family_id, protein_id, domain_id, self.struct.id, self.pdb_id, self.pdb_chain ) print "Sites of +Sel for family: ", self.ps_sites self.pdb_struct = BioPDBStruct(self.pdb_id, self.pdb_chain, debug=True) self.cluster_id_str = "Family {0}, Protein {1}, Domain {2}, Structure {3}".format( family_id, protein_id, domain_id, self.struct.id ) def testRunClusterAnalysis(self,): domain_sites = get_domain_sites(self.family, self.protein, self.domain, self.ps_sites) print "Local sites for domain {0}: ".format(self.domain.id), domain_sites self.pdb_struct.cluster_analysis( domain_sites, sample_size=10, store_file="ps-cluster-test.pkl", tag=self.cluster_id_str, report=True )
def cluster_driver(family_id): session = Session() family = session.query(Family).get(family_id) if family == None: raise Exception("Family {0} could not be fetched from the database".format(family_id)) # Get sites for family (+Sel and TODO: firedb) ps_sites = get_possel_sites(family) # DEBUG print "Start cluster analysis for family {0}".format(family.id) print "Family {0} sites of +sel: ".format(family.id), ps_sites # Get repr protein from family (first one) protein = family.proteins[0] # Attempt clustering on structures for all domains in protein for domain in protein.domains: try: (pdb_id, pdb_chain, struct) = get_domain_struct(domain) except Exception as e: print e print "Domain {0} (Family {1}, Protein {2}) has no valid structure. Skipping..".format( domain.id, family.id, protein.id ) continue # Get domain-specific sites domain_sites = get_domain_sites(family, protein, domain, ps_sites) # DEBUG print "Family {0}, Protein {1}, Domain {2} local sites: ".format(family.id, protein.id, domain.id), domain_sites # Create BioPDBStruct object to call cluster analysis on pdb_struct = BioPDBStruct(pdb_id, pdb_chain, debug=True) cluster_id_str = "Family {0}, Protein {1}, Domain {2}, Structure {3}".format( family.id, protein.id, domain.id, struct.id ) try: pdb_struct.cluster_analysis( domain_sites, sample_size=samples, store_file=results_file, tag=cluster_id_str, report=True ) except Exception as e: print e print "Can not complete clustering analysis on domain {0}. Skipping..".format(domain.id) continue # DEBUG print "Clustering for domain/structs in Family {0} (Protein {1}) complete".format(family.id, protein.id)