def data237_fp_gen(refresh=False):
    """Generate and store the fingerprints of every complex under data237_complex_root.

    For each id returned by ``complex_ids(path=data237_complex_root)`` the
    ``antigen.pdb`` and ``antibody.pdb`` structures are loaded, a ``Complex``
    is built, and every item returned by its ``gen_fps()`` is written with
    ``tofile`` to ``<data237_fp6_root>/<complex_id>/fp<N>.txt`` (N starts at 1).

    Args:
        refresh: when false, a complex whose output directory already exists
            is skipped; when true it is processed again.
    """
    for complex_id in complex_ids(path=data237_complex_root):
        output_path = os.path.join(data237_fp6_root, complex_id)
        if not refresh and os.path.exists(output_path):
            print("%s already processed" % complex_id)
            continue

        print("start processing %s" % complex_id)
        data_dir = os.path.join(data237_complex_root, complex_id)
        antigen = load_pdb_struct(os.path.join(data_dir, "antigen.pdb"), residue_cls=Residue)
        antibody = load_pdb_struct(os.path.join(data_dir, "antibody.pdb"), residue_cls=Residue)

        try:
            fps = Complex(complex_id, antigen, antibody).gen_fps()
        except Exception:
            sys.stderr.write("complex %s encountered error.\n" % complex_id)
            # Move on before anything is written: otherwise ``fps`` would be
            # undefined or still hold the previous complex's fingerprints,
            # and the freshly created directory would make this complex look
            # "already processed" on the next run.
            continue

        if not os.path.exists(output_path):
            os.makedirs(output_path)
        for i, fp in enumerate(fps):
            path = os.path.join(output_path, "fp%d.txt" % (i + 1))
            print("to path %s" % path)
            fp.tofile(path)
def gen_dist_matrix(plane_type, atg_or_atb, res_or_tri):
    """Run the pairwise ``corr_coef`` calculation for one padded-fingerprint set.

    The fingerprint directory is
    ``<data237_fp175_padded_root>/<plane_type>-<atg_or_atb>-<res_or_tri>``;
    every complex id found there is passed to ``complex_pairwise_calc``
    together with the module-level ``callback``.
    """
    fp_dir_name = "%s-%s-%s" % (plane_type, atg_or_atb, res_or_tri)
    fp_dir = os.path.join(data237_fp175_padded_root, fp_dir_name)

    # A converter built with ``None`` is used, i.e. no slice is requested.
    load_fp = make_dataloader(fp_dir, make_single_line_converter(None))

    complex_pairwise_calc(complex_ids(fp_dir), load_fp, corr_coef, callback=callback)
def print_splitted_fp_in_csv(res_or_tri, which=0):
    """Print one of the three segments of every padded fingerprint as CSV.

    The fingerprint line of each complex is cut into three consecutive
    slices whose lengths are derived from the cached overall spatial
    distributions; ``which`` (0, 1 or 2) selects the slice that is printed.

    Args:
        res_or_tri: ``"res"`` reads ``fp_aaindex_if_padded``; any other value
            reads ``fp_aaindex_if_padded_tri``.
        which: index of the segment to print.
    """
    if res_or_tri == "res":
        subdir = "fp_aaindex_if_padded"
    else:
        subdir = "fp_aaindex_if_padded_tri"
    data_dir = os.path.join(data480_root, subdir)
    delimiter = ","
    cids = complex_ids(data_dir)
    # NOTE(review): the second label says "4 bits" while its segment length
    # below uses a factor of 3 -- confirm which one is intended.
    name = ("%s 3 bits" % res_or_tri, "atb 4 bits", "15 bits")

    from ve.fp.complex_util.padding import PaddedComplexFingerPrint, OverallSpatialDistribution

    atg_dist, atb_dist, tri_dist = OverallSpatialDistribution.from_cache()
    leading_dist = atg_dist if res_or_tri == "res" else tri_dist
    segment_lengths = (
        sum(leading_dist.values()) * 3,
        sum(atb_dist.values()) * 3,
        sum(atg_dist.values()) * 15,
    )

    # Build one loader per segment; each slice starts where the previous ended.
    loaders = []
    start = 0
    for length in segment_lengths:
        stop = start + length
        loaders.append(make_dataloader(data_dir, make_single_line_converter(slice(start, stop))))
        start = stop

    print("cid, %s" % name[which])
    load_segment = loaders[which]
    for cid in sorted(cids):
        fields = ["%.2f" % value for value in load_segment(cid)]
        print("%s,%s" % (cid, delimiter.join(fields)))
def gen_pairwise_dist(res_or_tri):
    """Run the pairwise ``corr_coef`` calculation over a padded aaindex set.

    Args:
        res_or_tri: ``"res"`` selects ``fp_aaindex_if_padded`` under
            ``data480_root``; any other value selects
            ``fp_aaindex_if_padded_tri``.
    """
    if res_or_tri == "res":
        subdir = "fp_aaindex_if_padded"
    else:
        subdir = "fp_aaindex_if_padded_tri"
    fp_dir = os.path.join(data480_root, subdir)

    # A converter built with ``None`` is used, i.e. no slice is requested.
    load_fp = make_dataloader(fp_dir, make_single_line_converter(None))

    complex_pairwise_calc(complex_ids(fp_dir), load_fp, corr_coef, callback=callback)
def print_fp_in_csv(plane_type, atg_or_atb, res_or_tri):
    """Print every fingerprint of one padded set as a CSV row.

    Rows are emitted in sorted complex-id order as ``<cid>,<v1>,<v2>,...,``
    with each value formatted to two decimals; note that every row ends
    with a trailing comma.
    """
    fp_dir = os.path.join(
        data237_fp175_padded_root,
        "%s-%s-%s" % (plane_type, atg_or_atb, res_or_tri),
    )
    load_fp = make_dataloader(fp_dir, make_single_line_converter(None))
    delimiter = ","

    for cid in sorted(complex_ids(fp_dir)):
        formatted = ["%.2f" % value for value in load_fp(cid)]
        print("%s,%s," % (cid, delimiter.join(formatted)))
def get_overall_residue_distribution():
    """Return the three overall distributions computed over all complexes.

    All complexes listed by ``complex_ids()`` are loaded as
    ``ComplexWithResidueSpatialDistribution`` objects and handed to
    ``OverallSpatialDistribution.overall_dist``.

    Returns:
        A 3-tuple ``(atg_dist, atb_dist, tri_dist)`` in the order produced
        by ``overall_dist``.
    """
    from ve.util.load_pdb import load_complexes, complex_ids
    from ve.fp.test.common import GeometryResidue

    complexes = load_complexes(
        complex_ids(),
        complex_cls=ComplexWithResidueSpatialDistribution,
        residue_cls=GeometryResidue,
    )
    # Unpack explicitly so that a result of unexpected length fails here.
    atg_dist, atb_dist, tri_dist = OverallSpatialDistribution.overall_dist(complexes)
    return atg_dist, atb_dist, tri_dist
def setUp(self):
    """Load a random sample of 10 complexes into ``self.complexes``.

    The ids are drawn without replacement from ``complex_ids()`` and loaded
    as ``ResiduePlaneBasedComplex`` objects with ``fp_80.Residue`` residues.
    """
    from ve.util.load_pdb import load_complexes, complex_ids
    from ve.fp.fp_80 import Residue
    import random

    chosen_ids = random.sample(complex_ids(), 10)
    self.complexes = load_complexes(
        chosen_ids,
        complex_cls=ResiduePlaneBasedComplex,
        residue_cls=Residue,
    )
def main():
    """Call ``get_triangles(refresh=True)`` on every complex from ``complex_ids()``.

    A local ``Complex`` class combining ``BaseComplex`` with
    ``ResidueTriangleTrait`` is used to load the complexes.
    """
    from ve.util.complex import BaseComplex
    from ve.fp.fp_80 import Residue as Residue80

    class Complex(BaseComplex, ResidueTriangleTrait):
        pass

    from ve.util.load_pdb import load_complexes, complex_ids

    complexes = load_complexes(complex_ids(), complex_cls=Complex, residue_cls=Residue80)
    for loaded in complexes:
        loaded.get_triangles(refresh=True)
def main_480():
    """Run the pairwise ``corr_coef`` calculation for the 480 dataset.

    Ids are taken from the entries of ``<data480_root>/fp_370_atg`` (the part
    before the first comma of each entry) and the fingerprints are read
    through ``fp370_atb_dataloader``.
    """
    import os
    from ve.util.load_pdb import complex_ids
    from ve.config import data480_root
    from data480 import fp370_atg_dataloader, fp370_atb_dataloader

    def first_field(entry):
        # Keep only the text before the first comma.
        return entry.split(",")[0]

    # NOTE(review): ids come from the "atg" directory while the "atb"
    # dataloader is used below -- confirm this pairing is intended.
    atg_fp_dir = os.path.join(data480_root, "fp_370_atg")
    cids = map(first_field, complex_ids(atg_fp_dir))

    complex_pairwise_calc(cids, fp370_atb_dataloader, corr_coef, callback=callback)
def simmat_from_db():
    """Print the ``epi_166`` matrix from the database as CSV after scaling.

    The matrix is read for the complex ids found under ``epi166_fp`` and
    divided by the transposed matrix of its diagonal before printing.
    """
    from ve.util.load_pdb import complex_ids
    from ve.config import epi166_fp
    from ve.dbconfig import db

    cids = complex_ids(epi166_fp)
    raw = lmatrix.from_db(db["epi_166"], cids)

    # presumably divides each row by its own diagonal entry -- confirm
    # against lmatrix's division/broadcast semantics.
    diagonal_column = np.matrix(raw.diagonal()).T
    scaled = lmatrix(cids, data=raw / diagonal_column)

    print(scaled.to_csv_str())
def main1():
    """Generate the 370-bit fingerprints for all complexes in the 237 dataset.

    Each complex id from ``complex_ids()`` is printed and then passed to
    ``gen_fp_for_complex``.
    """
    from ve.util.load_pdb import complex_ids

    for complex_id in complex_ids():
        print(complex_id)
        gen_fp_for_complex(complex_id)
def simmat_from_db():
    """Print the ``epi_166`` matrix from the database as CSV after scaling.

    The matrix is read for the complex ids found under ``epi166_fp`` and
    divided by the transposed matrix of its diagonal before printing.
    """
    from ve.util.load_pdb import complex_ids
    from ve.config import epi166_fp
    from ve.dbconfig import db

    cids = complex_ids(epi166_fp)
    raw = lmatrix.from_db(db["epi_166"], cids)

    # presumably divides each row by its own diagonal entry -- confirm
    # against lmatrix's division/broadcast semantics.
    diagonal_column = np.matrix(raw.diagonal()).T
    scaled = lmatrix(cids, data=raw / diagonal_column)

    print(scaled.to_csv_str())
def test_count():
    """List every 480-dataset complex and mark those that already have a fingerprint.

    For each id under ``data480_complex_root`` the line ``<cid> exists`` is
    printed when ``<data480_root>/fp_370_atg/<cid>.fp`` is present, and the
    bare id otherwise.
    """
    import os
    from ve.util.load_pdb import complex_ids, load_complexes
    from ve.config import data480_root, data480_complex_root

    ids = complex_ids(data480_complex_root)
    fp_dir = os.path.join(data480_root, "fp_370_atg")

    for cid in ids:
        fp_path = os.path.join(fp_dir, "%s.fp" % cid)
        if not os.path.exists(fp_path):
            print(cid)
            continue
        print("%s %s" % (cid, "exists"))
def main(fp_dir, use_complex_plane=True, atg_as_rec=True, use_tri=True, use_cache=True):
    """Write the fingerprint string of every complex to ``<fp_dir>/<c_id>.fp``.

    Args:
        fp_dir: directory receiving one ``.fp`` file per complex.
        use_complex_plane: when true the complexes are loaded as
            ``ComplexPlaneBasedComplex``, otherwise as
            ``ResiduePlaneBasedComplex``.
        atg_as_rec: forwarded to ``gen_fp_str`` as ``atg_as_receptor``.
        use_tri: forwarded to ``gen_fp_str`` as ``use_tri``.
        use_cache: forwarded to ``gen_fp_str`` as ``use_cache``.

    A failure on one complex is reported on stdout and the loop goes on
    with the next complex.
    """
    from ve.fp.fp_80 import Residue
    from ve.util.load_pdb import complex_ids, load_complexes
    from ve.config import data237_fp175_padded_root, data480_complex_root

    complex_cls = ComplexPlaneBasedComplex if use_complex_plane else ResiduePlaneBasedComplex
    cids = complex_ids()
    cs = load_complexes(cids, complex_cls=complex_cls, residue_cls=Residue)

    for c in cs:
        try:
            fp_str = c.gen_fp_str(atg_as_receptor=atg_as_rec, use_cache=use_cache, use_tri=use_tri)
            with open(fp_dir + "/" + c.c_id + ".fp", "w") as f:
                f.write(fp_str)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # SystemExit still stop the batch instead of being reported as
            # an ordinary per-complex failure.
            print("%s encountered error" % c.c_id)
def main(fp_dir, res_or_tri="tri", atg_as_rec=True, use_cache=True):
    """Write the fingerprint string of a fixed set of complexes to ``fp_dir``.

    Args:
        fp_dir: directory receiving one ``<c_id>.fp`` file per complex.
        res_or_tri: passed positionally to ``gen_fp_str``.
        atg_as_rec: forwarded to ``gen_fp_str`` as ``atg_as_receptor``.
        use_cache: forwarded to ``gen_fp_str`` as ``use_cache``.

    Errors raised by ``gen_fp_str`` propagate; only a failure while writing
    the file is reported on stdout and skipped.
    """
    from ve.fp.fp_80 import Residue
    from ve.util.load_pdb import complex_ids, load_complexes
    from ve.config import data237_fp175_padded_root, data480_complex_root

    # NOTE(review): the run is restricted to these three ids. The original
    # code called complex_ids() and immediately overwrote its result with
    # this list, so the dead call was dropped; restore
    # ``cids = complex_ids()`` to process every complex.
    cids = ["1FJ1_F", "3BN9_B", "3B9K_EF"]
    cs = load_complexes(cids, complex_cls=Complex, residue_cls=Residue)

    for c in cs:
        print(c.c_id)
        fp_str = c.gen_fp_str(res_or_tri, atg_as_receptor=atg_as_rec, use_cache=use_cache)
        try:
            with open(fp_dir + "/" + c.c_id + ".fp", "w") as f:
                f.write(fp_str)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # SystemExit are not swallowed while writing.
            print("%s encountered error" % c.c_id)
def main2(which_as_rec):
    """Generate the missing 370 fingerprints of the 480 dataset.

    Every complex under ``data480_complex_root`` is loaded; for each one
    whose ``<data480_root>/fp_370_<which_as_rec>/<c_id>.fp`` file does not
    exist yet, ``get_fp(which_as_rec)`` is computed and written with
    ``tofile``.

    Args:
        which_as_rec: passed to ``get_fp`` and used as the suffix of the
            output directory name.

    A failure on one complex is printed together with
    ``get_error_info(e)`` and the loop continues.
    """
    import os
    from ve.util.load_pdb import complex_ids, load_complexes
    from ve.config import data480_root, data480_complex_root
    from ve.util.error import get_error_info

    ids = complex_ids(data480_complex_root)
    cs = load_complexes(ids, directory=data480_complex_root, complex_cls=MyComplex, residue_cls=MyResidue)
    fp_dir = os.path.join(data480_root, "fp_370_%s" % which_as_rec)

    for c in cs:
        fp_path = os.path.join(fp_dir, "%s.fp" % c.c_id)
        if os.path.exists(fp_path):
            print("%s preexists\n" % c.c_id)
            continue

        print("processing %s" % c.c_id)
        try:
            fp = c.get_fp(which_as_rec)
            fp.tofile(fp_path)
        except Exception as e:
            print("%s %s" % (c.c_id, get_error_info(e)))
        else:
            # Only announce success when nothing was raised; a failed
            # complex must not be reported as processed.
            print("%s processed\n" % c.c_id)
def main_237():
    """Run the pairwise ``corr_coef`` calculation for the 237 dataset.

    All ids from ``complex_ids()`` except ``1UWX_AP`` are used, with
    fingerprints read through ``src.fp370_atb_dataloader``.
    """
    # ``src.fp370_atg_dataloader`` is the alternative loader this function
    # was previously run with.
    selected_ids = set(complex_ids()).difference(["1UWX_AP"])
    complex_pairwise_calc(selected_ids, src.fp370_atb_dataloader, corr_coef, callback=callback)
c = ComplexDual(complex_id, antigen, antibody) c.gen_fp_to_file(complex_id, fp_type="double") def already_processed(complex_id): file_types = ["atg.fp","15bits.fp","atb.fp"] calc_types = ["single", "double"] for ct in calc_types: for ft in file_types: file_dir = os.path.join(data237_fps808015_root, ct, complex_id, ft) print file_dir if not os.path.exists(file_dir): return False#not processed completely return True#processed completely if __name__ == "__main__": ids = ["1SLG_D", "1N4X_L", "1JV5_A", "1STS_B"] for c_id in complex_ids(path = data237_complex_root): if already_processed(c_id): print "already processed", c_id continue else: print "processing", c_id try: gen_fp(c_id) except: import sys sys.stderr.write("error encountered processing %s" %c_id) continue
if only_paratope: self.find_epitope() self.write_epitope(complex_dir, paraepi_dir)
# NOTE(review): the line above is the tail of a method that starts before
# this chunk; it is left exactly as found.

# Script entry point: generate the paratope/epitope data for one group of
# split complexes.
if __name__ == "__main__":
    import sys
    from ve.util.load_pdb import complex_ids, load_complexes
    from ve.fp.complex_util.paraepi import ParatopeNotFoundError, EpitopeNotFoundError
    from ve.config import data_root
    # The first command-line argument names the sub-directory to process
    # under "three-groups/split-complexes".
    var = sys.argv[1]
    complex_dir = os.path.join(data_root,"three-groups/split-complexes", var)
    #get the complex ids
    ids = complex_ids(complex_dir)
    print ids
    cs = load_complexes(ids, directory = complex_dir, complex_cls = ParaEpiGen, residue_cls = TestResidue)
    for c in cs:
        # Output goes to the matching sub-directory of "three-groups/paraepi".
        # The two "not found" errors are reported and the loop continues;
        # any other exception propagates.
        try:
            c.gen_paraepi(complex_dir, os.path.join(data_root,"three-groups/paraepi", var), only_paratope = True)
        except ParatopeNotFoundError:
            print "paratope not found"
        except EpitopeNotFoundError:
            print "epitope not found"