def compute_doublet_params(ids, params_type):
    """Build a mapping from doublet id to its parameter dictionary.

    The doublets named in ``ids`` are loaded through a ``DoubletsDict`` rooted
    at ``gc-data/`` (restricted to the module-level ``R_ATOMS``), and
    ``doublet_params_dict`` is evaluated for each of them while a console
    progress bar is displayed.

    A doublet is reported on stdout as invalid and left out of the result when
    its points (or either half of them) are missing, when its upper-cased
    nucleotide type is not exactly two letters from ``ACGU``, or when
    ``doublet_params_dict`` returns ``None``.

    Args:
        ids: sequence of doublet identifiers.
        params_type: forwarded unchanged to ``doublet_params_dict``.

    Returns:
        dict mapping each accepted doublet id to its parameters.
    """
    # The returned module object is not used below; the import is kept as is.
    dp = __import__("doublet-params")

    doublets = DoubletsDict("gc-data/", reduced_atoms=R_ATOMS)
    doublets.load_pdb_files(ids, verbose=True)

    params_by_id = {}
    progress = ProgressBar(
        widgets=[
            'compute doublet params', ' ',
            Percentage(), ' ',
            Bar(), ' ',
            ETA(),
        ],
        maxval=max(1, len(ids)),
    ).start()

    for position, doublet_id in enumerate(ids):
        points = doublets.get(doublet_id)
        if points is None or points[0] is None or points[1] is None:
            print("INVALID doublet! %s" % doublet_id)
            continue

        n_type = doublets.get_n_type(doublet_id).upper()
        if not re.match('^[ACGU]{2}$', n_type):
            print("INVALID doublet! %s, wrong n_type: %s" % (doublet_id, n_type))
            continue

        # NOTE(review): the normalized points are never read afterwards; the
        # call is kept because its side effects cannot be judged from here.
        d_norm = normalize_points(points, n_type[0])

        params = doublet_params_dict(points, n_type, params_type)
        if params is None:
            print("INVALID doublet! %s" % (doublet_id))
            continue

        params_by_id[doublet_id] = params
        # Only accepted doublets advance the bar; rejected ones skip this.
        progress.update(position)

    progress.finish()
    return params_by_id
def classify_prg_result_details_fp(self, short_id, sc, prg_res, exp_res,
                                   other_results):
    """Assign a detail label to one program result for a doublet.

    Args:
        short_id: doublet id without the PDB prefix; the full id is built as
            ``self.pdb_id.upper() + ":" + short_id``.
        sc: category name; must be a key of ``self.PRG_BY_CAT``.
        prg_res: result reported by the program under review ("" for none).
        exp_res: expected result ("" for none).
        other_results: results of the other programs, paired position by
            position with the entries of ``self.PRG_BY_CAT[sc]`` other
            than ``'CL'``.

    Returns:
        A ``(label, extra)`` tuple of strings. ``label`` is one of
        ``"new-base-ribose"``, ``"cis-vs-trans"``, ``"wh_cis-vs-sh_cis"``,
        ``"consistent-with-single-cl"``, ``"not-recognized-by-others-cl"``
        or ``"others"``; ``extra`` is a human-readable detail or "".

    Doublet parameters are computed only in the branch that reads them, so
    every category that normalizes to ``'bp'`` is covered (previously a plain
    ``'bp'`` category could hit an unbound variable). When
    ``doublet_params_dict`` yields ``None`` the geometric rule is skipped and
    the generic comparison with the other programs decides the label.
    """
    other_prgs = [x for x in self.PRG_BY_CAT[sc] if x != 'CL']
    d_id = self.pdb_id.upper() + ":" + short_id
    sc2 = sc.replace("bp-classic", "bp").replace("bp-non-classic", "bp")

    # Without an expected result, adopt the other programs' answer when
    # exactly one of them reported something.
    exp_res2 = exp_res
    if exp_res2 == "":
        non_empty = [x for x in other_results if x != ""]
        if len(non_empty) == 1:
            exp_res2 = non_empty[0]

    if sc == 'base-ribose':
        p_br = doublet_params_dict(
            self.dd.get(d_id), self.dd.get_n_type(d_id), 'base-ribose')
        if p_br is not None and p_br['ph_h'] >= 4.5:
            return ("new-base-ribose", "ph_h=%.4f" % p_br['ph_h'])
    elif sc2 == 'bp':
        same_prefix = (prg_res != "" and exp_res2 != ""
                       and prg_res[0:2] == exp_res2[0:2])
        if same_prefix:
            p_bp = doublet_params_dict(
                self.dd.get(d_id), self.dd.get_n_type(d_id), 'bp')
            expected_orient = expected_strand_orient(exp_res2)
            if p_bp is not None and expected_orient != p_bp['strand_orient']:
                extra = "strand orientation: %s, expected orientation for %s: %s" % (
                    self._strand_orientation_name(p_bp['strand_orient']),
                    exp_res2,
                    self._strand_orientation_name(expected_orient))
                return ("cis-vs-trans", extra)
        if prg_res == 'SH_cis' and exp_res2 == 'WH_cis':
            return ("wh_cis-vs-sh_cis", "")

    # Generic rules: compare with what the other programs reported.
    if prg_res in other_results:
        agreeing = [
            p for p, o in zip(other_prgs, other_results) if prg_res == o
        ]
        return ("consistent-with-single-cl", "consistent with: " + agreeing[0])
    if all(x == "" for x in other_results):
        return ("not-recognized-by-others-cl", "")
    return ("others", "")
def test_doublets_class(self):
    """Check that ``Doublet`` reproduces the values of ``doublet_params_dict``.

    For the structures '3fo6' and '2zjp', every doublet id listed by the
    "close_doublets" graph is processed twice: first with
    ``doublet_params_dict`` for each of the four parameter types, then
    through ``Residue``/``Doublet`` objects. Each value of the first pass
    must be matched by the corresponding ``Doublet`` attribute (or
    ``ph_info``/``br_info`` entry) within ``eps``; the 'oxygens' entry is
    compared as a sorted sequence. Both passes are wrapped in
    ``bench_start``/``bench_stop``.
    """
    from utils import PDBObject, DoubletsDict, GraphTool, bench_start, bench_stop
    from distances import doublet_params_dict, Doublet, Residue, residue_conformation

    eps = 0.001
    param_types = ['bp', 'stacking', 'base-phosphate', 'base-ribose']

    for pdb_id in ['3fo6', '2zjp']:
        dd = DoubletsDict(reduced_atoms=['*'])
        dd.load_pdb(pdb_id)
        gr = GraphTool(PDBObject.pdb_fn(pdb_id, "close_doublets"), edge_type="dist")

        # First pass: reference values, keyed by "<doublet id>-<param type>".
        old_params = {}
        bench_start("old params computations")
        for d_id in gr.get_ids():
            full_d_id = pdb_id + ":" + d_id
            n_type = dd.get_n_type(full_d_id)
            p1, p2 = dd.get(full_d_id)
            for param_type in param_types:
                old_params["%s-%s" % (d_id, param_type)] = doublet_params_dict(
                    (p1, p2), n_type, param_type)
        bench_stop("old params computations")

        # Second pass: same doublets through the object API; Residue
        # instances are created once per residue id and reused.
        residues = {}
        bench_start("new params computations")
        for d_id in gr.get_ids():
            full_d_id = pdb_id + ":" + d_id
            n_type = dd.get_n_type(full_d_id)
            p1, p2 = dd.get(full_d_id)
            r1, r2 = d_id.split(":")
            if r1 not in residues:
                residues[r1] = Residue(r1, n_type[0], p1)
            if r2 not in residues:
                residues[r2] = Residue(r2, n_type[1], p2)
            d = Doublet(d_id, residues[r1], residues[r2])

            for param_type in param_types:
                expected = old_params["%s-%s" % (d_id, param_type)]
                for key, expected_value in expected.items():
                    if not re.match("^(dist_[A-Z]|i_|ii_|oxygens)", key):
                        v = getattr(d, key)
                    elif param_type == 'base-phosphate':
                        v = d.ph_info.get(key)
                    elif param_type == 'base-ribose':
                        v = d.br_info.get(key)
                    else:
                        v = None

                    if key == 'oxygens':
                        self.assertTrue(sorted(v) == sorted(expected_value))
                    else:
                        self.assertTrue(
                            expected_value - eps < v < expected_value + eps,
                            "%s: got: %.4f, expected: %.4f" % (key, v, expected_value))
        bench_stop("new params computations")
def compute_doublet_params(ids, params_type):
    """Return ``{doublet id: parameters}`` for the valid doublets in ``ids``.

    A ``DoubletsDict`` rooted at ``gc-data`` is created with a fixed list of
    atom names, the PDB files for ``ids`` are loaded, and
    ``doublet_params_dict`` is evaluated per doublet while a console progress
    bar is shown.

    Doublets are reported on stdout and skipped when their points are
    missing, when their upper-cased nucleotide type does not consist of
    exactly two letters from ``ACGU``, or when ``doublet_params_dict``
    returns ``None``.

    Args:
        ids: sequence of doublet identifiers.
        params_type: passed through to ``doublet_params_dict``.

    Returns:
        dict with one entry per accepted doublet.
    """
    # The module object is not used below; the import is kept as is.
    dp = __import__("doublet-params")

    # Same names, order and repetitions as the original incremental build.
    atom_names = [
        'N1', 'N2', 'N3', 'N4', 'N6', 'N7', 'P',
        'C2', 'C4', 'C5', 'C6', 'C8', "C1'",
        'O2',
        "O2'", "O3'", "O4'",
        "OP1", "OP2", "O5'", "NEXT:O3'",
        'N1', 'C6', 'O6', 'C5', 'C4', 'N3', 'C2', 'N2', 'N7', 'C8', 'N9',
    ]
    doublets = DoubletsDict("gc-data", reduced_atoms=atom_names)
    doublets.load_pdb_files(ids, verbose=True)

    collected = {}
    bar_widgets = ['compute doublet params', ' ', Percentage(), ' ', Bar(),
                   ' ', ETA()]
    progress = ProgressBar(widgets=bar_widgets, maxval=max(1, len(ids)))
    progress = progress.start()

    for idx, doublet_id in enumerate(ids):
        points = doublets.get(doublet_id)
        if points is None or points[0] is None or points[1] is None:
            print("INVALID doublet! %s" % doublet_id)
            continue

        n_type = doublets.get_n_type(doublet_id).upper()
        if not re.match('^[ACGU]{2}$', n_type):
            print("INVALID doublet! %s, wrong n_type: %s" % (doublet_id, n_type))
            continue

        params = doublet_params_dict(points, n_type, params_type)
        if params is None:
            print("INVALID doublet! %s" % (doublet_id))
            continue

        collected[doublet_id] = params
        # Reached only for accepted doublets; skipped ones do not move the bar.
        progress.update(idx)

    progress.finish()
    return collected
def test_bp_params(self):
    """Compare computed parameters of two reference doublets with stored values.

    For each reference doublet the test checks, within ``eps``:

    * the dictionary returned by ``doublet_params_dict(points, n_type,
      'stacking')``, and
    * the equally named attributes of a ``Doublet`` built from two
      ``Residue`` objects over the same points.
    """
    from utils import DoubletsDict
    from distances import doublet_params_dict, Doublet, Residue

    TEST_DATA = [
        (
            "3FO6:A39:A57",
            {
                'stack_orient': 3,
                'dist_z': 0.22861798604329428,
                'o_ang': 69.303115470181339,
                'dist': 5.5976623073175231,
                'stack_overlap': 0,
                'conf': 0,
                'n12cc_ang': 87.910385850994587,
                'n1cc_ang': 92.089614149005413,
                'strand_orient_norm': -1,
                'stack_norm': 0.97095424,
                'nn_ang_norm': 13.698485786637832,
                'nn_ang': 166.30151421336217,
                'min_dist': 1.7859186,
                'strand_orient': -1,
                'orient': 1,
                'n2cc_ang': 88.640047368908554,
            },
        ),
        (
            "2ZJP:X303:X77",
            {
                'stack_orient': 3,
                'strand_orient': -1,
                'dist': 6.3820344572213887,
                'stack_min_dist': 1.4906554379247356,
                'n12cc_ang': 79.337562855790168,
                'stack_norm': 0.93983656,
                'strand_orient_norm': -1,
                'conf': 0,
                'n2_z': -0.93983656,
                'orient': 1,
                'n2cc_ang': 96.415019366401566,
                'dist_z': 1.1562830607096353,
                'o_ang': 50.822836557417922,
                'stack_overlap': 0,
                'rot_ang': -56.801295513103156,
                'n1cc_ang': 100.66243714420983,
                'min_dist': 1.4906554379247356,
                'nn_ang_norm': 20.020604505050017,
                'nn_ang': 159.97939549494998,
            },
        ),
    ]
    # Parameter names that are also read back as Doublet attributes.
    doublet_attrs = (
        'dist', 'min_dist', 'nn_ang', 'nn_ang_norm', 'n1cc_ang', 'n2cc_ang',
        'n12cc_ang', 'o_ang', 'orient', 'stack_orient', 'stack_norm',
        'strand_orient', 'strand_orient_norm', 'conf', 'dist_z', 'n2_z',
        'rot_ang', 'stack_min_dist', 'stack_overlap',
    )
    eps = 0.001

    dd = DoubletsDict(reduced_atoms=['*'])
    for d_id, expected_params in TEST_DATA:
        n_type = dd.get_n_type(d_id)
        points = dd.get(d_id)
        self.assertEqual(len(points), 2)

        params = doublet_params_dict(points, n_type, 'stacking')
        self.assertTrue(params is not None)
        for key, expected_value in expected_params.items():
            self.assertTrue(key in params)
            self.assertTrue(params[key] > expected_value - eps)
            self.assertTrue(params[key] < expected_value + eps)

        # test of the new methods
        first = Residue("A1", n_type[0], points[0])
        second = Residue("B1", n_type[1], points[1])
        d = Doublet(d_id, first, second)
        for key, expected_value in expected_params.items():
            if key not in doublet_attrs:
                print("skipping: %s" % key)
                continue
            v = getattr(d, key)
            self.assertTrue(
                expected_value - eps < v < expected_value + eps,
                "%s: got: %.4f, expected: %.4f" % (key, v, expected_value))
def main():
    """Command-line entry point: collect doublet lists and hand them on.

    The options returned by ``parse_args()`` select one input mode (the first
    one that is set wins):

    * ``doublet_id`` - a single doublet, labelled ``d:<id>``;
    * ``gen_pdb_for`` - read the group-info JSON and call ``gen_pdb`` with
      its doublets and the distinct ids of its unclassified neighbours;
    * ``input_group_info`` - one list per group-info section, labelled
      'reference', 'unclassified' and 'other';
    * ``input_json`` - comma-separated JSON files holding either a dict of
      lists (optionally filtered by ``only_keys``) or a plain list;
    * ``input_pdb`` - print the parameters of the two residues in a PDB file.

    In every case ``compute_params`` is finally called with the collected
    lists and labels (which are empty for the modes that do not fill them).
    """
    (parser, options, _args) = parse_args()
    doublet_lists = []
    labels = []

    if options.doublet_id:
        doublet_lists.append([options.doublet_id])
        labels.append("d:%s" % options.doublet_id)

    elif options.gen_pdb_for:
        group_data = load_json(options.input_group_info)
        assert isinstance(group_data, dict)
        assert len(group_data.keys()) == 1
        all_ids = []
        unclassified_ids = []
        for group_info in group_data.values():
            all_ids = group_info['all_doublets']
            unclassified_ids = list(set([
                x for row in group_info['neigh_unclassified'] for x, d in row
            ]))
        gen_pdb(all_ids, unclassified_ids, options)

    elif options.input_group_info:
        group_data = load_json(options.input_group_info)
        assert isinstance(group_data, dict)
        assert len(group_data.keys()) == 1
        for group_info in group_data.values():
            assert isinstance(group_info, dict)
            for section in ('all_doublets', 'neigh_unclassified', 'neigh_other'):
                entries = group_info[section]
                if section == 'all_doublets':
                    doublet_lists.append(entries)
                    labels.append('reference')
                    continue
                # Neighbour sections hold rows of (doublet id, distance)
                # pairs; keep each id once.
                distinct_ids = list(set([
                    did for row in entries for (did, dist) in row
                ]))
                doublet_lists.append(distinct_ids)
                labels.append(section.split("_")[1])

    elif options.input_json:
        only_keys = options.only_keys.split(",") if options.only_keys else None
        single_pattern = only_keys is not None and len(only_keys) == 1

        for fn in options.input_json.split(","):
            if fn == '':
                continue
            loaded = load_json(fn)
            if isinstance(loaded, dict):
                print("DICT!")
                if only_keys is None:
                    keys = loaded.keys()
                elif single_pattern:
                    # A single entry is treated as an anchored regular
                    # expression over the keys of the file.
                    regexp = re.compile('^' + only_keys[0] + '$')
                    keys = [k for k in loaded.keys() if regexp.match(k)]
                else:
                    keys = only_keys

                for key in keys:
                    if key not in loaded:
                        continue
                    ids_for_key = loaded[key]
                    assert all(isinstance(did, str) for did in ids_for_key)
                    print(key)
                    doublet_lists.append(ids_for_key)
                    labels.append(key)

                if single_pattern:
                    # Everything collected so far is merged into one list
                    # labelled with the pattern itself.
                    doublet_lists = [sum(doublet_lists, [])]
                    labels = [only_keys[0]]
            elif isinstance(loaded, list):
                print("LIST!")
                assert all(isinstance(did, str) for did in loaded)
                doublet_lists.append(loaded)
                labels.append(os.path.basename(fn))
            else:
                raise Exception("Unknown format of JSON file")

    elif options.input_pdb:
        structure = load_pdb(options.input_pdb)
        residues = list(structure.get_residues())
        assert len(residues) == 2
        first, second = residues
        n_type = first.resname.strip() + second.resname.strip()
        print(doublet_params_dict(
            (simplify_residue(first), simplify_residue(second)),
            n_type,
            options.params_type))

    compute_params(doublet_lists, labels, options)