def filter_and_output_results(
    criteria,
    ssdag,
    result,
    output_from_pose,
    merge_bblock,
    db,
    output_symmetric,
    output_centroid,
    output_prefix,
    max_output,
    max_score0,
    max_score0sym,
    rms_err_cut,
    no_duplicate_bases,
    output_only_AAAA,
    full_score0sym,
    output_short_fnames,
    output_only_connected,
    null_base_names,
    only_outputs,
    **kw,
):
    """Filter search results and write the accepted ones to disk.

    When ``merge_bblock`` is falsy (``0`` or ``None``) a
    ``<head>__HEADER.info`` file naming the columns of the ``.info`` rows is
    written first.

    With ``output_from_pose`` true, each of the first
    ``min(max_output, len(result.idx))`` results is built with
    ``make_pose_crit`` and graded with ``run_db_filters``.  A result is
    skipped when it repeats a base (``no_duplicate_bases``), is not graded
    ``"AAAA"`` (``output_only_AAAA``), scores above ``max_score0``, or scores
    at or above ``max_score0sym`` once symmetrized.  Each survivor gets one
    row in ``{output_prefix}{mbb}.info`` and a ``*_asym.pdb`` file (plus
    ``*_sym.pdb`` / ``*.sym`` when applicable); segment provenance and
    contact annotations are appended to the ``*_asym.pdb`` file.

    With ``output_from_pose`` false, results are dumped with
    ``graph_dump_pdb`` and none of the filters run.

    Args:
        criteria: Supplies ``is_cyclic``, ``iface_rms``, ``symname``,
            ``merge_segment`` and, optionally, ``symfile_modifiers``.
        ssdag: Search graph; ``verts`` and ``get_bases`` are used.
        result: Search result with per-result ``idx``, ``pos``, ``err``,
            ``zheight``, ``zradius``, ``radius`` and ``porosity``.
        output_from_pose: Select the pose-based (filtered) output path.
        merge_bblock: Integer id used in file names and log lines, or None.
        db: Sequence whose first item is the database passed to
            ``make_pose_crit``; its ``clear()`` is called every 100 results.
        output_symmetric: Also dump the symmetric pose when symmetry data
            is available.
        output_centroid: Dump the centroid pose as the ``*_asym.pdb``.
        output_prefix: Path prefix of every file written.
        max_output: Upper bound on the number of results examined.
        max_score0: Maximum accepted ``score0`` of the centroid pose.
        max_score0sym: Rejection threshold for the symmetric score; the
            value ``9e9`` means "use ``2 * max_score0``".
        rms_err_cut: Accepted for interface compatibility; the RMS cutoff
            is currently not applied.
        no_duplicate_bases: Reject results that use a base more than once.
        output_only_AAAA: Reject results whose grade is not ``"AAAA"``.
        full_score0sym: Score the symmetric pose after converting it back
            to an asymmetric pose, using the non-symmetric score function.
        output_short_fnames: Use ``<head>_<index>`` as the file stem.
        output_only_connected: Forwarded to ``make_pose_crit`` as
            ``only_connected``.
        null_base_names: Base names ignored by the duplicate check.
        only_outputs: If truthy, only result indices contained in it are
            processed.
        **kw: Forwarded to ``run_db_filters``, ``criteria.iface_rms`` and
            ``criteria.merge_segment``; ``merge_segment`` is read from it.

    Returns:
        ``["nresults output<N>"]`` when N > 0 results were written,
        otherwise an empty list.
    """
    sf = ros.core.scoring.ScoreFunctionFactory.create_score_function("score0")
    # Fall back to the plain score function when no symmetrizer is available.
    if hasattr(ros.core.scoring.symmetry, "symmetrize_scorefunction"):
        sfsym = ros.core.scoring.symmetry.symmetrize_scorefunction(sf)
    else:
        sfsym = sf

    if max_score0sym == 9e9:
        # TODO: improve this logic?
        max_score0sym = 2.0 * max_score0

    mbb = "" if merge_bblock is None else f"_mbb{merge_bblock:04d}"

    def mbb_label(width):
        # None does not accept a numeric format spec, so format it separately.
        if merge_bblock is None:
            return "mbbNone"
        return f"mbb{merge_bblock:0{width}}"

    def commas(items):
        return ",".join(str(item) for item in items)

    head = f"{output_prefix}{mbb}"
    if mbb and not output_prefix.endswith("/"):
        head += "_"

    if not merge_bblock:
        # do this once per run, at merge_bblock == 0 (or None)
        nseg = len(ssdag.verts)
        columns = [
            "close_err close_rms score0 score0sym filter zheight zradius "
            "radius porosity nc nc_wo_jct n_nb bases_str fname nchain chain_len "
            "splicepoints ibblocks ivertex",
            " seg0_pdb_0 seg0_exit",
        ]
        columns.extend(
            " seg%i_enter seg%i_pdb seg%i_exit" % (i, i, i) for i in range(1, nseg - 1)
        )
        columns.append(" seg%i_enter seg%i_pdb" % (nseg - 1, nseg - 1))
        columns.append("\n")
        with open(head + "__HEADER.info", "w") as header_file:
            header_file.write("".join(columns))

    if not output_from_pose:
        nresults = 0
        for iresult in range(min(max_output, len(result.idx))):
            fname = "%s_%04i" % (head, iresult)
            print(result.err[iresult], fname)
            graph_dump_pdb(
                fname + ".pdb",
                ssdag,
                result.idx[iresult],
                result.pos[iresult],
                join="bb",
                trim=True,
            )
            nresults += 1
        return ["nresults output" + str(nresults)] if nresults else []

    info_file = None  # opened lazily, on the first accepted result
    nresults = 0
    Ntotal = min(max_output, len(result.idx))
    try:
        for iresult in range(Ntotal):
            if only_outputs and iresult not in only_outputs:
                print("output skipping", iresult)
                continue

            if iresult % 100 == 0:
                # Clear the database and report resident memory (MiB)
                # before and after.
                process = psutil.Process(os.getpid())
                gc.collect()
                mem_before = process.memory_info().rss / float(2**20)
                db[0].clear()
                gc.collect()
                mem_after = process.memory_info().rss / float(2**20)
                print("clear db", mem_before, mem_after, mem_before - mem_after)

            if iresult % 10 == 0 and hasattr(db[0], "_poses_cache"):
                process = psutil.Process(os.getpid())
                print(
                    f"{mbb_label(4)} dumping results {iresult} of {Ntotal}",
                    "pose_cache",
                    sys.getsizeof(db[0]._poses_cache),
                    len(db[0]._poses_cache),
                    f"{process.memory_info().rss / float(2**20):,}mb",
                )

            bases = ssdag.get_bases(result.idx[iresult])
            bases_str = ",".join(bases)
            if no_duplicate_bases:
                if criteria.is_cyclic:
                    # the last entry is left out of the duplicate check
                    bases = bases[:-1]
                for null_name in null_base_names:
                    while null_name in bases:
                        bases.remove(null_name)
                if len(set(bases)) != len(bases):
                    if criteria.is_cyclic:
                        bases[-1] = "(" + bases[-1] + ")"
                    print("duplicate bases fail", merge_bblock, iresult, bases)
                    continue

            try:
                pose, prov = make_pose_crit(
                    db[0],
                    ssdag,
                    criteria,
                    result.idx[iresult],
                    result.pos[iresult],
                    only_connected=output_only_connected,
                    provenance=True,
                )
            except ValueError as e:
                print("error in make_pose_crit:")
                print(e)
                continue

            try:
                (
                    jstr,
                    jstr1,
                    filt,
                    grade,
                    sp,
                    mc,
                    mcnh,
                    mhc,
                    nc,
                    ncnh,
                    nhc,
                ) = run_db_filters(
                    db, criteria, ssdag, iresult, result.idx[iresult], pose, prov, **kw
                )
            except Exception as e:
                # Best effort: a failing filter skips this result only.
                print("error in db_filters:")
                print(traceback.format_exc())
                print(e)
                continue

            if output_only_AAAA and grade != "AAAA":
                print(f"{mbb_label(4)} {iresult:06} bad grade", grade)
                continue

            # rms is reported only; the rms_err_cut filter is disabled.
            rms = criteria.iface_rms(pose, prov, **kw)

            cenpose = pose.clone()
            ros.core.util.switch_to_residue_type_set(cenpose, "centroid")
            score0 = sf(cenpose)
            if score0 > max_score0:
                print(
                    f"{mbb_label(4)} {iresult:06} score0 fail",
                    merge_bblock,
                    iresult,
                    "score0",
                    score0,
                    "rms",
                    rms,
                    "grade",
                    grade,
                )
                continue

            symfilestr = None
            if hasattr(criteria, "symfile_modifiers"):
                symdata, symfilestr = util.get_symdata_modified(
                    criteria.symname,
                    **criteria.symfile_modifiers(segpos=result.pos[iresult]),
                )
            else:
                symdata = util.get_symdata(criteria.symname)

            if symdata:
                sympose = cenpose.clone()
                ros.core.pose.symmetry.make_symmetric_pose(sympose, symdata)
                score0sym = sfsym(sympose)
                if full_score0sym:
                    sym_asym_pose = sympose.clone()
                    ros.core.pose.symmetry.make_asymmetric_pose(sym_asym_pose)
                    score0sym = sf(sym_asym_pose)
                if score0sym >= max_score0sym:
                    # Field widths differ from the other log lines on purpose:
                    # the existing log format is preserved.
                    print(
                        f"{mbb_label(6)} {iresult:04} score0sym fail",
                        score0sym,
                        "rms",
                        rms,
                        "grade",
                        grade,
                    )
                    continue
            else:
                score0sym = -1

            chains = list(pose.split_by_chain())
            chain_info = "%4d " % len(chains)
            chain_info += "-".join(str(len(c)) for c in chains)

            mod, new, lost, junct = get_affected_positions(cenpose, prov)

            if output_short_fnames:
                fname = "%s_%04i" % (head, iresult)
            else:
                jpos = "-".join(str(x) for x in junct)
                fname = "%s_%04i_%s_%s_%s" % (head, iresult, jpos, jstr[:200], grade)

            # report bblock ids, taking into account merge_bblock shenani
            ibblock_list = [
                str(v.ibblock[i]) for i, v in zip(result.idx[iresult], ssdag.verts)
            ]
            mseg = kw.get("merge_segment")
            if mseg is None:
                mseg = criteria.merge_segment(**kw)
            mseg = mseg or 0  # 0 if None
            ibblock_list[mseg] = str(merge_bblock)

            if not info_file:
                outdir = os.path.dirname(output_prefix)
                if outdir:
                    # exist_ok avoids a check-then-create race between runs
                    os.makedirs(outdir, exist_ok=True)
                info_file = open(f"{output_prefix}{mbb}.info", "w")
            info_file.write(
                "%5.2f %5.2f %7.2f %7.2f %-8s %5.1f %5.1f %5.1f %5.3f %4d %4d %4d %s %-80s %s %s %s %s %s\n"
                % (
                    result.err[iresult],
                    rms,
                    score0,
                    score0sym,
                    grade,
                    result.zheight[iresult],
                    result.zradius[iresult],
                    result.radius[iresult],
                    result.porosity[iresult],
                    mc,
                    mcnh,
                    mhc,
                    bases_str,
                    fname,
                    chain_info,
                    "-".join([str(x) for x in sp]),
                    "-".join(ibblock_list),
                    "-".join(str(x) for x in result.idx[iresult]),
                    jstr1,
                )
            )
            info_file.flush()

            if symdata and output_symmetric:
                sympose.dump_pdb(fname + "_sym.pdb")
            if output_centroid:
                pose = cenpose
            print("solution", fname)
            pose.dump_pdb(fname + "_asym.pdb")
            if symfilestr is not None:
                with open(fname + ".sym", "w") as out:
                    out.write(symfilestr)
            nresults += 1

            # Append provenance and contact annotations after the coordinates.
            with open(fname + "_asym.pdb", "a") as out:
                for ip, (lb, ub, psrc, lbsrc, ubsrc) in enumerate(prov):
                    out.write(
                        f"Segment: {ip:2} resis {lb:4}-{ub:4} come from resis "
                        + f"{lbsrc}-{ubsrc} of {psrc.pdb_info().name()}\n"
                    )
                nchain = pose.num_chains()
                out.write("Bases: " + bases_str + "\n")
                out.write("Modified positions: " + commas(mod) + "\n")
                out.write("New contact positions: " + commas(new) + "\n")
                out.write("Lost contact positions: " + commas(lost) + "\n")
                out.write("Junction residues: " + commas(junct) + "\n")
                out.write("Length of asymetric unit: " + str(len(pose.residues)) + "\n")
                out.write("Number of chains in ASU: " + str(nchain) + "\n")
                out.write("Closure error: " + str(rms) + "\n")
    finally:
        # Close the .info file even when a result raises part-way through.
        if info_file is not None:
            info_file.close()

    return ["nresults output" + str(nresults)] if nresults else []
def make_peace(spec, cart_resl, ori_resl, clash_check, dump_pdb, **kw):
    """Run the outer and inner searches, merge them and dump symmetric poses.

    The outer search (``outside_grow``) is rescored and hashed with a binner
    built from ``cart_resl`` / ``ori_resl``.  The inner search
    (``inside_grow``) is run against that hash table, the two result sets are
    merged on a fresh search graph, clashing results are pruned, and up to
    999 merged results are written as ``whole_NNN.pdb`` after being switched
    to centroid mode and symmetrized with ``"C<crit.nfold>"`` symmetry data.

    Args:
        spec: Pair ``(bbspec, crit)``.
        cart_resl: Passed to both bin indexers.
        ori_resl: Passed to both bin indexers.
        clash_check: Not used by this function.
        dump_pdb: Not used by this function.
        **kw: Forwarded to the search helpers; ``kw["db"][0]`` and
            ``kw["i_merge_bblock"]`` are read here.

    Returns:
        None.  Progress is printed and PDB files are written to the current
        working directory.
    """
    binner = gu_xbin_indexer(cart_resl, ori_resl)
    numba_binner = numba_xbin_indexer(cart_resl, ori_resl)
    bbdb = kw["db"][0]
    bbspec, crit = spec

    ################ outer cycle ###############

    touter = time()
    ot_graph, ot_rslt, ot_crit = outside_grow(spec, **kw)
    rescore = ot_crit.score(ot_rslt.pos.swapaxes(0, 1))
    # Sanity check: every rescored value below 9999 must be at most 1.0.
    assert np.max(rescore[rescore < 9999]) <= 1.0
    ot_rslt = subset_result(ot_rslt, rescore <= 1.0)
    keys, hash_table = _make_hash_table(ot_graph, ot_rslt, binner)
    print(
        " nresult outer",
        len(ot_rslt.idx),
        "unique hashes",
        len(set(keys)),
        f"{int(ot_rslt.stats.total_samples[0] / (time() - touter)):,}/s",
        f"redundancy {ot_rslt.stats.n_redundant_results[0]/len(ot_rslt.idx):5.2f}",
    )

    ################ spokes ###############

    tinner = time()
    in_graph, in_rslt = inside_grow(spec, binner=numba_binner, table=hash_table, **kw)
    print(
        " nresults inner",
        len(in_rslt.idx),
        # Separate arguments: adjacent f-strings would be fused into one
        # string with no space between the rate and the redundancy.
        f"rate {int(in_rslt.stats.total_samples[0] / (time() - tinner)):,}/s",
        f"redundancy {in_rslt.stats.n_redundant_results[0]/len(in_rslt.idx):5.2f}",
    )

    ################ merged ###############

    ssdag = simple_search_dag(
        bbspec,
        modbbs=modsinglebb((crit.from_seg, ), kw["i_merge_bblock"]),
        make_edges=False,
        **kw,
    )
    print("whole:", bbspec)
    rslt, imerge = merge_results(
        ssdag, crit, in_rslt, in_graph, ot_rslt, ot_graph, binner, hash_table
    )
    ntot = len(rslt.idx)  # result count before clash pruning
    tclash = time()
    rslt = prune_clashes(ssdag, crit, rslt, at_most=10000, thresh=3.0, **kw)
    tclash = time() - tclash
    print(" nresults", len(rslt.idx), "withclash", ntot, "clashrate", ntot / tclash)

    symdata = util.get_symdata("C" + str(crit.nfold))
    for i in range(min(999, len(rslt.idx))):
        pose = make_pose_crit(
            bbdb, ssdag, crit, rslt.idx[i], rslt.pos[i], only_connected="auto"
        )
        ros.core.util.switch_to_residue_type_set(pose, "centroid")
        ros.core.pose.symmetry.make_symmetric_pose(pose, symdata)
        pose.dump_pdb("whole_%03i.pdb" % i)
def sympose(
    self,
    which,
    score=False,
    provenance=False,
    fullatom=False,
    asym_score_thresh=50,
    min_cell_spacing=130,
    *,
    parallel=False
):
    """Build the symmetric pose for one result index or for several.

    Args:
        which: A single index, or an iterable of indices.  Every index must
            satisfy ``0 <= index < len(self)``.
        score: Also return the score of the centroid pose.
        provenance: Also return the provenance from ``self.pose``.
        fullatom: Return a ``"fa_standard"`` copy of the pose instead of the
            centroid pose.
        asym_score_thresh: If ``self.score0`` of the centroid pose (before
            symmetrization) exceeds this value, ``None`` is returned.
        min_cell_spacing: Accepted for interface compatibility; the lattice
            code path that consulted it is disabled.
        parallel: For an iterable ``which``, map over a
            ``ThreadPoolExecutor`` instead of serially.

    Returns:
        For a single index: ``None`` when ``self.pose`` yields nothing or the
        asymmetric score is above ``asym_score_thresh``; otherwise the pose,
        or a tuple ``(pose, score, prov)`` / ``(pose, score)`` /
        ``(pose, prov)`` depending on ``score`` and ``provenance``.
        For an iterable: a list with one such entry per index.

    Raises:
        IndexError: If any requested index is out of range.
    """
    if isinstance(which, Iterable):
        which = list(which)
        if not all(0 <= i < len(self) for i in which):
            raise IndexError("invalid worm index")
        # Positional fan-out; ``parallel`` is keyword-only, so the per-index
        # calls always take the single-index path below.
        args = (
            self.sympose,
            which,
            it.repeat(score),
            it.repeat(provenance),
            it.repeat(fullatom),
            it.repeat(asym_score_thresh),
            it.repeat(min_cell_spacing),
        )
        if parallel:
            with ThreadPoolExecutor() as pool:
                return list(pool.map(*args))
        return list(map(*args))

    if not 0 <= which < len(self):
        raise IndexError("invalid worm index")

    posecall = self.pose(which, provenance=True)
    if not posecall:
        return None
    pcen, prov = posecall

    ros.core.util.switch_to_residue_type_set(pcen, "centroid")
    if self.score0(pcen) > asym_score_thresh:
        return None

    modifiers = getattr(self.criteria, "symfile_modifiers", None)
    if modifiers:
        # NOTE(review): another caller in this file unpacks two values from
        # get_symdata_modified -- confirm which return shape is current.
        symdata = util.get_symdata_modified(
            self.criteria.symname,
            **modifiers(segpos=self.positions[which]),
        )
    else:
        symdata = util.get_symdata(self.criteria.symname)

    if symdata is None:
        # No symmetry data: leave the pose asymmetric and score it as such.
        sfxn = self.score0
    else:
        sfxn = self.score0sym
        ros.core.pose.symmetry.make_symmetric_pose(pcen, symdata)

    if fullatom:
        # Copy only now, so the full-atom pose carries the symmetry too.
        p = pcen.clone()
        ros.core.util.switch_to_residue_type_set(p, "fa_standard")
    else:
        p = pcen

    if score and provenance:
        return p, sfxn(pcen), prov
    if score:
        return p, sfxn(pcen)
    if provenance:
        return p, prov
    return p
def filter_and_output_results(
    criteria,
    ssdag,
    result,
    output_from_pose,
    merge_bblock,
    db,
    output_symmetric,
    output_centroid,
    output_prefix,
    max_output,
    max_score0,
    rms_err_cut,
    no_duplicate_bases,
    output_only_AAAA,
    **kw
):
    """Filter search results and write the accepted ones to disk.

    When ``merge_bblock`` is falsy (``0`` or ``None``) a
    ``<head>__HEADER.info`` file naming the ``.info`` columns is written.

    With ``output_from_pose`` true, each of the first
    ``min(max_output, len(result.idx))`` results is built with
    ``make_pose_crit`` and graded with ``run_db_filters``.  A result is
    skipped when it repeats a base (``no_duplicate_bases``), is not graded
    ``"AAAA"`` (``output_only_AAAA``), scores above ``max_score0``, or has a
    symmetric score of at least ``2 * max_score0``.  Each survivor gets one
    row in ``{output_prefix}{mbb}.info`` and a ``*_asym.pdb`` file (plus
    ``*_sym.pdb`` when ``output_symmetric``); segment provenance and contact
    annotations are appended to the ``*_asym.pdb`` file.

    With ``output_from_pose`` false, results are dumped with
    ``graph_dump_pdb`` and none of the filters run.

    Args:
        criteria: Supplies ``is_cyclic``, ``iface_rms``, ``symname`` and,
            optionally, ``symfile_modifiers``.
        ssdag: Search graph; ``get_bases`` is used.
        result: Search result with per-result ``idx``, ``pos``, ``err``,
            ``zheight``, ``zradius`` and ``radius``.
        output_from_pose: Select the pose-based (filtered) output path.
        merge_bblock: Integer id used in file names and log lines, or None.
        db: Sequence whose first item is the database passed to
            ``make_pose_crit``; its ``clear()`` is called every 100 results.
        output_symmetric: Also dump the symmetric pose.
        output_centroid: Dump the centroid pose as the ``*_asym.pdb``.
        output_prefix: Path prefix of every file written.
        max_output: Upper bound on the number of results examined.
        max_score0: Maximum accepted ``score0`` of the centroid pose.
        rms_err_cut: Accepted for interface compatibility; the RMS cutoff
            is currently not applied.
        no_duplicate_bases: Reject results that use a base more than once.
        output_only_AAAA: Reject results whose grade is not ``"AAAA"``.
        **kw: Forwarded to ``run_db_filters`` and ``criteria.iface_rms``.

    Returns:
        ``["nresults output<N>"]`` when N > 0 results were written,
        otherwise an empty list.
    """
    sf = ros.core.scoring.ScoreFunctionFactory.create_score_function("score0")
    sfsym = ros.core.scoring.symmetry.symmetrize_scorefunction(sf)

    mbb = "" if merge_bblock is None else f"_mbb{merge_bblock:04d}"

    def mbb_label(width):
        # None does not accept a numeric format spec, so format it separately.
        if merge_bblock is None:
            return "mbbNone"
        return f"mbb{merge_bblock:0{width}}"

    def commas(items):
        return ",".join(str(item) for item in items)

    head = f"{output_prefix}{mbb}"
    if mbb and output_prefix[-1] != "/":
        head += "_"

    if not merge_bblock:
        # do this once per run, at merge_bblock == 0 (or None)
        with open(head + "__HEADER.info", "w") as header_file:
            header_file.write(
                "close_err close_rms score0 score0sym filter zheight zradius radius nc nc_wo_jct n_nb Name chain_info [exit_pdb exit_resN entrance_resN entrance_pdb] jct_res \n"
            )

    if not output_from_pose:
        nresults = 0
        for iresult in range(min(max_output, len(result.idx))):
            fname = "%s_%04i" % (head, iresult)
            graph_dump_pdb(
                fname + ".pdb",
                ssdag,
                result.idx[iresult],
                result.pos[iresult],
                join="bb",
                trim=True,
            )
            nresults += 1
        return ["nresults output" + str(nresults)] if nresults else []

    info_file = None  # opened lazily, on the first accepted result
    nresults = 0
    Ntotal = min(max_output, len(result.idx))
    try:
        for iresult in range(Ntotal):
            if iresult % 100 == 0:
                # Clear the database and report resident memory and the
                # pympler-measured database size (MiB) before and after.
                process = psutil.Process(os.getpid())
                gc.collect()
                mem_before = process.memory_info().rss / float(2**20)
                pym_before = asizeof(db[0]) / float(2**20)
                db[0].clear()
                pym_after = asizeof(db[0]) / float(2**20)
                gc.collect()
                mem_after = process.memory_info().rss / float(2**20)
                print(
                    "clear db",
                    mem_before,
                    mem_after,
                    mem_before - mem_after,
                    "pympler",
                    pym_before,
                    pym_after,
                    pym_before - pym_after,
                )

            if iresult % 10 == 0 and hasattr(db[0], "_poses_cache"):
                process = psutil.Process(os.getpid())
                print(
                    f"{mbb_label(4)} dumping results {iresult} of {Ntotal}",
                    "pose_cache",
                    sys.getsizeof(db[0]._poses_cache),
                    len(db[0]._poses_cache),
                    f"{process.memory_info().rss / float(2**20):,}mb",
                )

            bases = ssdag.get_bases(result.idx[iresult])
            bases_str = ",".join(bases)
            if no_duplicate_bases:
                # Drop every placeholder name; list.remove() would drop only
                # the first one and count the rest as duplicates.
                bases = [b for b in bases if b not in ("", "n/a")]
                nbases = len(bases)
                if criteria.is_cyclic:
                    # One repeat is allowed for cyclic results -- presumably
                    # the closing base equals the first; confirm.
                    nbases -= 1
                if len(set(bases)) != nbases:
                    if criteria.is_cyclic and bases:
                        bases[-1] = "(" + bases[-1] + ")"
                    print("duplicate bases fail", merge_bblock, iresult, bases)
                    continue

            pose, prov = make_pose_crit(
                db[0],
                ssdag,
                criteria,
                result.idx[iresult],
                result.pos[iresult],
                only_connected="auto",
                provenance=True,
            )

            try:
                (
                    jstr,
                    jstr1,
                    filt,
                    grade,
                    sp,
                    mc,
                    mcnh,
                    mhc,
                    nc,
                    ncnh,
                    nhc,
                ) = run_db_filters(
                    db, criteria, ssdag, iresult, result.idx[iresult], pose, prov, **kw
                )
            except Exception as e:
                # Best effort: a failing filter skips this result only.
                print("error in db_filters:")
                print(traceback.format_exc())
                print(e)
                continue

            if output_only_AAAA and grade != "AAAA":
                continue

            # rms is reported only; the rms_err_cut filter is disabled.
            rms = criteria.iface_rms(pose, prov, **kw)

            cenpose = pose.clone()
            ros.core.util.switch_to_residue_type_set(cenpose, "centroid")
            score0 = sf(cenpose)
            if score0 > max_score0:
                print(
                    f"{mbb_label(4)} {iresult:06} score0 fail",
                    merge_bblock,
                    iresult,
                    "score0",
                    score0,
                    "rms",
                    rms,
                    "grade",
                    grade,
                )
                continue

            if hasattr(criteria, "symfile_modifiers"):
                symdata = util.get_symdata_modified(
                    criteria.symname,
                    **criteria.symfile_modifiers(segpos=result.pos[iresult])
                )
            else:
                symdata = util.get_symdata(criteria.symname)

            sympose = cenpose.clone()
            ros.core.pose.symmetry.make_symmetric_pose(sympose, symdata)
            score0sym = sfsym(sympose)
            if score0sym >= 2.0 * max_score0:
                # Field widths differ from the other log lines on purpose:
                # the existing log format is preserved.
                print(
                    f"{mbb_label(6)} {iresult:04} score0sym fail",
                    score0sym,
                    "rms",
                    rms,
                    "grade",
                    grade,
                )
                continue

            chains = list(pose.split_by_chain())
            chain_info = "%4d " % len(chains)
            chain_info += "-".join(str(len(c)) for c in chains)

            mod, new, lost, junct = get_affected_positions(cenpose, prov)
            jpos = "-".join(str(x) for x in junct)
            fname = "%s_%04i_%s_%s_%s" % (head, iresult, jpos, jstr[:200], grade)

            if not info_file:
                info_file = open(f"{output_prefix}{mbb}.info", "w")
            info_file.write(
                "%5.2f %5.2f %7.2f %7.2f %-8s %5.1f %5.1f %5.1f %4d %4d %4d %s %-80s %s %s %s \n"
                % (
                    result.err[iresult],
                    rms,
                    score0,
                    score0sym,
                    grade,
                    result.zheight[iresult],
                    result.zradius[iresult],
                    result.radius[iresult],
                    mc,
                    mcnh,
                    mhc,
                    bases_str,
                    fname,
                    chain_info,
                    jstr1,
                    sp,
                )
            )
            info_file.flush()

            if output_symmetric:
                sympose.dump_pdb(fname + "_sym.pdb")
            if output_centroid:
                pose = cenpose
            pose.dump_pdb(fname + "_asym.pdb")
            nresults += 1

            # Append provenance and contact annotations after the coordinates.
            with open(fname + "_asym.pdb", "a") as out:
                for ip, (lb, ub, psrc, lbsrc, ubsrc) in enumerate(prov):
                    out.write(
                        f"Segment: {ip:2} resis {lb:4}-{ub:4} come from resis "
                        + f"{lbsrc}-{ubsrc} of {psrc.pdb_info().name()}\n"
                    )
                nchain = pose.num_chains()
                out.write("Bases: " + bases_str + "\n")
                out.write("Modified positions: " + commas(mod) + "\n")
                out.write("New contact positions: " + commas(new) + "\n")
                out.write("Lost contact positions: " + commas(lost) + "\n")
                out.write("Junction residues: " + commas(junct) + "\n")
                out.write("Length of asymetric unit: " + str(len(pose)) + "\n")
                out.write("Number of chains in ASU: " + str(nchain) + "\n")
                out.write("Closure error: " + str(rms) + "\n")
    finally:
        # Close the .info file even when a result raises part-way through.
        if info_file is not None:
            info_file.close()

    return ["nresults output" + str(nresults)] if nresults else []