def main():
    """Parse command-line arguments and dispatch to the analysis routine
    selected by ``-mode`` ('old', 'new', 'wj', 'q', 'slider', 's_by_s', 'test').

    Side effects: creates a Logger (closed before returning) and runs the
    chosen analysis, which may plot or write files.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-mode', default='q')
    parser.add_argument('-pc_old')
    parser.add_argument('-score_old')
    parser.add_argument('-obj_old')
    parser.add_argument('-pc_new')
    parser.add_argument('-score_new')
    parser.add_argument('-obj_new')
    parser.add_argument('-pc_wj')
    parser.add_argument('-score_wj')
    parser.add_argument('-obj_wj')
    parser.add_argument('-sc')
    parser.add_argument('-mp_sc')
    parser.add_argument('-rs_sc')
    parser.add_argument('-pc')
    parser.add_argument('-mp_pc')
    parser.add_argument('-rs_pc')
    parser.add_argument('-span_threshold', default=0.3, type=float)
    parser.add_argument('-names', default=None, help='file list of names to show')
    parser.add_argument('-log_path', default='./', help='path to place log file')
    parser.add_argument('-percent', type=int, default=100)
    # BUG FIX: argparse's type=bool converts ANY non-empty string — including
    # "False" — to True. Parse the text explicitly so "-best False" works.
    parser.add_argument('-best', default=False,
                        type=lambda s: s.lower() in ('true', 't', '1', 'yes'))
    parser.add_argument('-terms', nargs='+',
                        default=['score', 'a_shape', 'a_pack', 'a_ddg', 'res_solv'])
    parser.add_argument('-threshold', type=int, default=5)
    parser.add_argument('-show', default='show')
    args = vars(parser.parse_args())

    # NOTE(review): 'logeer' looks like a typo for 'logger', and '%0-m' is a
    # non-standard strftime directive — both kept as-is so existing log-file
    # names stay stable; confirm before changing.
    args['logger'] = Logger('logeer_%s.log' % time.strftime("%d.%0-m"), args['log_path'])

    if args['mode'] == 'old':
        analyse_old(args)
    elif args['mode'] == 'new':
        analyse_new(args)
    elif args['mode'] == 'wj':
        analyse_wj(args)
    elif args['mode'] == 'q':
        quick_rmsd_total(args)
    elif args['mode'] == 'slider':
        slide_ddg(args)
    elif args['mode'] == 's_by_s':
        side_by_side(args)
    elif args['mode'] == 'test':
        sc_df = Rf.score_file2df(args['sc'])
        new = Rf.get_best_of_best(sc_df)
    else:
        print('no mode')
    args['logger'].close()
def quick_rmsd_total(args):
    """Scatter-plot model score vs. pc_rmsd for models that survive a cascade
    of quality filters, annotate with a Z-score, and show or save the figure.

    Expects in ``args``: sc, names, pc, logger, best, percent, terms,
    threshold, span_threshold, show.  Side effects: logging and matplotlib
    output (interactive window or PNG next to the score file).
    """
    y_axis_term = 'score'
    sc_df = Rf.score_file2df(args['sc'], args['names'])
    args['logger'].log('found %i structs in sc_df' % len(sc_df))
    pc_df = get_rmsds_from_table(args['pc'])
    args['logger'].log('found %i structs in pc' % len(pc_df))
    # inner join on model name: keep only models present in both tables
    a = sc_df.merge(pc_df, on='description')
    sc_df = a.copy()
    args['logger'].log('left with %i in merged df' % len(sc_df))
    args['logger'].log('examining %s with span_topo threshold %f' %
                       (args['sc'], args['span_threshold']))
    fig, ax = plt.subplots()
    if args['best']:
        # "best" path: pre-filter, then let get_best_of_best pick the top set
        sc_df = sc_df[sc_df['a_tms_span_fa'] > 0.5]
        threshold = np.percentile(sc_df[y_axis_term], args['percent'])
        sc_df = sc_df[sc_df[y_axis_term] < threshold]
        sc_df = sc_df[sc_df['a_span_topo'] >= 0.99]
        sc_df_pass = Rf.get_best_of_best(sc_df, args['terms'], args['threshold'])
        sc_df_fail = sc_df[~sc_df['description'].isin(sc_df_pass['description'])]
        args['logger'].log('%i models returned from BEST' % len(sc_df_pass))
    else:
        # fixed filter cascade, logging survivor counts at each step
        args['logger'].log('total of %i models in score' % len(sc_df))
        sc_df = sc_df[sc_df['a_tms_span_fa'] > 0.5]
        args['logger'].log('%i models pass tms_span' % len(sc_df))
        threshold = np.percentile(sc_df[y_axis_term], args['percent'])
        sc_df = sc_df[sc_df[y_axis_term] < threshold]
        args['logger'].log('for percent %.2f found threshold to be %.2f and %i strucutres pass it' %
                           (args['percent'], threshold, len(sc_df)))
        sc_df = sc_df[sc_df['a_shape'] >= 0.6]
        sc_df = sc_df[sc_df['a_sasa'] > 700]
        # BUG FIX: message said "sasa 600" but the filter above is > 700
        args['logger'].log('%i passed sasa 700' % len(sc_df))
        sc_df = sc_df[sc_df['a_ddg'] < -5]
        args['logger'].log('%i passed ddg' % len(sc_df))
        sc_df = sc_df[sc_df['a_unsat'] < 1]
        args['logger'].log('%i passed unsat' % len(sc_df))
        sc_df['pass'] = sc_df['a_span_topo'] > args['span_threshold']
        sc_df = sc_df[sc_df['a_res_solv'] < -10]
        args['logger'].log('%i passed res_solv -10' % len(sc_df))
        sc_df_pass = sc_df[sc_df['a_span_topo'] > args['span_threshold']]
        args['logger'].log('%i models passed span_topo threshold' % len(sc_df_pass))
        sc_df_fail = sc_df[sc_df['a_span_topo'] <= args['span_threshold']]
        args['logger'].log('%i models failed span_topo threshold' % len(sc_df_fail))
    # BUG FIX: removed two unused np.ndarray(buffer=..., shape=(len(sc_df),))
    # lines — the buffer came from sc_df_pass (a subset) while the shape used
    # len(sc_df), raising "buffer is too small" whenever any model failed.
    # colour by helix-angle term when available, else by span topology score
    if 'a_hha' in sc_df.columns:
        ax.scatter(sc_df_pass['pc_rmsd'].values, sc_df_pass[y_axis_term].values,
                   marker='o', c=sc_df_pass['a_hha'].values, picker=True,
                   cmap=plt.cm.coolwarm)
    else:
        ax.scatter(sc_df_pass['pc_rmsd'].values, sc_df_pass[y_axis_term].values,
                   marker='o', c=sc_df_pass['a_span_topo'].values, picker=True,
                   cmap=plt.cm.coolwarm)
    min_energy = np.nanmin(list(sc_df_pass[y_axis_term].values))
    max_energy = np.nanmax(list(sc_df_pass[y_axis_term].values))
    plt.ylim([min_energy - 1, max_energy + 1])
    plt.xlim([0, 15])
    plt.title(args['sc'] + '_pass')
    z_score, rmsd_threshold = Rf.get_z_score_by_rmsd_percent(sc_df_pass)
    plt.text(0.75, 0.2, "Zscore=%.2f" % z_score, transform=ax.transAxes)
    plt.axvline(rmsd_threshold)
    # columns shown when a point is picked in the interactive plot
    point_label_cols = list(set(args['terms'] + ['description', 'a_sasa', 'a_res_solv',
                                                 'a_pack', 'a_span_topo', 'a_ddg',
                                                 'fa_elec']))
    pl = PointLabel(sc_df_pass, ax, fig, 'pc_rmsd', y_axis_term, point_label_cols,
                    args['logger'])
    fig.canvas.mpl_connect('pick_event', pl.onpick)
    plt.xlabel('RMSD')
    plt.ylabel(y_axis_term)
    if args['show'] == 'show':
        plt.show()
    else:
        plt.savefig('%s.png' % args['sc'].split('.score')[0])
def main():
    """Select the best-scoring models from a score file and write their names
    either to the log or to a separate result file.

    Modes: '%' (percentile threshold on -filter, over/under), 'num' (top -num
    rows by -filter), 'best_of_best' (percentile pre-filter then
    Rf.get_best_of_best), 'thresholds' (per-term hard cutoffs then percentile).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-sc', type=str, help='score file')
    parser.add_argument('-percent', type=float, default=5,
                        help='percent (1-100) best scoring to get')
    parser.add_argument('-filter', type=str, default='score',
                        help='filter or score term to use')
    parser.add_argument('-num', default=10, type=int,
                        help='use if you want a number of results, not better than percentile')
    parser.add_argument('-mode', default='%')
    parser.add_argument('-over_under', type=str, default='under',
                        help='under/over score should be over/under threshold')
    parser.add_argument('-result', type=str, default=None,
                        help='should the names be written to a file separate from the log file')
    parser.add_argument('-terms', nargs='+',
                        default=['score', 'a_shape', 'a_pack', 'a_ddg', 'res_solv'])
    parser.add_argument('-thresholds', nargs='+', type=float)
    parser.add_argument('-percentile', default=10, type=int)
    args = vars(parser.parse_args())
    logger = Logger('top_%.1f_%s.log' % (args['percent'], args['filter']))

    # read in the score file, determine the threshold for the percentile
    sc_df = Rf.score_file2df(args['sc'])
    score = sc_df[args['filter']]

    # BUG FIX: the over/under selection used to run for EVERY mode, but
    # 'threshold' was only defined for mode '%' — so modes 'num' and
    # 'best_of_best' crashed with NameError. The branches are now exclusive.
    pass_df = None
    if args['mode'] == '%':
        threshold = np.percentile(score, args['percent'])
        logger.log('found a threshold for %f for filter %s to be %.2f' %
                   (args['percent'], args['filter'], threshold))
        # create a df for lines that pass the threshold, either over or above it...
        if args['over_under'] == 'over':
            pass_df = sc_df[sc_df[args['filter']] >= threshold]
        elif args['over_under'] == 'under':
            pass_df = sc_df[sc_df[args['filter']] <= threshold]
    elif args['mode'] == 'num':
        sc_df.sort_values(args['filter'], inplace=True)
        pass_df = sc_df.head(args['num'])
    elif args['mode'] == 'best_of_best':
        threshold = np.percentile(score, args['percent'])
        sc_df = sc_df[sc_df[args['filter']] <= threshold]
        pass_df = Rf.get_best_of_best(sc_df, args['terms'], args['percentile'])
    elif args['mode'] == 'thresholds':
        # per-term cutoffs: direction depends on whether higher or lower is better
        for term, thrs in zip(args['terms'], args['thresholds']):
            if term in ['a_sasa', 'a_pack', 'a_shape', 'a_tms_span_fa',
                        'a_tms_span', 'a_span_topo']:
                sc_df = sc_df[sc_df[term] > thrs]
            elif term in ['a_mars', 'a_ddg', 'score', 'total_score',
                          'a_res_solv', 'a_span_ins']:
                sc_df = sc_df[sc_df[term] < thrs]
        # NOTE: percentile is computed on the ORIGINAL score column, before
        # the per-term filtering above (kept from the original behaviour).
        threshold = np.percentile(score, args['percent'])
        pass_df = sc_df[sc_df[args['filter']] < threshold]

    if pass_df is None:
        raise ValueError('unrecognised -mode %r / -over_under %r combination' %
                         (args['mode'], args['over_under']))

    # output the names (description) of models that pass the threshold,
    # either to the logger file, or to a separate file
    if args['result'] is None:
        logger.create_header('models passing the threshold:')
        for idx, row in pass_df.iterrows():
            logger.log('%s %f' % (row['description'], row['score']), skip_stamp=True)
    else:
        with open(args['result'], 'w+') as fout:
            for name in pass_df['description']:
                fout.write(name + '\n')
    logger.close()
def quick_rmsd_total(args):
    """Plot model score against pc_rmsd for models surviving a fixed filter
    cascade (or Rf.get_best_of_best when -best is set), colouring points by
    a_hha when present, otherwise by a_span_topo.

    Expects in ``args``: sc, names, pc, logger, best, percent, terms,
    threshold, span_threshold, show.  Side effects: logging and matplotlib
    output (interactive window or PNG next to the score file).
    """
    score_col = 'score'
    log = args['logger'].log

    scores = Rf.score_file2df(args['sc'], args['names'])
    log('found %i structs in sc_df' % len(scores))
    rmsds = get_rmsds_from_table(args['pc'])
    log('found %i structs in pc' % len(rmsds))

    # inner join on model name keeps only models present in both tables
    scores = scores.merge(rmsds, on='description').copy()
    if 'a_hha' in scores.columns:
        scores['angle'] = scores['a_hha'] > 0
    log('left with %i in merged df' % len(scores))
    log('examining %s with span_topo threshold %f' % (args['sc'], args['span_threshold']))

    fig, ax = plt.subplots()
    if args['best']:
        passed = Rf.get_best_of_best(scores, args['terms'], args['threshold'])
        failed = scores[~scores['description'].isin(passed['description'])]
        log('%i models returned from BEST' % len(passed))
    else:
        log('total of %i models in score' % len(scores))
        scores = scores[scores['a_tms_span_fa'] > 0.5]
        log('%i models pass tms_span' % len(scores))
        cutoff = np.percentile(scores[score_col], args['percent'])
        scores = scores[scores[score_col] < cutoff]
        log('for percent %.2f found threshold to be %.2f and %i strucutres pass it' %
            (args['percent'], cutoff, len(scores)))
        scores = scores[scores['a_shape'] >= 0.6]
        scores = scores[scores['a_ddg'] < -6]
        log('%i passed ddg' % len(scores))
        scores = scores[scores['a_unsat'] < 1]
        log('%i passed unsat' % len(scores))
        scores['pass'] = scores['a_span_topo'] > args['span_threshold']
        passed = scores[scores['a_span_topo'] > args['span_threshold']]
        log('%i models passed span_topo threshold' % len(passed))
        failed = scores[scores['a_span_topo'] <= args['span_threshold']]
        log('%i models failed span_topo threshold' % len(failed))

    colour_col = 'a_hha' if 'a_hha' in scores.columns else 'a_span_topo'
    ax.scatter(passed['pc_rmsd'].values, passed[score_col].values, marker='o',
               c=passed[colour_col].values, picker=True, cmap=plt.cm.coolwarm)

    lowest = np.nanmin(list(passed[score_col].values))
    highest = np.nanmax(list(passed[score_col].values))
    plt.ylim([lowest - 1, highest + 1])
    plt.xlim([0, 30])
    plt.title(args['sc'] + '_pass')

    # columns reported when a point is picked in the interactive plot
    label_cols = list(set(args['terms'] + ['description', 'a_sasa', 'a_res_solv',
                                           'a_pack', 'a_span_topo', 'a_ddg',
                                           'fa_elec']))
    picker = PointLabel(passed, ax, fig, 'pc_rmsd', score_col, label_cols,
                        args['logger'])
    fig.canvas.mpl_connect('pick_event', picker.onpick)

    plt.xlabel('RMSD')
    plt.ylabel(score_col)
    if args['show'] == 'show':
        plt.show()
    else:
        plt.savefig('%s.png' % args['sc'].split('.score')[0])