def erbb2_mutants(args):
    """
    Draw what Rosetta thinks about Assaf's mutants of ErbB2.

    Reads the experimental table and every mutant ``.score`` file in the
    results directory, stores each mutant's ``a_ddg`` relative to the
    wild-type value in a ``rosetta`` column, and scatter-plots that column
    against the experimental ``exp`` column, labelling each point with the
    mutation (wt residue, position, mutant residue).

    Args:
        args: not used by this function.
    """
    score_dir = "/home/labs/fleishman/jonathaw/elazaridis/fold_and_dock/" + \
        "erbb2/mutations/all_results/"
    exp_table = "/home/labs/fleishman/jonathaw/elazaridis/fold_and_dock/" + \
        "erbb2/mutations/general_data/mut_table.txt"
    exp_df = parse_erbb2_exp_table(exp_table)

    # wild-type reference value, subtracted from every mutant below
    wt_score_file = score_dir + "all_erbb2v4_wt_28Feb.score"
    wt_df = Rf.score_file2df(wt_score_file)
    wt_ddg = Rf.get_term_by_threshold(wt_df, 'score', 5, 'a_ddg', 'mean')

    # mutants without a score file keep NaN and are dropped before plotting
    exp_df['rosetta'] = np.nan

    mutant_files = [a for a in os.listdir(score_dir)
                    if '.score' in a and 'wt' not in a]
    for sc_file in mutant_files:
        df = Rf.score_file2df(score_dir + sc_file)
        ddg = Rf.get_term_by_threshold(df, 'score', 5, 'a_ddg', 'mean')

        # third '_'-separated field is the mutation, e.g. <wt><pos><mut>
        name = sc_file.split('_')[2]
        wt, pos, mut = name[0], int(name[1:-1]), name[-1]

        # DataFrame.set_value was removed in pandas 1.0; a boolean-mask
        # assignment through .loc is the supported equivalent
        row_mask = ((exp_df['pos'] == pos) &
                    (exp_df['wt'] == wt) &
                    (exp_df['mut'] == mut))
        exp_df.loc[row_mask, 'rosetta'] = ddg - wt_ddg

    print(exp_df)
    exp_df = exp_df.dropna()
    print(exp_df.to_string())

    plt.scatter(exp_df['rosetta'], exp_df['exp'])
    plt.ylabel('experimental ∆∆G')
    plt.xlabel('rosetta ∆∆G')
    plt.axhline(0)
    plt.axvline(0)
    for _, row in exp_df.iterrows():
        plt.annotate('%s%i%s' % (row['wt'], row['pos'], row['mut']),
                     (row['rosetta'], row['exp']))
    plt.show()
def _mutant_name_from_score_file(sc_file):
    """Return the mutant label taken from the third '_'-separated field of a score file name."""
    name = sc_file.split('_')[2]
    if '16Mar' in sc_file:
        # NOTE(review): files tagged 16Mar get their residue number shifted
        # by 72 -- presumably to match the numbering used in the
        # experimental table; confirm against the run setup.
        name = '%s%i%s' % (name[0], int(name[1:-1]) + 72, name[-1])
    return name


def _plot_regression_panel(ax, data, x_col, y_col, title):
    """Scatter data[y_col] against data[x_col] on ax with a linear fit, its R^2 and zero axes."""
    model = linear_model.LinearRegression()
    model.fit(data[x_col].to_frame(), data[y_col].to_frame())
    line_x = np.linspace(data[x_col].min(), data[x_col].max())
    line_y = model.predict(line_x[:, np.newaxis])
    r2 = r2_score(data[y_col].values, model.predict(data[x_col].to_frame()))

    ax.scatter(data[x_col], data[y_col])
    ax.plot(line_x, line_y)
    ax.set_title(title)
    ax.text(0.8, 0.1, r'$R^2=%.2f$' % r2, fontsize=15,
            horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes)
    ax.axhline(0, color='k')
    ax.axvline(0, color='k')


def mutant_table(args: dict):
    """
    Find and display the correlation between ResSolv and MPFrameWork and
    experimental results from both Doung 2006 and Assaf.

    Every ``.score`` file in the ResSolv results directory and in the
    MPFrameWork directory is reduced to one ``a_ddg`` value, the matching
    wild-type value is subtracted, and the difference is written to the
    ``rs`` or ``mp`` column of the experimental table (matched on ``name``).
    The table is printed and logged, then plotted with a linear fit per panel.

    Args:
        args: run options. Keys read here:
            ``'dir'``    -- sub-directory of ``mutant_results`` holding the
                            ResSolv score files;
            ``'logger'`` -- object whose ``log`` method receives the table;
            ``'all4'``   -- truthy: 2x2 grid of both score functions against
                            both experimental columns; falsy: the two score
                            functions against ``dstbl`` only, saved as
                            ``dsTbL_alone.pdf`` in the ResSolv directory.
    """
    scores_dir = '/home/labs/fleishman/jonathaw/elazaridis/fold_and_dock/gpa/mutant_results/%s' % args['dir']
    mp_dir = '/home/labs/fleishman/jonathaw/elazaridis/fold_and_dock/gpa/mutant_results/mpframework_18Dec/'
    main_df = pd.read_csv("/home/labs/fleishman/jonathaw/elazaridis/" +
                          "fold_and_dock/gpa/mutant_results/" +
                          "experimental_results.tsv", sep=r'\s+')

    # wild-type reference values, one per score function
    wt_beta_score_file = [a for a in os.listdir(scores_dir)
                          if 'wt' in a and '.score' in a][0]
    wt_beta_df = Rf.score_file2df(scores_dir + '/' + wt_beta_score_file)
    wt_beta_ddg = Rf.get_term_by_threshold(wt_beta_df, 'score', 5, 'a_ddg', 'mean')

    wt_mp_df = Rf.score_file2df('%sall_gpav1_wt_mpframework_25Oct.score' % mp_dir)
    wt_mp_ddg = Rf.get_term_by_threshold(wt_mp_df, 'score', 5, 'a_ddg', 'mean')

    score_files = [a for a in os.listdir(scores_dir) + os.listdir(mp_dir)
                   if '.score' in a]
    for sc_file in score_files:
        # the file name alone decides which directory and column it belongs to
        is_mp = 'mpframework' in sc_file
        df = Rf.score_file2df(os.path.join(mp_dir if is_mp else scores_dir, sc_file))

        name = _mutant_name_from_score_file(sc_file)
        min_ddg = Rf.get_term_by_threshold(df, 'score', 5, 'a_ddg', 'mean')

        # DataFrame.set_value was removed in pandas 1.0; boolean-mask
        # assignment through .loc is the supported equivalent
        row_mask = main_df['name'] == name
        if is_mp:
            main_df.loc[row_mask, 'mp'] = min_ddg - wt_mp_ddg
        else:
            main_df.loc[row_mask, 'rs'] = min_ddg - wt_beta_ddg

    print(main_df)
    args['logger'].log(main_df)

    if args['all4']:
        plt.figure(figsize=(10, 10), facecolor='w')
        panels = [(scfxn, exp)
                  for scfxn in ['rs', 'mp']
                  for exp in ['dstbl', 'Doung']]
        for i, (scfxn, exp) in enumerate(panels, start=1):
            ax = plt.subplot(2, 2, i)
            # the regression cannot be fitted on NaN, so drop incomplete
            # rows per panel (same treatment as the two-panel branch)
            pair_df = main_df[[scfxn, exp]].dropna(how='any')
            scfxn_name = 'ResSolv' if scfxn == 'rs' else 'MPFrameWork'
            exp_name = 'Doung 2006' if exp == 'Doung' else r'dsT$\beta$L'
            _plot_regression_panel(ax, pair_df, scfxn, exp,
                                   '%s Vs. %s' % (scfxn_name, exp_name))
            if i == 3:
                # only the bottom-left panel carries the axis labels
                ax.set_xlabel('Rosetta ∆∆G', fontsize=18)
                ax.set_ylabel('Experimental ∆∆G', fontsize=18)
        plt.show()
    else:
        fig = plt.figure(facecolor='w')

        ax1 = plt.subplot(1, 2, 1)
        rs_df = main_df[['name', 'dstbl', 'rs']].dropna(how='any')
        _plot_regression_panel(ax1, rs_df, 'rs', 'dstbl',
                               '%s Vs. %s' % ('ResSolv', r'dsT$\beta$L'))
        ax1.set_xlabel('Rosetta ∆∆G', fontsize=18)
        ax1.set_ylabel(r'dsT$\beta$L experimental results', fontsize=18)
        for x, y, n in zip(rs_df['rs'], rs_df['dstbl'], rs_df['name']):
            ax1.annotate(n, (x, y))

        ax2 = plt.subplot(1, 2, 2)
        mp_df = main_df[['name', 'dstbl', 'mp']].dropna(how='any')
        _plot_regression_panel(ax2, mp_df, 'mp', 'dstbl',
                               '%s Vs. %s' % ('MPFrameWork', r'dsT$\beta$L'))

        # save before show(): once the window is closed the figure is
        # released and a later savefig would write an empty page
        fig.savefig('%s/dsTbL_alone.pdf' % scores_dir)
        plt.show()