def test_quality_plot(self):
    """Load fragment quality data, check the consensus strings, and plot.

    The 3mer set is reduced with ``sample_top_neighbors()`` and gets its
    quality measure through the ``None`` (auto-load) path; the 9mer set
    must reject ``self.frag3q`` with a ``ValueError`` before accepting
    ``self.frag9q``.

    :return: The figure filled by ``plot_fragment_profiles``.
    """
    expected_seq = "KIPVPVVVNGKIVAVVVVPPENLEEALLEALKELGLIKDPEEVKAVVVSPDGRLELSF"
    expected_sse = "EEEEEEEELLEEEEEEEELLLLHHHHHHHHHHHHLLLLLLLLLLEEEEELLLEEEEEE"

    small = parse_rosetta_fragments(self.frag3).sample_top_neighbors()
    large = parse_rosetta_fragments(self.frag9)

    # auto-load
    small = small.add_quality_measure(None)

    # load target quality file: the 3mer one is expected to be refused
    with pytest.raises(ValueError):
        large = large.add_quality_measure(self.frag3q)
    large = large.add_quality_measure(self.frag9q)

    assert small.is_comparable(large) is False
    for frags in (small, large):
        assert 'rmsd' in frags

    consensus_seq = large.select_quantile().quick_consensus_sequence()
    consensus_sse = (
        large.select_quantile().quick_consensus_secondary_structure())
    assert consensus_seq == expected_seq
    assert consensus_sse == expected_sse

    fig = plt.figure(figsize=(25, 10))
    plot_fragment_profiles(fig, small, large, consensus_seq, consensus_sse)
    plt.tight_layout()
    return fig
def test_frequency_matrices_and_networks(self):
    """Cross-check sequence matrices against the frequency networks.

    Builds the sequence matrix of the best-quantile 3mers, then the
    frequency matrix and both frequency networks of the best-quantile
    9mers, and verifies that ``1 - edge weight`` in the per-position
    network equals the matching entry of the frequency matrix.
    """
    quantile = 0.1

    small = parse_rosetta_fragments(self.frag3)
    large = parse_rosetta_fragments(self.frag9)
    # auto-load
    small = small.add_quality_measure(None)
    # load target quality file
    large = large.add_quality_measure(self.frag9q)

    scores = small.select_quantile(quantile).make_sequence_matrix()
    assert scores.min().min() == -9

    freqs = large.select_quantile(quantile).make_sequence_matrix(
        frequency=True)
    per_position = large.select_quantile(
        quantile).make_per_position_frequency_network()
    overall = large.select_quantile(quantile).make_frequency_network()
    assert freqs.shape == (58, 20)
    assert per_position.number_of_edges() > overall.number_of_edges()

    weight = per_position.get_edge_data("0X", "1A")['weight']
    assert freqs["A"].values[0] == pytest.approx(1 - weight)

    position = 6
    target = "{}R".format(position + 1)
    for aa in "ARNDCQEGHILKMFPSTWYV":
        origin = "{}{}".format(position, aa)
        # Only residues present as nodes at this position can be checked.
        if origin not in per_position:
            continue
        weight = per_position.get_edge_data(origin, target)['weight']
        assert freqs["R"].values[position] == pytest.approx(1 - weight)
def test_concat_fragments(self):
    """Concatenate three frame ranges and compare two written variants.

    Three slices of the 3mer fragments (frames 9-12, 22-24 and 45-46) are
    mixed out of order with ``concat_fragments``; the result must hold
    5400 rows with frames renumbered 1..9.  The mix is then written once
    non-strict and once strict (after ``renumber(10).top_limit(30)``) and
    the two re-parsed files must not be comparable.

    Every file created on disk is removed again, whether or not the
    assertions pass.
    """
    import os

    # load fragments
    _3mers = parse_rosetta_fragments(self.frag3)
    frame = _3mers['frame']
    # make chunks
    _3mers_1 = _3mers[(frame >= 9) & (frame <= 12)]
    _3mers_2 = _3mers[(frame >= 22) & (frame <= 24)]
    _3mers_3 = _3mers[(frame >= 45) & (frame <= 46)]
    # mix fragments
    m = concat_fragments([_3mers_3, _3mers_1, _3mers_2])
    # checkpoints
    assert len(m) == 5400
    assert list(m.drop_duplicates('frame')['frame']) == list(range(1, 10))

    # delete=False leaves the placeholder files on disk, so track every
    # path produced here and remove them in the finally clause.
    leftovers = []
    try:
        f1 = NamedTemporaryFile(delete=False)
        f1.close()
        leftovers.append(f1.name)
        nonstrict = write_rosetta_fragments(m, 3, 300, f1.name, False)
        # The returned name is what gets parsed below; it may or may not
        # equal the prefix, so it is tracked separately.
        leftovers.append(nonstrict)

        f2 = NamedTemporaryFile(delete=False)
        f2.close()
        leftovers.append(f2.name)
        isstrict = write_rosetta_fragments(m.renumber(10).top_limit(30),
                                           prefix=f2.name, strict=True)
        leftovers.append(isstrict)

        assert not parse_rosetta_fragments(nonstrict).is_comparable(
            parse_rosetta_fragments(isstrict))
    finally:
        for path in leftovers:
            # isfile() also skips a path already removed as a duplicate.
            if os.path.isfile(path):
                os.remove(path)
def main(options):
    """Plot the quality of a small and a large fragment set side by side.

    :param options: Object exposing ``fsmall``/``flarge`` (fragment
        files), ``qsmall``/``qlarge`` and ``pdb`` (passed to
        ``add_quality_measure``), ``format`` (``"h"`` selects the
        horizontal layout, anything else the vertical one), ``ofile``
        (output image, skipped when ``None``) and ``silent`` (suppresses
        ``plt.show()`` when truthy).
    :return: The created figure.
    """
    # Read Fragment Files
    small_f = parse_rosetta_fragments(options.fsmall)
    large_f = parse_rosetta_fragments(options.flarge)

    # Read or calculate Fragment Quality
    small_f = small_f.add_quality_measure(options.qsmall, options.pdb)
    large_f = large_f.add_quality_measure(options.qlarge, options.pdb)

    # Everything that depends on the orientation is decided in one place.
    horizontal = options.format == "h"
    if horizontal:
        figsize, grid, second_cell, titles = (40, 10), (1, 2), (0, 1), "top"
        layout = {'pad': 2}
    else:
        figsize, grid, second_cell, titles = (20, 20), (2, 1), (1, 0), "right"
        layout = {'rect': (0.037, 0, 1, 1)}

    # Plot
    fig = plt.figure(figsize=figsize)
    small_ax = plt.subplot2grid(grid, (0, 0), fig=fig)
    large_ax = plt.subplot2grid(grid, second_cell, fig=fig)
    for axis in (small_ax, large_ax):
        axis.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
    plot_fragments(small_f, large_f, small_ax=small_ax, large_ax=large_ax,
                   showfliers=False, titles=titles)
    plt.tight_layout(**layout)

    # Write to file
    if options.ofile is not None:
        plt.savefig(options.ofile)

    # Show on screen
    if not options.silent:
        plt.show()

    return fig
def test_add_fragments_append(self):
    """Append a subset of the 3mers onto the full set and plot the result.

    The subset is every row with ``frame <= 10`` and ``neighbor <= 100``;
    it is added back with ``add_fragments(..., 10, 'append')``.  The top
    panel shows the highest ``neighbor`` value per frame, the bottom panel
    the first unique ``neighbors`` value per frame.

    :return: The two-panel figure.
    """
    fragments = parse_rosetta_fragments(self.frag3)
    in_subset = (fragments['frame'] <= 10) & (fragments['neighbor'] <= 100)
    subset = fragments[in_subset]
    merged = fragments.add_fragments(subset, 10, 'append')

    fig = plt.figure(figsize=(25, 10))

    top_ax = plt.subplot2grid((2, 1), (0, 0))
    positions = range(len(merged.groupby('frame')))
    highest_neighbor = [
        max(group['neighbor']) for _, group in merged.groupby('frame')
    ]
    top_ax.bar(positions, highest_neighbor)

    bottom_ax = plt.subplot2grid((2, 1), (1, 0))
    declared_neighbors = [
        group['neighbors'].unique()[0]
        for _, group in merged.groupby('frame')
    ]
    bottom_ax.bar(positions, declared_neighbors)

    plt.tight_layout()
    return fig
def make_fragment_files(self,
                        dfloop: pd.DataFrame,
                        edges: Dict,
                        masfile: Path,
                        no_loop: Optional[bool] = True) -> Tuple[Dict, Path]:
    """Combine the fragments from the different matches.

    For every row of ``dfloop`` the 3mer and 9mer fragment files are
    parsed, cut to the matched region, sub-sampled, renumbered from
    ``edges['ini']`` and limited to ``edges['end']``.  The per-match sets
    are appended into one 3mer and one 9mer set, each reduced to 200 top
    neighbors, written as fragment files plus a ``.csv`` companion, and
    plotted.

    :param dfloop: One row per match.  Columns read here: ``loop_length``,
        ``loop``, ``order``, ``3mers``, ``9mers``, ``pdb``, ``chain``,
        ``start`` and ``stop``.  Must contain at least one row.
    :param edges: Mapping with the keys ``ini`` and ``end``; stored as-is
        in the returned data.
    :param masfile: Only its parent directory is used; output goes to a
        ``loopNN`` sub-folder of it (created if missing).
    :param no_loop: Kept for backward compatibility.  Both values have
        always run the exact same steps, so it has no effect.
        NOTE(review): confirm whether the two cases were meant to differ.
    :return: ``(data, nfolder)`` where ``data`` holds ``loop_length``,
        ``abego``, ``edges``, ``fragfiles`` (3mers first, then 9mers) and
        ``match_count`` (always 0 here), and ``nfolder`` is the output
        folder.
    """
    data = {
        'loop_length': int(dfloop.iloc[0]['loop_length']),
        'abego': list(dfloop['loop'].values),
        'edges': edges,
        'fragfiles': [],
        'match_count': 0
    }

    # Spread the 200-fragment budget evenly over the available matches.
    sample = math.ceil(200 / dfloop.shape[0])

    def load_match(row, size):
        """Parse one fragment file of a match and trim it to the loop."""
        frags = parse_rosetta_fragments(
            str(row[size]), source=f'{row["pdb"]}_{row["chain"]}')
        # Remember: MASTER match starts with 0!
        frags = frags.slice_region(row['start'] + 1, row['stop'] + 1)
        frags = frags.sample_top_neighbors(sample)
        return frags.renumber(edges['ini']).top_limit(edges['end'])

    dfs3 = []
    dfs9 = []
    for _, row in dfloop.iterrows():
        dfs3.append(load_match(row, '3mers'))
        dfs9.append(load_match(row, '9mers'))

    # Merge Fragments
    dfs3all, dfs9all = dfs3[0], dfs9[0]
    for extra3, extra9 in zip(dfs3[1:], dfs9[1:]):
        dfs3all = dfs3all.add_fragments(extra3, ini=edges['ini'],
                                        how='append')
        dfs9all = dfs9all.add_fragments(extra9, ini=edges['ini'],
                                        how='append')
    dfs3all = dfs3all.sample_top_neighbors(200)
    dfs9all = dfs9all.sample_top_neighbors(200)
    merged = (('3mers', dfs3all), ('9mers', dfs9all))

    # set up
    lord = int(dfloop.order.drop_duplicates().values[0])
    nfolder = masfile.parent.absolute().joinpath(f'loop{lord:02d}')
    nfolder.mkdir(parents=True, exist_ok=True)
    masfile2 = str(nfolder.joinpath(f'jump{lord:02d}'))

    # Both sizes share one prefix; the name actually written is whatever
    # write_rosetta_fragments returns.
    for label, frags in merged:
        self.log.debug(f'Writing {label} fragfile\n')
        data['fragfiles'].append(
            write_rosetta_fragments(frags, prefix=masfile2, strict=True))
        self.log.debug(f'{label} fragfile: {data["fragfiles"][-1]}\n')

    # The csv companions keep only the columns that are not listed here.
    dropped_columns = [
        'pdb', 'frame', 'neighbors', 'neighbor', 'aa', 'sse', 'phi', 'psi',
        'omega'
    ]
    for fragfile, (_, frags) in zip(data['fragfiles'], merged):
        frags.drop(columns=dropped_columns).to_csv(fragfile + '.csv',
                                                   index=False)

    imageprefix = Path(masfile2).with_suffix('.fragprofile')
    TBPlot.plot_fragment_templates(self.log, dfs3all, dfs9all, imageprefix)

    return data, nfolder
def make_fragment_files(dfloop: pd.DataFrame, edges: Dict, masfile: Path) -> Dict:
    """Combine the fragments from the different matches into two files.

    For every row of ``dfloop`` the 3mer and 9mer fragment files are
    parsed, cut to the matched region, sub-sampled, renumbered from
    ``edges['ini']`` and limited to ``edges['end']``.  The per-match sets
    are appended into one 3mer and one 9mer set, each reduced to 200 top
    neighbors, written as fragment files plus a ``.csv`` companion, and
    plotted.

    :param dfloop: One row per match.  Columns read here: ``loop_length``,
        ``loop``, ``3mers``, ``9mers``, ``pdb``, ``chain`` and ``match``
        (indexed as ``match[0][0]`` and ``match[1][1]`` for the region
        limits).  Must contain at least one row.
    :param edges: Mapping with the keys ``ini`` and ``end``; stored as-is
        in the returned data.
    :param masfile: Its path without suffix is the prefix of the fragment
        files; with the ``.fragprofile`` suffix it is the image prefix.
    :return: Dictionary with ``loop_length``, ``abego``, ``edges``,
        ``fragfiles`` (3mers first, then 9mers) and ``match_count``
        (always 0 here).
    """
    data = {
        'loop_length': int(dfloop.iloc[0]['loop_length']),
        'abego': list(dfloop['loop'].values),
        'edges': edges,
        'fragfiles': [],
        'match_count': 0
    }

    # Spread the 200-fragment budget evenly over the available matches.
    sample = math.ceil(200 / dfloop.shape[0])

    def load_match(row, size):
        """Parse one fragment file of a match and trim it to the loop."""
        frags = parse_rosetta_fragments(
            str(row[size]),
            source='{}_{}'.format(row['pdb'], row['chain']))
        # Remember: MASTER match starts with 0!
        frags = frags.slice_region(row['match'][0][0] + 1,
                                   row['match'][1][1] + 1)
        frags = frags.sample_top_neighbors(sample)
        return frags.renumber(edges['ini']).top_limit(edges['end'])

    dfs3 = []
    dfs9 = []
    for _, row in dfloop.iterrows():
        dfs3.append(load_match(row, '3mers'))
        dfs9.append(load_match(row, '9mers'))

    # Merge Fragments
    dfs3all, dfs9all = dfs3[0], dfs9[0]
    for extra3, extra9 in zip(dfs3[1:], dfs9[1:]):
        dfs3all = dfs3all.add_fragments(extra3, ini=edges['ini'],
                                        how='append')
        dfs9all = dfs9all.add_fragments(extra9, ini=edges['ini'],
                                        how='append')
    dfs3all = dfs3all.sample_top_neighbors(200)
    dfs9all = dfs9all.sample_top_neighbors(200)

    debug = TBcore.get_option('system', 'debug')
    # Both sizes share one prefix; the name actually written is whatever
    # write_rosetta_fragments returns.
    prefix = str(masfile.with_suffix(''))

    if debug:
        sys.stdout.write('Writing 3mers fragfile\n')
    data['fragfiles'].append(
        write_rosetta_fragments(dfs3all, prefix=prefix, strict=True))
    if debug:
        sys.stdout.write('3mers fragfile: {}\n'.format(data['fragfiles'][-1]))
        sys.stdout.write('Writing 9mers fragfile\n')
    data['fragfiles'].append(
        write_rosetta_fragments(dfs9all, prefix=prefix, strict=True))
    if debug:
        sys.stdout.write('9mers fragfile: {}\n'.format(data['fragfiles'][-1]))

    # The csv companions keep only the columns that are not listed here.
    dropped_columns = [
        'pdb', 'frame', 'neighbors', 'neighbor', 'aa', 'sse', 'phi', 'psi',
        'omega'
    ]
    for fragfile, frags in zip(data['fragfiles'], (dfs3all, dfs9all)):
        frags.drop(columns=dropped_columns).to_csv(fragfile + '.csv',
                                                   index=False)

    imageprefix = masfile.with_suffix('.fragprofile')
    TButil.plot_fragment_templates(dfs3all, dfs9all, imageprefix)

    return data
TButil.checkpoint_out(checkpoint, data) return case def loop_master_protocol(case: Case, folders: Path) -> Tuple[str, str]: """ """ lf = case['metadata.loop_fragments'] if lf is None: raise TButil.PluginOrderError( 'Data that should be loaded through loop_master is not found.') for i, loop in enumerate(lf): if i == 0: ff3 = parse_rosetta_fragments(loop['fragfiles'][0]) ff9 = parse_rosetta_fragments(loop['fragfiles'][1]) df3 = [ pd.read_csv(str(loop['fragfiles'][0]) + '.csv'), ] df9 = [ pd.read_csv(str(loop['fragfiles'][1]) + '.csv'), ] else: df3.append(pd.read_csv(str(loop['fragfiles'][0]) + '.csv')) df9.append(pd.read_csv(str(loop['fragfiles'][1]) + '.csv')) ff3 = ff3.add_fragments(parse_rosetta_fragments( loop['fragfiles'][0]), ini=int(loop['edges']['ini']), how='append') ff9 = ff9.add_fragments(parse_rosetta_fragments(
# Checkpoint save TButil.checkpoint_out(self.log, checkpoint, data) return case def loop_master_protocol(self, case: Case, folders: Path) -> Tuple[str, str]: """ """ lf = case['metadata.loop_fragments'] if lf is None: raise NodeMissingError( 'Data that should be loaded through loop_master is not found.') for i, loop in enumerate(lf): if i == 0: ff3 = parse_rosetta_fragments(loop['fragfiles'][0]) ff9 = parse_rosetta_fragments(loop['fragfiles'][1]) df3 = [ pd.read_csv(str(loop['fragfiles'][0]) + '.csv'), ] df9 = [ pd.read_csv(str(loop['fragfiles'][1]) + '.csv'), ] else: df3.append(pd.read_csv(str(loop['fragfiles'][0]) + '.csv')) df9.append(pd.read_csv(str(loop['fragfiles'][1]) + '.csv')) ff3 = ff3.add_fragments(parse_rosetta_fragments( loop['fragfiles'][0]), ini=int(loop['edges']['ini']), how='append') ff9 = ff9.add_fragments(parse_rosetta_fragments(