def enrichment_pymol_script(enrichment_table, output_file,
                            sphere_view=True, chain=None):
    """
    Create a Pymol .pml script to visualize EC "enrichment"

    Only rows with enrichment > 1 are visualized. The "top"
    5% / 15% groups are taken by row order of the filtered table.

    Parameters
    ----------
    enrichment_table : pandas.DataFrame
        Mapping of position (column i) to EC enrichment
        (column enrichment), as returned by
        evcouplings.couplings.pairs.enrichment().
        NOTE(review): presumably sorted by decreasing enrichment,
        since the first rows are treated as the most enriched -
        confirm against caller. The table itself is not modified.
    output_file : str
        File path where to store pml script
    sphere_view : bool, optional (default: True)
        If True, create pml that highlights enriched positions
        with spheres and color (spheres for the first 15% of the
        filtered rows); if False, create pml that highlights
        enrichment using b-factor and "cartoon putty"
    chain : str, optional (default: None)
        Use this PDB chain in residue selection
    """
    # explicit copy: the columns added below must go into an independent
    # table, not into a (potential) view of the caller's DataFrame; this
    # also avoids pandas' SettingWithCopyWarning
    t = enrichment_table.query("enrichment > 1").copy()

    # compute boundaries for highly coupled residues
    # that will be specially highlighted
    boundary1 = int(0.05 * len(t))  # top 5%
    boundary2 = int(0.15 * len(t))  # top 15%

    t["b_factor"] = t["enrichment"]

    # set color for "low" enrichment (anything > 1)
    t["color"] = "yelloworange"
    # high
    t.loc[t.iloc[0:boundary1].index, "color"] = "red"
    # medium
    t.loc[t.iloc[boundary1:boundary2].index, "color"] = "orange"

    if sphere_view:
        # rows beyond the top 15% get no "show" value (left as NaN)
        t.loc[t.iloc[0:boundary2].index, "show"] = "spheres"

    if chain is not None:
        chain_sel = ", chain '{}'".format(chain)
    else:
        chain_sel = ""

    with open(output_file, "w") as f:
        # neutral background representation before per-residue settings
        f.write("as cartoon{}\n".format(chain_sel))
        f.write("color grey80{}\n".format(chain_sel))

        # reset b-factors so only mapped positions carry a value
        if chain is None:
            f.write("alter all, b=0.0\n")
        else:
            f.write("alter chain '{}', b=0.0\n".format(chain))

        pymol_mapping(t, f, chain)

        if not sphere_view:
            f.write("cartoon putty{}\n".format(chain_sel))
def enrichment_pymol_script(enrichment_table, output_file,
                            sphere_view=True, chain=None, legacy=False):
    """
    Create a Pymol .pml script to visualize EC "enrichment"

    In legacy mode only rows with enrichment > 1 are visualized,
    using three colors (top 5%, next 10%, rest). Otherwise all rows
    are visualized, split by row order into nine color classes.

    Parameters
    ----------
    enrichment_table : pandas.DataFrame
        Mapping of position (column i) to EC enrichment
        (column enrichment), as returned by
        evcouplings.couplings.pairs.enrichment().
        NOTE(review): presumably sorted by decreasing enrichment,
        since the first rows are treated as the most enriched -
        confirm against caller. The table itself is not modified.
    output_file : str
        File path where to store pml script
    sphere_view : bool, optional (default: True)
        If True, create pml that highlights enriched positions
        with spheres and color (legacy mode: first 15% of the
        filtered rows; otherwise: all rows); if False, create pml
        that highlights enrichment using b-factor and
        "cartoon putty"
    chain : str, optional (default: None)
        Use this PDB chain in residue selection
    legacy: bool, optional (default: False)
        Use legacy (2011) red and yellow colormap for EC enrichment
    """
    if legacy:
        # explicit copy: avoid writing into a (potential) view of the
        # caller's table and pandas' SettingWithCopyWarning
        t = enrichment_table.query("enrichment > 1").copy()
        t["b_factor"] = t["enrichment"]

        # compute boundaries for highly coupled residues
        # that will be specially highlighted
        boundary1 = int(0.05 * len(t))  # top 5%
        boundary2 = int(0.15 * len(t))  # top 15%

        # set color for "low" enrichment (anything > 1)
        t["color"] = "yelloworange"
        # high
        t.loc[t.iloc[0:boundary1].index, "color"] = "red"
        # medium
        t.loc[t.iloc[boundary1:boundary2].index, "color"] = "orange"

        # legacy view: spheres for the top 15% only. Previously the
        # sphere selection relied on a loop variable that is only
        # bound in the non-legacy branch.
        sphere_boundary = boundary2
    else:
        t = enrichment_table.copy()
        t["b_factor"] = t["enrichment"]

        # set boundaries for enrichment levels
        # that will be specially highlighted
        # create nine subsets
        fractions = (0.11, 0.22, 0.33, 0.44, 0.55, 0.66, 0.77, 0.88, 1.00)
        boundary_list = [int(fraction * len(t)) for fraction in fractions]

        # list of colors to color each category
        # must be same length and order as boundary_list
        # list of rgb tuples
        color_list = [
            (77, 0, 75),  # dark purple
            (129, 15, 124),
            (136, 65, 157),
            (140, 107, 177),
            (140, 150, 198),
            (158, 188, 218),
            (191, 211, 230),
            (224, 236, 244),
            (247, 252, 253),  # almost white
        ]

        # convert to fractions
        color_list = [
            (red / 255, green / 255, blue / 255)
            for red, green, blue in color_list
        ]

        # rows [prior_boundary, boundary) get custom color number idx
        prior_boundary = 0
        for idx, boundary in enumerate(boundary_list):
            t.loc[
                t.iloc[prior_boundary:boundary].index, "color"
            ] = "color{}".format(idx)
            prior_boundary = boundary

        # last boundary equals len(t), i.e. spheres for all rows
        sphere_boundary = boundary_list[-1]

    if sphere_view:
        t.loc[t.iloc[0:sphere_boundary].index, "show"] = "spheres"

    if chain is not None:
        chain_sel = ", chain '{}'".format(chain)
    else:
        chain_sel = ""

    with open(output_file, "w") as f:
        # reset b-factors so only mapped positions carry a value
        if chain is None:
            f.write("alter all, b=0.0\n")
        else:
            f.write("alter chain '{}', b=0.0\n".format(chain))

        # for legacy mode, background color is grey80
        if legacy:
            f.write("color grey80{}\n".format(chain_sel))
        # for non-legacy mode, background color is
        # the last color in the spectrum
        else:
            # custom colors must be defined before they can be used
            for idx, c in enumerate(color_list):
                f.write("set_color color{}, [{},{},{}]\n".format(
                    idx, c[0], c[1], c[2]))

            f.write("color color{}{}\n".format(
                len(boundary_list) - 1, chain_sel))

        f.write("as cartoon{}\n".format(chain_sel))

        pymol_mapping(t, f, chain)

        if not sphere_view:
            f.write("cartoon putty{}\n".format(chain_sel))
def mutation_pymol_script(mutation_table, output_file,
                          effect_column="prediction_epistatic",
                          mutant_column="mutant", agg_func="mean",
                          cmap=plt.cm.RdBu_r, segment_to_chain_mapping=None):
    """
    Create a Pymol .pml script to visualize single mutation effects

    Single mutants are grouped by segment; within each segment, effects
    are aggregated per position and mapped to colors on a scale that is
    symmetric around zero (computed separately for each segment).

    Parameters
    ----------
    mutation_table : pandas.DataFrame
        Table with mutation effects (will be filtered
        for single mutants)
    output_file : str
        File path where to store pml script
    effect_column : str, optional (default: "prediction_epistatic")
        Column in mutation_table that contains mutation effects
    mutant_column : str, optional (default: "mutant")
        Column in mutation_table that contains mutations
        (in format "A123G")
    agg_func : str, optional (default: "mean")
        Function used to aggregate single mutations into one
        aggregated effect per position (any pandas
        aggregation operation, including "mean", "min", "max")
    cmap : matplotlib.colors.LinearSegmentedColormap, optional
            (default: plt.cm.RdBu_r)
        Colormap used to map mutation effects to colors
    segment_to_chain_mapping: str or dict(str -> str), optional (default: None)
        PDB chain(s) that should be targeted by the residue selection

        * If None, residues will be selected by position alone, which
          may cause wrong assignments if multiple chains are present
          in the structure.

        * If a str, this chain is used for all segments.

        * Different chains can be assigned for each segment if a
          dictionary that maps from segment (str) to PDB chain (str)
          is given. Rows without a segment are grouped under the
          segment name "none".

    Raises
    ------
    ValueError
        If no single mutants contained in mutation_table
    ValueError
        If mutation_table contains a segment identifier not
        found in segment_to_chain_mapping
    """
    # split mutation strings
    t = split_mutants(mutation_table, mutant_column)

    # only pick single mutants; explicit copy so the segment column
    # can be set below without writing into a view of another table
    t = t.query("num_mutations == 1").copy()

    if len(t) == 0:
        raise ValueError(
            "mutation_table does not contain any single "
            "amino acid substitutions."
        )

    # groupby needs a non-missing key, so use the placeholder "none"
    # for missing segments. Only the segment column is filled: filling
    # the whole table would turn missing effects into strings and
    # break the numeric aggregation below.
    if "segment" not in t.columns:
        t["segment"] = "none"
    else:
        t["segment"] = t["segment"].fillna("none")

    with open(output_file, "w") as f:
        # chain selections whose background (cartoon, grey) has
        # already been written
        initialized_selections = set()

        # handle each segment independently
        for segment_name, segment_table in t.groupby("segment"):
            if segment_to_chain_mapping is None:
                chain = None
            elif isinstance(segment_to_chain_mapping, str):
                chain = segment_to_chain_mapping
            elif segment_name not in segment_to_chain_mapping:
                raise ValueError(
                    "Segment name {} has no mapping to PyMOL "
                    "chain. Available mappings are: {}".format(
                        segment_name, segment_to_chain_mapping
                    )
                )
            else:
                chain = segment_to_chain_mapping[segment_name]

            # aggregate into positional information
            effects = segment_table.loc[:, ["pos", effect_column]].rename(
                columns={"pos": "i", effect_column: "effect"}
            )

            t_agg = effects.groupby("i").agg(agg_func).reset_index()
            t_agg["i"] = pd.to_numeric(t_agg["i"]).astype(int)

            # map aggregated effects to colors
            # (symmetric range so that zero sits at the colormap center)
            max_val = t_agg["effect"].abs().max()
            mapper = colormap(-max_val, max_val, cmap)
            t_agg["color"] = t_agg["effect"].map(mapper)
            t_agg["show"] = "spheres"

            if chain is not None:
                chain_sel = ", chain '{}'".format(chain)
            else:
                chain_sel = ""

            # Write the background only once per selection. Repeating
            # "as cartoon" / "color grey80" for a later segment that
            # targets the same selection would reset whatever was
            # written for earlier segments.
            # NOTE(review): assumes pymol_mapping emits per-residue
            # color/show commands - confirm.
            if chain_sel not in initialized_selections:
                f.write("as cartoon{}\n".format(chain_sel))
                f.write("color grey80{}\n".format(chain_sel))
                initialized_selections.add(chain_sel)

            pymol_mapping(t_agg, f, chain, atom="CA")
def mutation_pymol_script(mutation_table, output_file,
                          effect_column="prediction_epistatic",
                          mutant_column="mutant", agg_func="mean",
                          cmap=plt.cm.RdBu_r, chain=None):
    """
    Write a Pymol .pml script that displays single mutation
    effects, aggregated per position, as colored spheres

    Parameters
    ----------
    mutation_table : pandas.DataFrame
        Table with mutation effects (only single mutants
        are used, all other rows are ignored)
    output_file : str
        Path of the pml script that will be written
    effect_column : str, optional (default: "prediction_epistatic")
        Name of the column in mutation_table holding
        the mutation effects
    mutant_column : str, optional (default: "mutant")
        Name of the column in mutation_table holding
        the mutations (in format "A123G")
    agg_func : str, optional (default: "mean")
        Aggregation applied to all single mutations of a
        position to obtain one effect per position (any pandas
        aggregation operation, including "mean", "min", "max")
    cmap : matplotlib.colors.LinearSegmentedColormap, optional
            (default: plt.cm.RdBu_r)
        Colormap used to translate aggregated effects into colors
    chain : str, optional (default: None)
        Restrict residue selection to this PDB chain

    Raises
    ------
    ValueError
        If mutation_table does not contain any single mutants
    """
    # parse mutation strings and keep single substitutions only
    singles = split_mutants(
        mutation_table, mutant_column
    ).query("num_mutations == 1")

    if not len(singles):
        raise ValueError("mutation_table does not contain any single "
                         "amino acid substitutions.")

    # reduce to (position, effect) pairs ...
    per_mutant = singles.loc[:, ["pos", effect_column]].rename(
        columns={"pos": "i", effect_column: "effect"}
    )

    # ... and collapse them into one row per position
    per_position = per_mutant.groupby("i").agg(agg_func).reset_index()
    per_position.loc[:, "i"] = pd.to_numeric(per_position.i).astype(int)

    # color scale is symmetric around zero, spanning
    # the largest absolute aggregated effect
    largest_effect = per_position.effect.abs().max()
    effect_to_color = colormap(-largest_effect, largest_effect, cmap)
    per_position.loc[:, "color"] = per_position.effect.map(effect_to_color)
    per_position.loc[:, "show"] = "spheres"

    # optional chain restriction appended to PyMOL selections
    chain_sel = "" if chain is None else ", chain '{}'".format(chain)

    with open(output_file, "w") as script:
        # grey cartoon background first, then per-position settings
        for template in ("as cartoon{}\n", "color grey80{}\n"):
            script.write(template.format(chain_sel))

        pymol_mapping(per_position, script, chain, atom="CA")