def generate_2x1(consensus, rna=False):
    """Generate the 2x1 variants of a consensus sequence.

    The consensus is first passed through ``seqlib.standardize``. Then, for
    every position, the base at that position is replaced by a pair of
    bases, each taken from ``seqlib.allOtherBases(base, rna)`` (presumably
    the bases that differ from the original one). Every variant is therefore
    one base longer than the standardized sequence.

    Args:
        consensus: consensus sequence to mutate.
        rna: flag forwarded unchanged to the ``seqlib`` helpers.

    Returns:
        List of variant sequences, ordered by position, then by the first
        and the second substituted base.
    """
    seq = seqlib.standardize(consensus, rna)
    return [
        seq[:pos] + first + second + seq[pos + 1:]
        for pos, base in enumerate(seq)
        for first in seqlib.allOtherBases(base, rna)
        for second in seqlib.allOtherBases(base, rna)
    ]
def generate_2x1(consensus, rna=False):
    """Generate the 2x1 variants of a consensus sequence.

    After standardizing the consensus with ``seqlib.standardize``, each
    single base is swapped for two bases, both drawn from
    ``seqlib.allOtherBases(base, rna)``. The resulting variants are one base
    longer than the standardized sequence.

    Args:
        consensus: consensus sequence to mutate.
        rna: flag forwarded unchanged to the ``seqlib`` helpers.

    Returns:
        List of variant sequences in position order.
    """
    seq = seqlib.standardize(consensus, rna)
    variants = []
    for pos, base in enumerate(seq):
        # Everything before and after the replaced base is kept as is
        prefix, suffix = seq[:pos], seq[pos + 1:]
        for first in seqlib.allOtherBases(base, rna):
            variants.extend(
                prefix + first + second + suffix
                for second in seqlib.allOtherBases(base, rna)
            )
    return variants
def generate_2x3(consensus, rna=False):
    """Generate the 2x3 variants of a consensus sequence.

    The consensus is standardized with ``seqlib.standardize``. Every window
    of three consecutive bases is then replaced by two bases: a substitute
    for the first base and a substitute for the third base, each taken from
    ``seqlib.allOtherBases``. The middle base of the window is dropped, so
    every variant is one base shorter than the standardized sequence.

    Args:
        consensus: consensus sequence to mutate.
        rna: flag forwarded unchanged to the ``seqlib`` helpers.

    Returns:
        List of variant sequences in window order. Empty when the
        standardized sequence has fewer than three bases.
    """
    seq = seqlib.standardize(consensus, rna)
    listVariants = []
    # seq[:-2] stops two short of the end so the window i..i+2 always fits
    for i, base in enumerate(seq[:-2]):
        thirdBase = seq[i + 2]
        for otherBase1 in seqlib.allOtherBases(base, rna):
            for otherBase3 in seqlib.allOtherBases(thirdBase, rna):
                variant = seq[:i] + otherBase1 + otherBase3 + seq[i + 3:]
                listVariants.append(variant)
    return listVariants
def generate_2x3(consensus, rna=False):
    """Generate the 2x3 variants of a consensus sequence.

    Each window of three consecutive bases of the standardized consensus is
    replaced by two bases: one from ``seqlib.allOtherBases`` of the first
    base of the window and one from ``seqlib.allOtherBases`` of the third
    base. The middle base is removed, so variants are one base shorter than
    the standardized sequence.

    Args:
        consensus: consensus sequence to mutate.
        rna: flag forwarded unchanged to the ``seqlib`` helpers.

    Returns:
        List of variant sequences in window order. Empty when the
        standardized sequence has fewer than three bases.
    """
    seq = seqlib.standardize(consensus, rna)
    # Only the first and third base of each window are read; the middle
    # base is simply left out of the variant.
    return [
        seq[:i] + otherBase1 + otherBase3 + seq[i + 3:]
        for i, base in enumerate(seq[:-2])
        for otherBase1 in seqlib.allOtherBases(base, rna)
        for otherBase3 in seqlib.allOtherBases(seq[i + 2], rna)
    ]
def generate_4x4(consensus, rna=False):
    """Generate the 4x4 variants of a consensus sequence.

    The consensus is standardized with ``seqlib.standardize``. Every window
    of four consecutive bases is then rewritten: the first and fourth bases
    are replaced by bases from ``seqlib.allOtherBases``, while the second
    and third positions take every base returned by ``seqlib.allBases``
    (so they are not required to differ from the original). Variants have
    the same length as the standardized sequence.

    Args:
        consensus: consensus sequence to mutate.
        rna: flag forwarded unchanged to the ``seqlib`` helpers.

    Returns:
        List of variant sequences in window order. Empty when the
        standardized sequence has fewer than four bases.
    """
    seq = seqlib.standardize(consensus, rna)
    listVariants = []
    # seq[:-3] stops three short of the end so the window i..i+3 always fits
    for i, base in enumerate(seq[:-3]):
        fourthBase = seq[i + 3]
        for otherBase1 in seqlib.allOtherBases(base, rna):
            for newBase2 in seqlib.allBases(rna):
                for newBase3 in seqlib.allBases(rna):
                    for otherBase4 in seqlib.allOtherBases(fourthBase, rna):
                        variant = (seq[:i] + otherBase1 + newBase2
                                   + newBase3 + otherBase4 + seq[i + 4:])
                        listVariants.append(variant)
    return listVariants
def generate_4x4(consensus, rna=False):
    """Generate the 4x4 variants of a consensus sequence.

    For each window of four consecutive bases of the standardized
    consensus, the outer two bases are substituted with bases from
    ``seqlib.allOtherBases`` and the inner two positions are filled with
    every combination of bases from ``seqlib.allBases``. Variants keep the
    length of the standardized sequence.

    Args:
        consensus: consensus sequence to mutate.
        rna: flag forwarded unchanged to the ``seqlib`` helpers.

    Returns:
        List of variant sequences in window order. Empty when the
        standardized sequence has fewer than four bases.
    """
    seq = seqlib.standardize(consensus, rna)
    # Only the first and fourth base of each window are read; the inner
    # two positions are overwritten with every base combination.
    return [
        seq[:i] + otherBase1 + newBase2 + newBase3 + otherBase4 + seq[i + 4:]
        for i, base in enumerate(seq[:-3])
        for otherBase1 in seqlib.allOtherBases(base, rna)
        for newBase2 in seqlib.allBases(rna)
        for newBase3 in seqlib.allBases(rna)
        for otherBase4 in seqlib.allOtherBases(seq[i + 3], rna)
    ]
def generate_1x2(consensus, rna=False):
    """Generate the 1x2 variants of a consensus sequence.

    The consensus is standardized with ``seqlib.standardize``. Each pair of
    adjacent bases is then replaced by a single base taken from
    ``seqlib.allOtherBases([base, nextBase], rna)`` (presumably the bases
    that differ from both members of the pair). Every variant is one base
    shorter than the standardized sequence.

    Args:
        consensus: consensus sequence to mutate.
        rna: flag forwarded unchanged to the ``seqlib`` helpers.

    Returns:
        List of variant sequences in pair order. Empty when the
        standardized sequence has fewer than two bases.
    """
    seq = seqlib.standardize(consensus, rna)
    return [
        seq[:pos] + replacement + seq[pos + 2:]
        for pos, base in enumerate(seq[:-1])
        for replacement in seqlib.allOtherBases([base, seq[pos + 1]], rna)
    ]
def generate_1x2(consensus, rna=False):
    """Generate the 1x2 variants of a consensus sequence.

    Two adjacent bases of the standardized consensus are collapsed into one
    base drawn from ``seqlib.allOtherBases([base, nextBase], rna)``, for
    every adjacent pair in turn. Variants are one base shorter than the
    standardized sequence.

    Args:
        consensus: consensus sequence to mutate.
        rna: flag forwarded unchanged to the ``seqlib`` helpers.

    Returns:
        List of variant sequences in pair order.
    """
    seq = seqlib.standardize(consensus, rna)
    variants = []
    for pos, base in enumerate(seq[:-1]):
        pair = [base, seq[pos + 1]]
        # Everything outside the two-base window is kept as is
        head, tail = seq[:pos], seq[pos + 2:]
        variants.extend(head + replacement + tail
                        for replacement in seqlib.allOtherBases(pair, rna))
    return variants
def doubleMutantMatrix(data, refVariant, libSeq, startPos=1, coop=False):
    """Auxiliary function to generate the double mutant matrix.

    Args:
        data: mapping from variant sequence to signal; must support
            ``seq in data.index`` and ``data[seq]`` (presumably a pandas
            Series indexed by sequence).
        refVariant: reference sequence that the mutations are applied to.
        libSeq: library sequence; positions holding 'N' (case-insensitive)
            are the mutated positions.
        startPos: number given to the first base in the mutant labels.
        coop: if True, return ``signal[i, i] + signal[j, j] - signal[i, j]``
            in place of the raw double mutant signals.

    Returns:
        Tuple ``(matrix, mutantLabels)``. ``mutantLabels`` has one entry per
        single mutation, formed as reference base + position + new base.
        ``matrix[i, j]`` is the signal of the reference carrying mutations
        i and j. Diagonal entries hold the single mutants. Entries are NaN
        when the sequence is absent from ``data`` or when the two mutations
        put different bases at the same position.
    """
    # Library positions are the degenerate ('N') positions of libSeq
    libPos = [i for (i, base) in enumerate(libSeq.upper()) if base == 'N']

    # One (position, new base) pair per single mutant, plus its label
    mutations = [(pos, otherBase)
                 for pos in libPos
                 for otherBase in seqlib.allOtherBases(refVariant[pos])]
    mutantLabels = [refVariant[pos] + str(pos + startPos) + otherBase
                    for (pos, otherBase) in mutations]

    # Start from all-NaN and fill in only the entries that have a signal
    dim = len(mutations)
    doubleMutantSignals = np.full((dim, dim), np.nan)
    for i, (pos1, otherBase1) in enumerate(mutations):
        for j, (pos2, otherBase2) in enumerate(mutations):
            # Two different bases at the same position is not a valid
            # double mutant
            if pos1 == pos2 and otherBase1 != otherBase2:
                continue
            # Create the current mutant sequence
            currSeq = list(refVariant)
            currSeq[pos1] = otherBase1
            currSeq[pos2] = otherBase2
            currSeq = ''.join(currSeq)
            # Grab the signal if this variant was measured
            if currSeq in data.index:
                doubleMutantSignals[i, j] = data[currSeq]

    # Compute cooperativity if requested
    if coop:
        # Broadcast form of single[i] + single[j] - double[i, j]
        singles = np.diag(doubleMutantSignals)
        doubleMutantSignals = (singles[:, np.newaxis]
                               + singles[np.newaxis, :]
                               - doubleMutantSignals)

    return doubleMutantSignals, mutantLabels
def plotTertSMcoop(dfUnqClusters, listConsensus, listSeqPos, refConsensus,
                   field='params2.median', listConsensusName=None,
                   vmin=None, vmax=None, cmap='RdBu', c_bad='0.55',
                   robust=True, figsize=None, figunitheight=1,
                   figunitwidth=1, maxfigwidth=32,
                   actLabel=r'$\mathrm{\mathsf{\Delta\Delta G^{\ddag}\ (kcal\ mol^{-1})}}$',
                   show=True, **kwargs):
    """Plot tertiary contact-single point mutant cooperativity heatmap.

    For every consensus in listConsensus and every single point mismatch at
    the positions in listSeqPos, the activity is computed as
    ``1 / field * 60`` and compared with the reference consensus:

        ddG = -R * T * ln((k[con, mut] * k[ref, consensus])
                          / (k[ref, mut] * k[con, consensus]))

    Args:
        dfUnqClusters: DataFrame indexed by annotation, or holding an
            'annotation' column that is then used as the index.
        listConsensus: consensus names, one heatmap row each.
        listSeqPos: positions to mutate; the first character of each entry
            is read as the base at that position (presumably followed by
            the position number, e.g. 'A12').
        refConsensus: consensus used as the reference; must be one of
            listConsensus.
        field: column of dfUnqClusters that the activity is derived from.
        listConsensusName: optional display names replacing listConsensus.
        vmin, vmax, robust: passed to ``sns.heatmap``.
        cmap: colormap name or colormap object.
        c_bad: color used for masked (non-finite) cells.
        figsize: figure size; computed from the unit sizes when not given.
        figunitheight, figunitwidth, maxfigwidth: control the computed
            figure size.
        actLabel: label of the color bar.
        show: if True, call ``plt.show(block=False)``.
        **kwargs: accepted for call compatibility; not used here.

    Returns:
        None.
    """
    # Define constants
    R = 1.9872041e-3  # gas constant in kcal/(mol K), matching actLabel
    T = 293.0  # temperature, presumably in kelvin

    # Make dfUnqClusters indexed by annotation if not already; the frame is
    # only read from, so no copy is needed
    if dfUnqClusters.index.name == 'annotation':
        dfUnqClusters2 = dfUnqClusters
    else:
        dfUnqClusters2 = dfUnqClusters.set_index('annotation')

    # Names of the 3 possible mismatches at every position. They do not
    # depend on the consensus, so they are built once up front.
    listName = [pos + base
                for pos in listSeqPos
                for base in seqlib.allOtherBases(pos[0])]

    # Make empty dataframes for the TC-SPM matrix
    mat_kobs = pd.DataFrame(columns=listConsensus)
    mat_ddG = pd.DataFrame(columns=listConsensus)
    # Make an empty series for the consensus variants
    seriesConsensusAct = pd.Series(index=listConsensus, dtype=float)

    for con in listConsensus:
        # Annotations of the single mismatches in the current consensus
        listAnnt = [con + ':1:0:0:' + name + ':::' for name in listName]
        # Get the activity of the consensus variant
        seriesConsensusAct[con] = (
            1. / dfUnqClusters2.loc[con + ':0:0:0::::'][field] * 60)
        # Add the activities of the current consensus as a column
        mat_kobs[con] = (
            1. / dfUnqClusters2.loc[listAnnt][field] * 60).tolist()

    # Compute delta delta G from k_obs
    for con in listConsensus:
        mat_ddG[con] = - R * T * np.log(
            (mat_kobs[con] * seriesConsensusAct[refConsensus])
            / (mat_kobs[refConsensus] * seriesConsensusAct[con]))

    # Only mat_ddG is plotted, so only it gets relabelled
    if listConsensusName is not None:
        mat_ddG.columns = listConsensusName
    mat_ddG.columns.name = 'Tertiary contact knockouts'

    # Set single mutant names
    mat_ddG['Single point mutants'] = listName
    mat_ddG = mat_ddG.set_index('Single point mutants').transpose()

    # Define colormap; basestring only exists on Python 2
    try:
        stringTypes = basestring
    except NameError:
        stringTypes = str
    if isinstance(cmap, stringTypes):
        cmap = plt.get_cmap(cmap)
    cmap.set_bad(c_bad, 0.8)

    # Make mask for nan data
    mask = ~np.isfinite(mat_ddG)

    # Plot heatmap
    if not figsize:
        figwidth = min(figunitwidth * len(listName), maxfigwidth)
        figheight = figunitheight * len(listConsensus) + 0.5
        figsize = (figwidth, figheight)

    fig, ax = plt.subplots(1, 1, figsize=figsize)
    cbar_ax = fig.add_axes([.78, .83, .2, .05])
    sns.heatmap(mat_ddG, ax=ax, square=True, mask=mask, robust=robust,
                vmin=vmin, vmax=vmax, center=0, cmap=cmap, cbar_ax=cbar_ax,
                cbar_kws={'orientation': 'horizontal'})
    setproperties(ax=cbar_ax, fontsize=21, xlabel=actLabel)
    setproperties(ax=ax, tickfontsize=22, yticklabelrot=0,
                  labelfontsize=28, tight=True, pad=1.0)

    if show:
        plt.show(block=False)

    return
def plotSingleMutants(df, consensus, listSeqPos, muttype='m', colorbymut=True,
                      fullname=False, collapse=False, **kwargs):
    """Plot the activities and counts of single mutants.

    Builds the annotations of the requested single mutants of a consensus
    sequence, keeps those present in df and hands them to plotVariants.
    This function defaults to plotting time/rate constants, but can be used
    to plot something else too.

    Args:
        df: DataFrame of all clusters, indexed by annotation or holding an
            'annotation' column.
        consensus: name of the consensus sequence.
        listSeqPos: positions to mutate; the first character of each entry
            is read as the base and the rest as the position (presumably
            e.g. 'A12').
        muttype: string of mutation type codes: 'm' mismatches,
            'i' insertions, 'd' deletions.
        colorbymut: if False, no per-bar colors are passed on.
        fullname: if True, no short names are passed on.
        collapse: if True, no tick positions are passed on.
        **kwargs: forwarded to plotVariants.

    Returns:
        Whatever plotVariants returns.
    """
    # Define constants
    allBases = ['A', 'T', 'C', 'G']
    allColors = {'A': sns.xkcd_rgb["windows blue"],
                 'T': sns.xkcd_rgb["amber"],
                 'C': sns.xkcd_rgb["faded green"],
                 'G': sns.xkcd_rgb["reddish orange"],
                 'x': sns.xkcd_rgb["dusty purple"]}
    defaultColor = sns.xkcd_rgb["greyish"]

    # Compute how many bars are plotted at each position and the tick
    # positions. There are 4 possible insertions (one per base in
    # allBases), so 'i' must reserve 4 slots for ticks to line up with bars.
    numBarDict = {'m': 3, 'i': 4, 'd': 1}
    numBar = sum(numBarDict.get(code, 0) for code in muttype)
    # Floor division keeps gap an integer on both Python 2 and 3
    gap = numBar // 3
    stride = numBar + gap
    xtickspos = np.array([stride * group + offset
                          for group in range(len(listSeqPos))
                          for offset in range(1, numBar + 1)])

    # Make the list of annotations to plot
    listAnnt = []
    listName = []
    listColor = []
    for pos in listSeqPos:
        # Add mismatches
        if 'm' in muttype:
            # The 3 other bases at the current position
            allOtherBases = seqlib.allOtherBases(pos[0])
            mAnnts = [consensus + ':1:0:0:' + pos + base + ':::'
                      for base in allOtherBases]
            mNames = [pos + base for base in allOtherBases]
            mColors = [allColors.get(base, defaultColor)
                       for base in allOtherBases]
            listAnnt.extend(mAnnts)
            listName.extend(mNames)
            listColor.extend(mColors)
        # Add insertions
        if 'i' in muttype:
            # The 4 possible insertions at the current position
            iAnnts = [consensus + ':0:1:0::+' + pos[1:] + base + '::'
                      for base in allBases]
            iName = ['+' + pos[1:] + base for base in allBases]
            iColors = [allColors.get(base, defaultColor)
                       for base in allBases]
            listAnnt.extend(iAnnts)
            listName.extend(iName)
            listColor.extend(iColors)
        # Add deletions
        if 'd' in muttype:
            # The only possible deletion at the current position
            listAnnt.append(consensus + ':0:0:1:::' + pos + 'x:')
            listName.append(pos + 'x')
            listColor.append(allColors.get('x', defaultColor))

    # Drop every annotation that does not exist in df, together with its
    # name, color and tick position
    if df.index.name == 'annotation':
        knownAnnts = df.index
    else:
        knownAnnts = df.set_index('annotation').index
    keep = [annt in knownAnnts for annt in listAnnt]
    listAnnt = [item for item, ok in zip(listAnnt, keep) if ok]
    listName = [item for item, ok in zip(listName, keep) if ok]
    listColor = [item for item, ok in zip(listColor, keep) if ok]
    xtickspos = np.array([item for item, ok in zip(xtickspos, keep) if ok])

    # Call plotVariants to plot
    if not colorbymut:
        listColor = None
    if fullname:
        listName = None
    if collapse:
        xtickspos = None

    return plotVariants(df, listAnnt, listName=listName, color=listColor,
                        _xticks=xtickspos, xticklabelrot=90, **kwargs)
def plotSingleMutants(df, consensus, listSeqPos, muttype='m', colorbymut=True,
                      fullname=False, collapse=False, **kwargs):
    """Plot the activities and counts of single mutants.

    Builds the annotations of the requested single mutants of a consensus
    sequence, keeps those present in df and hands them to plotVariants.
    Of the three mismatch bars at a position only the middle one is
    labelled, with the position name without the new base; the other two
    get a blank label. This function defaults to plotting time/rate
    constants, but can be used to plot something else too.

    Args:
        df: DataFrame of all clusters, indexed by annotation or holding an
            'annotation' column.
        consensus: name of the consensus sequence.
        listSeqPos: positions to mutate; the first character of each entry
            is read as the base and the rest as the position (presumably
            e.g. 'A12').
        muttype: string of mutation type codes: 'm' mismatches,
            'i' insertions, 'd' deletions.
        colorbymut: if False, no per-bar colors are passed on.
        fullname: if True, no short names are passed on.
        collapse: if True, no tick positions are passed on.
        **kwargs: forwarded to plotVariants.

    Returns:
        Whatever plotVariants returns.
    """
    # Define constants
    allBases = ['A', 'T', 'C', 'G']
    allColors = {
        'A': sns.xkcd_rgb["windows blue"],
        'T': sns.xkcd_rgb["amber"],
        'C': sns.xkcd_rgb["faded green"],
        'G': sns.xkcd_rgb["reddish orange"],
        'x': sns.xkcd_rgb["dusty purple"]
    }
    defaultColor = sns.xkcd_rgb["greyish"]

    # Compute how many bars are plotted at each position and the tick
    # positions. There are 4 possible insertions (one per base in
    # allBases), so 'i' must reserve 4 slots for ticks to line up with bars.
    numBarDict = {'m': 3, 'i': 4, 'd': 1}
    numBar = sum(numBarDict.get(code, 0) for code in muttype)
    # Floor division keeps gap an integer on both Python 2 and 3
    gap = numBar // 3
    stride = numBar + gap
    xtickspos = np.array([stride * group + offset
                          for group in range(len(listSeqPos))
                          for offset in range(1, numBar + 1)])

    # Make the list of annotations to plot
    listAnnt = []
    listName = []
    listColor = []
    for pos in listSeqPos:
        # Add mismatches
        if 'm' in muttype:
            # The 3 other bases at the current position
            allOtherBases = seqlib.allOtherBases(pos[0])
            mAnnts = [
                consensus + ':1:0:0:' + pos + base + ':::'
                for base in allOtherBases
            ]
            mNames = [pos + base for base in allOtherBases]
            # Label only the middle bar, dropping the trailing new base
            mNames = [
                name[:-1] if i % 3 == 1 else ' '
                for i, name in enumerate(mNames)
            ]
            mColors = [
                allColors.get(base, defaultColor) for base in allOtherBases
            ]
            listAnnt.extend(mAnnts)
            listName.extend(mNames)
            listColor.extend(mColors)
        # Add insertions
        if 'i' in muttype:
            # The 4 possible insertions at the current position
            iAnnts = [
                consensus + ':0:1:0::+' + pos[1:] + base + '::'
                for base in allBases
            ]
            iName = ['+' + pos[1:] + base for base in allBases]
            iColors = [allColors.get(base, defaultColor) for base in allBases]
            listAnnt.extend(iAnnts)
            listName.extend(iName)
            listColor.extend(iColors)
        # Add deletions
        if 'd' in muttype:
            # The only possible deletion at the current position
            listAnnt.append(consensus + ':0:0:1:::' + pos + 'x:')
            listName.append(pos + 'x')
            listColor.append(allColors.get('x', defaultColor))

    # Drop every annotation that does not exist in df, together with its
    # name, color and tick position
    if df.index.name == 'annotation':
        knownAnnts = df.index
    else:
        knownAnnts = df.set_index('annotation').index
    keep = [annt in knownAnnts for annt in listAnnt]
    listAnnt = [item for item, ok in zip(listAnnt, keep) if ok]
    listName = [item for item, ok in zip(listName, keep) if ok]
    listColor = [item for item, ok in zip(listColor, keep) if ok]
    xtickspos = np.array([item for item, ok in zip(xtickspos, keep) if ok])

    # Call plotVariants to plot
    if not colorbymut:
        listColor = None
    if fullname:
        listName = None
    if collapse:
        xtickspos = None

    return plotVariants(df, listAnnt, listName=listName, color=listColor,
                        _xticks=xtickspos, xticklabelrot=90, **kwargs)
def plotTertSMcoop(
        dfUnqClusters,
        listConsensus,
        listSeqPos,
        refConsensus,
        field='params2.median',
        listConsensusName=None,
        vmin=None,
        vmax=None,
        cmap='RdBu',
        c_bad='0.55',
        robust=True,
        figsize=None,
        figunitheight=1,
        figunitwidth=1,
        maxfigwidth=32,
        actLabel=r'$\mathrm{\mathsf{\Delta\Delta G^{\ddag}\ (kcal\ mol^{-1})}}$',
        show=True,
        **kwargs):
    """Plot tertiary contact-single point mutant cooperativity heatmap.

    For every consensus in listConsensus and every single point mismatch at
    the positions in listSeqPos, the activity is computed as
    ``1 / field * 60`` and compared with the reference consensus:

        ddG = -R * T * ln((k[con, mut] * k[ref, consensus])
                          / (k[ref, mut] * k[con, consensus]))

    Args:
        dfUnqClusters: DataFrame indexed by annotation, or holding an
            'annotation' column that is then used as the index.
        listConsensus: consensus names, one heatmap row each.
        listSeqPos: positions to mutate; the first character of each entry
            is read as the base at that position (presumably followed by
            the position number, e.g. 'A12').
        refConsensus: consensus used as the reference; must be one of
            listConsensus.
        field: column of dfUnqClusters that the activity is derived from.
        listConsensusName: optional display names replacing listConsensus.
        vmin, vmax, robust: passed to ``sns.heatmap``.
        cmap: colormap name or colormap object.
        c_bad: color used for masked (non-finite) cells.
        figsize: figure size; computed from the unit sizes when not given.
        figunitheight, figunitwidth, maxfigwidth: control the computed
            figure size.
        actLabel: label of the color bar.
        show: if True, call ``plt.show(block=False)``.
        **kwargs: accepted for call compatibility; not used here.

    Returns:
        None.
    """
    # Define constants
    R = 1.9872041e-3  # gas constant in kcal/(mol K), matching actLabel
    T = 293.0  # temperature, presumably in kelvin

    # Make dfUnqClusters indexed by annotation if not already; the frame is
    # only read from, so no copy is needed
    if dfUnqClusters.index.name == 'annotation':
        dfUnqClusters2 = dfUnqClusters
    else:
        dfUnqClusters2 = dfUnqClusters.set_index('annotation')

    # Names of the 3 possible mismatches at every position. They do not
    # depend on the consensus, so they are built once up front.
    listName = [
        pos + base
        for pos in listSeqPos
        for base in seqlib.allOtherBases(pos[0])
    ]

    # Make empty dataframes for the TC-SPM matrix
    mat_kobs = pd.DataFrame(columns=listConsensus)
    mat_ddG = pd.DataFrame(columns=listConsensus)
    # Make an empty series for the consensus variants
    seriesConsensusAct = pd.Series(index=listConsensus, dtype=float)

    for con in listConsensus:
        # Annotations of the single mismatches in the current consensus
        listAnnt = [con + ':1:0:0:' + name + ':::' for name in listName]
        # Get the activity of the consensus variant
        seriesConsensusAct[con] = (
            1. / dfUnqClusters2.loc[con + ':0:0:0::::'][field] * 60)
        # Add the activities of the current consensus as a column
        mat_kobs[con] = (
            1. / dfUnqClusters2.loc[listAnnt][field] * 60).tolist()

    # Compute delta delta G from k_obs
    for con in listConsensus:
        mat_ddG[con] = -R * T * np.log(
            (mat_kobs[con] * seriesConsensusAct[refConsensus]) /
            (mat_kobs[refConsensus] * seriesConsensusAct[con]))

    # Only mat_ddG is plotted, so only it gets relabelled
    if listConsensusName is not None:
        mat_ddG.columns = listConsensusName
    mat_ddG.columns.name = 'Tertiary contact knockouts'

    # Set single mutant names
    mat_ddG['Single point mutants'] = listName
    mat_ddG = mat_ddG.set_index('Single point mutants').transpose()

    # Define colormap; basestring only exists on Python 2
    try:
        stringTypes = basestring
    except NameError:
        stringTypes = str
    if isinstance(cmap, stringTypes):
        cmap = plt.get_cmap(cmap)
    cmap.set_bad(c_bad, 0.8)

    # Make mask for nan data
    mask = ~np.isfinite(mat_ddG)

    # Plot heatmap
    if not figsize:
        figwidth = min(figunitwidth * len(listName), maxfigwidth)
        figheight = figunitheight * len(listConsensus) + 0.5
        figsize = (figwidth, figheight)

    fig, ax = plt.subplots(1, 1, figsize=figsize)
    cbar_ax = fig.add_axes([.78, .83, .2, .05])
    sns.heatmap(
        mat_ddG,
        ax=ax,
        square=True,
        mask=mask,
        robust=robust,
        vmin=vmin,
        vmax=vmax,
        center=0,
        cmap=cmap,
        cbar_ax=cbar_ax,
        cbar_kws={'orientation': 'horizontal'})
    setproperties(ax=cbar_ax, fontsize=21, xlabel=actLabel)
    setproperties(
        ax=ax,
        tickfontsize=22,
        yticklabelrot=0,
        labelfontsize=28,
        tight=True,
        pad=1.0)

    if show:
        plt.show(block=False)

    return