示例#1
0
def generate_2x1(consensus, rna=False):
    """Enumerate variants in which one base is replaced by two bases.

    The consensus is first normalized with seqlib.standardize. Then, for
    every position, the base at that position is swapped for each ordered
    pair of bases taken from seqlib.allOtherBases(base, rna).

    Returns the list of variant sequences, ordered by position, then by
    the first replacement base, then by the second replacement base.
    """
    template = seqlib.standardize(consensus, rna)
    return [
        template[:pos] + first + second + template[pos + 1:]
        for pos, original in enumerate(template)
        for first in seqlib.allOtherBases(original, rna)
        for second in seqlib.allOtherBases(original, rna)
    ]
示例#2
0
def generate_2x1(consensus, rna=False):
    """Enumerate variants in which one base is replaced by two bases.

    The consensus is first normalized with seqlib.standardize. Then, for
    every position, the base at that position is swapped for each ordered
    pair of bases taken from seqlib.allOtherBases(base, rna).

    Returns the list of variant sequences, ordered by position, then by
    the first replacement base, then by the second replacement base.
    """
    template = seqlib.standardize(consensus, rna)
    return [
        template[:pos] + first + second + template[pos + 1:]
        for pos, original in enumerate(template)
        for first in seqlib.allOtherBases(original, rna)
        for second in seqlib.allOtherBases(original, rna)
    ]
示例#3
0
def generate_2x3(consensus, rna=False):
    """Enumerate variants in which a run of three bases becomes two bases.

    The consensus is normalized with seqlib.standardize. For every window
    of three consecutive bases, the window is replaced by two bases: the
    first taken from seqlib.allOtherBases of the window's first base and
    the second from seqlib.allOtherBases of the window's third base. The
    middle base of the window is dropped and does not constrain the result.

    Returns the list of variant sequences, ordered by window start, then
    by the first replacement base, then by the second.
    """
    seq = seqlib.standardize(consensus, rna)
    # seq[:-2] stops two short of the end so that seq[i + 2] always exists
    return [
        seq[:i] + otherBase1 + otherBase3 + seq[i + 3:]
        for i, base in enumerate(seq[:-2])
        for otherBase1 in seqlib.allOtherBases(base, rna)
        for otherBase3 in seqlib.allOtherBases(seq[i + 2], rna)
    ]
示例#4
0
def generate_2x3(consensus, rna=False):
    """Enumerate variants in which a run of three bases becomes two bases.

    The consensus is normalized with seqlib.standardize. For every window
    of three consecutive bases, the window is replaced by two bases: the
    first taken from seqlib.allOtherBases of the window's first base and
    the second from seqlib.allOtherBases of the window's third base. The
    middle base of the window is dropped and does not constrain the result.

    Returns the list of variant sequences, ordered by window start, then
    by the first replacement base, then by the second.
    """
    seq = seqlib.standardize(consensus, rna)
    # seq[:-2] stops two short of the end so that seq[i + 2] always exists
    return [
        seq[:i] + otherBase1 + otherBase3 + seq[i + 3:]
        for i, base in enumerate(seq[:-2])
        for otherBase1 in seqlib.allOtherBases(base, rna)
        for otherBase3 in seqlib.allOtherBases(seq[i + 2], rna)
    ]
示例#5
0
def generate_4x4(consensus, rna=False):
    """Enumerate variants in which a run of four bases is rewritten.

    The consensus is normalized with seqlib.standardize. For every window
    of four consecutive bases, the window is replaced by four bases: the
    first from seqlib.allOtherBases of the window's first base, the two
    inner ones from seqlib.allBases(rna) without restriction, and the last
    from seqlib.allOtherBases of the window's fourth base.

    Returns the list of variant sequences, ordered by window start and
    then by the four replacement bases from left to right.
    """
    seq = seqlib.standardize(consensus, rna)
    # seq[:-3] stops three short of the end so that seq[i + 3] always exists
    return [
        seq[:i] + otherBase1 + newBase2 + newBase3 + otherBase4 + seq[i + 4:]
        for i, base in enumerate(seq[:-3])
        for otherBase1 in seqlib.allOtherBases(base, rna)
        for newBase2 in seqlib.allBases(rna)
        for newBase3 in seqlib.allBases(rna)
        for otherBase4 in seqlib.allOtherBases(seq[i + 3], rna)
    ]
示例#6
0
def generate_4x4(consensus, rna=False):
    """Enumerate variants in which a run of four bases is rewritten.

    The consensus is normalized with seqlib.standardize. For every window
    of four consecutive bases, the window is replaced by four bases: the
    first from seqlib.allOtherBases of the window's first base, the two
    inner ones from seqlib.allBases(rna) without restriction, and the last
    from seqlib.allOtherBases of the window's fourth base.

    Returns the list of variant sequences, ordered by window start and
    then by the four replacement bases from left to right.
    """
    seq = seqlib.standardize(consensus, rna)
    # seq[:-3] stops three short of the end so that seq[i + 3] always exists
    return [
        seq[:i] + otherBase1 + newBase2 + newBase3 + otherBase4 + seq[i + 4:]
        for i, base in enumerate(seq[:-3])
        for otherBase1 in seqlib.allOtherBases(base, rna)
        for newBase2 in seqlib.allBases(rna)
        for newBase3 in seqlib.allBases(rna)
        for otherBase4 in seqlib.allOtherBases(seq[i + 3], rna)
    ]
示例#7
0
def generate_1x2(consensus, rna=False):
    """Enumerate variants in which two adjacent bases collapse into one.

    The consensus is normalized with seqlib.standardize. Every pair of
    neighbouring bases is replaced by a single base taken from
    seqlib.allOtherBases([first, second], rna).

    Returns the list of variant sequences, ordered by pair start and then
    by replacement base.
    """
    template = seqlib.standardize(consensus, rna)
    # Pair each base with its right-hand neighbour
    neighbours = zip(template, template[1:])
    return [
        template[:start] + replacement + template[start + 2:]
        for start, (left, right) in enumerate(neighbours)
        for replacement in seqlib.allOtherBases([left, right], rna)
    ]
示例#8
0
def generate_1x2(consensus, rna=False):
    """Enumerate variants in which two adjacent bases collapse into one.

    The consensus is normalized with seqlib.standardize. Every pair of
    neighbouring bases is replaced by a single base taken from
    seqlib.allOtherBases([first, second], rna).

    Returns the list of variant sequences, ordered by pair start and then
    by replacement base.
    """
    template = seqlib.standardize(consensus, rna)
    # Pair each base with its right-hand neighbour
    neighbours = zip(template, template[1:])
    return [
        template[:start] + replacement + template[start + 2:]
        for start, (left, right) in enumerate(neighbours)
        for replacement in seqlib.allOtherBases([left, right], rna)
    ]
示例#9
0
def doubleMutantMatrix(data, refVariant, libSeq, startPos=1, coop=False):
    """Auxiliary function to generate the double mutant matrix.

    Every position where libSeq has an 'N' (case-insensitive) is treated as
    a library position. Each such position contributes one single mutant
    per base returned by seqlib.allOtherBases for the reference base there.
    Entry [i, j] of the matrix is the signal of refVariant carrying both
    mutation i and mutation j, so the diagonal holds the single mutants.

    Args:
        data: signal lookup supporting `seq in data.index` and `data[seq]`
            (presumably a pandas Series indexed by sequence -- TODO confirm).
        refVariant: reference sequence the mutations are applied to.
        libSeq: library sequence whose 'N' characters mark the mutated
            positions; indexed in parallel with refVariant.
        startPos: number given to the first sequence position in the labels.
        coop: if True, return M[i, i] + M[j, j] - M[i, j] instead of the raw
            signals M.

    Returns:
        Tuple (matrix, labels). matrix is a square float array with NaN
        wherever the mutant is absent from data.index or the two mutations
        put different bases at the same position. labels has one string per
        row/column of the form <reference base><position><new base>.
    """
    # Get library positions and describe each single mutant as (index, base)
    libPos = [i for (i, base) in enumerate(libSeq.upper()) if base == 'N']
    mutations = [(i, otherBase)
                 for i in libPos
                 for otherBase in seqlib.allOtherBases(refVariant[i])]
    mutantLabels = [refVariant[i] + str(i + startPos) + otherBase
                    for (i, otherBase) in mutations]

    # Start from all-NaN so only measurable combinations need to be filled in
    dim = len(mutations)
    doubleMutantSignals = np.full((dim, dim), np.nan)
    for i, (pos1, otherBase1) in enumerate(mutations):
        for j, (pos2, otherBase2) in enumerate(mutations):
            # Two different substitutions cannot share one position
            if pos1 == pos2 and otherBase1 != otherBase2:
                continue
            # Create the current mutant sequence
            currSeq = list(refVariant)
            currSeq[pos1] = otherBase1
            currSeq[pos2] = otherBase2
            currSeq = ''.join(currSeq)
            # Grep the signal; mutants missing from the data stay NaN
            if currSeq in data.index:
                doubleMutantSignals[i, j] = data[currSeq]

    # Compute cooperativity if requested
    if coop:
        # The diagonal holds the single mutant signals; broadcasting it along
        # rows and columns gives M[i, i] + M[j, j] for every pair at once
        singles = np.diag(doubleMutantSignals)
        doubleMutantSignals = (singles[:, np.newaxis] + singles[np.newaxis, :]
                               - doubleMutantSignals)

    return doubleMutantSignals, mutantLabels
示例#10
0
def plotTertSMcoop(dfUnqClusters, listConsensus, listSeqPos, refConsensus, field='params2.median', listConsensusName=None,
                   vmin=None, vmax=None, cmap='RdBu', c_bad='0.55', robust=True,
                   figsize=None, figunitheight=1, figunitwidth=1, maxfigwidth=32,
                   actLabel=r'$\mathrm{\mathsf{\Delta\Delta G^{\ddag}\ (kcal\ mol^{-1})}}$',
                   show=True, **kwargs):
    """Plot tertiary contact-single point mutant cooperativity heatmap.

    For each consensus in listConsensus a rate k = 60 / field is looked up
    for the unmutated consensus and for every single point mismatch at the
    positions in listSeqPos. The value drawn for consensus c and mutant m is

        ddG = -R * T * ln((k[c, m] * k[ref]) / (k[ref, m] * k[c]))

    where ref is refConsensus. Non-finite cells are masked and shown in
    c_bad.

    Args:
        dfUnqClusters: DataFrame indexed by annotation, or carrying an
            'annotation' column; must contain the column named by field.
            Annotations are presumably unique -- TODO confirm.
        listConsensus: consensus names; one heatmap row per entry.
        listSeqPos: position strings whose first character is the reference
            base (presumably of the form 'A12' -- TODO confirm).
        refConsensus: entry of listConsensus used as the reference.
        field: column of dfUnqClusters the rates are derived from.
        listConsensusName: optional display names replacing listConsensus.
        vmin, vmax, robust: forwarded to seaborn.heatmap.
        cmap: colormap name or colormap object.
        c_bad: color used for masked cells.
        figsize: figure size; derived from the figunit*/maxfigwidth
            arguments when not given.
        figunitheight, figunitwidth: size per heatmap row / column.
        maxfigwidth: upper bound on the derived figure width.
        actLabel: label of the color bar.
        show: call plt.show(block=False) when True.
        **kwargs: accepted but not used.

    Returns:
        None. The figure is drawn as a side effect.
    """
    # Gas constant in kcal/(mol K) and temperature (presumably Kelvin)
    R = 1.9872041e-3
    T = 293.0

    # Make sure the frame is indexed by annotation; it is only read from
    if dfUnqClusters.index.name == 'annotation':
        dfUnqClusters2 = dfUnqClusters
    else:
        dfUnqClusters2 = dfUnqClusters.set_index('annotation')

    # Empty frames for the TC-SPM matrices, one column per consensus
    mat_kobs = pd.DataFrame(columns=listConsensus)
    mat_ddG = pd.DataFrame(columns=listConsensus)

    # Rates of the unmutated consensus variants; the dtype is given
    # explicitly because an empty Series no longer defaults to float
    seriesConsensusAct = pd.Series(index=listConsensus, dtype=float)

    for con in listConsensus:

        # Annotations and short names of every mismatch at every position
        listAnnt = []
        listName = []
        for pos in listSeqPos:
            # pos[0] is the base at the current position
            for base in seqlib.allOtherBases(pos[0]):
                listAnnt.append(con + ':1:0:0:' + pos + base + ':::')
                listName.append(pos + base)

        # Get the activity of the consensus variant
        seriesConsensusAct[con] = 1. / dfUnqClusters2.loc[con + ':0:0:0::::'][field] * 60
        # reindex yields NaN for annotations missing from the data (masked
        # below) instead of raising like .loc does on current pandas
        mat_kobs[con] = (1. / dfUnqClusters2[field].reindex(listAnnt) * 60).tolist()

    # Compute delta delta G from k_obs
    for con in listConsensus:
        mat_ddG[con] = - R * T * np.log((mat_kobs[con] * seriesConsensusAct[refConsensus])
                                        / (mat_kobs[refConsensus] * seriesConsensusAct[con]))

    # Use the display names for the consensus variants if given
    if listConsensusName is not None:
        mat_ddG.columns = listConsensusName
    mat_ddG.columns.name = 'Tertiary contact knockouts'

    # Label rows by single mutant name, then put consensus variants on rows
    mat_ddG['Single point mutants'] = listName
    mat_ddG = mat_ddG.set_index('Single point mutants').transpose()

    # Define colormap; the tuple covers str and unicode on Python 2 and is
    # just str on Python 3, where basestring no longer exists
    if isinstance(cmap, (str, type(u''))):
        cmap = plt.get_cmap(cmap)
    # NOTE(review): set_bad modifies the colormap object in place, including
    # one passed in by the caller
    cmap.set_bad(c_bad, 0.8)

    # Make mask for nan data
    mask = ~np.isfinite(mat_ddG)

    # Plot heatmap
    if not figsize:
        figwidth = min(figunitwidth * len(listName), maxfigwidth)
        figheight = figunitheight * len(listConsensus) + 0.5
        figsize = (figwidth, figheight)
    fig, ax = plt.subplots(1, 1, figsize=figsize)
    cbar_ax = fig.add_axes([.78, .83, .2, .05])
    sns.heatmap(mat_ddG, ax=ax, square=True, mask=mask, robust=robust,
                vmin=vmin, vmax=vmax, center=0, cmap=cmap,
                cbar_ax=cbar_ax, cbar_kws={'orientation': 'horizontal'})
    setproperties(ax=cbar_ax, fontsize=21, xlabel=actLabel)
    setproperties(ax=ax, tickfontsize=22, yticklabelrot=0,
                  labelfontsize=28, tight=True, pad=1.0)

    if show:
        plt.show(block=False)

    return
示例#11
0
def plotSingleMutants(df, consensus, listSeqPos, muttype='m',
                      colorbymut=True, fullname=False, collapse=False, **kwargs):
    """Plot the single mutants of one consensus sequence via plotVariants.

    For every position in listSeqPos the annotation strings of the requested
    single mutants are built. Annotations that do not occur in df are
    dropped, and the remaining ones are handed to plotVariants together
    with their short names, bar colors and x tick positions.

    Args:
        df: DataFrame of all clusters, indexed by annotation or carrying an
            'annotation' column.
        consensus: name of the consensus sequence, used as annotation prefix.
        listSeqPos: position strings; the first character is taken as the
            reference base and the remainder as the position label
            (presumably of the form 'A12' -- TODO confirm).
        muttype: string containing any of 'm' (mismatches), 'i'
            (insertions) and 'd' (deletions).
        colorbymut: if False, color=None is passed to plotVariants.
        fullname: if True, listName=None is passed to plotVariants.
        collapse: if True, _xticks=None is passed to plotVariants.
        **kwargs: forwarded to plotVariants.

    Returns:
        Whatever plotVariants returns.
    """
    # Define constants
    allBases = ['A', 'T', 'C', 'G']
    allColors = {'A': sns.xkcd_rgb["windows blue"],
                 'T': sns.xkcd_rgb["amber"],
                 'C': sns.xkcd_rgb["faded green"],
                 'G': sns.xkcd_rgb["reddish orange"],
                 'x': sns.xkcd_rgb["dusty purple"]}
    defaultColor = sns.xkcd_rgb["greyish"]

    # Nominal number of bars per position for each mutation type: three
    # mismatches, one insertion per base, one deletion. Positions are laid
    # out numBar + gap ticks apart; floor division keeps the gap an integer.
    numBarDict = {'m': 3, 'i': len(allBases), 'd': 1}
    numBar = sum(count for kind, count in numBarDict.items() if kind in muttype)
    gap = numBar // 3
    stride = numBar + gap

    # Make the list of annotations to plot
    listAnnt = []
    listName = []
    listColor = []
    xtickspos = []
    for posIdx, pos in enumerate(listSeqPos):
        numBefore = len(listAnnt)

        # Add mismatches
        if 'm' in muttype:
            # pos[0] is the base at the current position
            for base in seqlib.allOtherBases(pos[0]):
                listAnnt.append(consensus+':1:0:0:'+pos+base+':::')
                listName.append(pos+base)
                listColor.append(allColors.get(base, defaultColor))

        # Add insertions, one per base
        if 'i' in muttype:
            for base in allBases:
                listAnnt.append(consensus+':0:1:0::+'+pos[1:]+base+'::')
                listName.append('+'+pos[1:]+base)
                listColor.append(allColors.get(base, defaultColor))

        # Add the only possible deletion at the current position
        if 'd' in muttype:
            listAnnt.append(consensus+':0:0:1:::'+pos+'x:')
            listName.append(pos+'x')
            listColor.append(allColors.get('x', defaultColor))

        # One consecutive tick per bar actually added, so ticks always stay
        # aligned with the annotations
        firstTick = stride * posIdx + 1
        xtickspos.extend(range(firstTick, firstTick + len(listAnnt) - numBefore))

    # Check if annotations exist in df, if not remove from listAnnt, listName, listColor, and xtickspos
    if df.index.name == 'annotation':
        knownAnnt = df.index
    else:
        knownAnnt = df.set_index('annotation').index
    isValid = [annt in knownAnnt for annt in listAnnt]

    listAnnt = [item for item, ok in zip(listAnnt, isValid) if ok]
    listName = [item for item, ok in zip(listName, isValid) if ok]
    listColor = [item for item, ok in zip(listColor, isValid) if ok]
    xtickspos = np.array([item for item, ok in zip(xtickspos, isValid) if ok])

    # Call plotVariants to plot
    if not colorbymut:
        listColor = None
    if fullname:
        listName = None
    if collapse:
        xtickspos = None

    return plotVariants(df, listAnnt, listName=listName, color=listColor, _xticks=xtickspos,
                        xticklabelrot=90, **kwargs)
示例#12
0
def plotSingleMutants(df,
                      consensus,
                      listSeqPos,
                      muttype='m',
                      colorbymut=True,
                      fullname=False,
                      collapse=False,
                      **kwargs):
    """Plot the single mutants of one consensus sequence via plotVariants.

    For every position in listSeqPos the annotation strings of the requested
    single mutants are built. Annotations that do not occur in df are
    dropped, and the remaining ones are handed to plotVariants together
    with their short names, bar colors and x tick positions. Among the
    mismatch bars of a position only the second one is named (with the
    position string); the others get a blank name.

    Args:
        df: DataFrame of all clusters, indexed by annotation or carrying an
            'annotation' column.
        consensus: name of the consensus sequence, used as annotation prefix.
        listSeqPos: position strings; the first character is taken as the
            reference base and the remainder as the position label
            (presumably of the form 'A12' -- TODO confirm).
        muttype: string containing any of 'm' (mismatches), 'i'
            (insertions) and 'd' (deletions).
        colorbymut: if False, color=None is passed to plotVariants.
        fullname: if True, listName=None is passed to plotVariants.
        collapse: if True, _xticks=None is passed to plotVariants.
        **kwargs: forwarded to plotVariants.

    Returns:
        Whatever plotVariants returns.
    """
    # Define constants
    allBases = ['A', 'T', 'C', 'G']
    allColors = {
        'A': sns.xkcd_rgb["windows blue"],
        'T': sns.xkcd_rgb["amber"],
        'C': sns.xkcd_rgb["faded green"],
        'G': sns.xkcd_rgb["reddish orange"],
        'x': sns.xkcd_rgb["dusty purple"]
    }
    defaultColor = sns.xkcd_rgb["greyish"]

    # Nominal number of bars per position for each mutation type: three
    # mismatches, one insertion per base, one deletion. Positions are laid
    # out numBar + gap ticks apart; floor division keeps the gap an integer.
    numBarDict = {'m': 3, 'i': len(allBases), 'd': 1}
    numBar = sum(
        count for kind, count in numBarDict.items() if kind in muttype)
    gap = numBar // 3
    stride = numBar + gap

    # Make the list of annotations to plot
    listAnnt = []
    listName = []
    listColor = []
    xtickspos = []
    for posIdx, pos in enumerate(listSeqPos):
        numBefore = len(listAnnt)

        # Add mismatches
        if 'm' in muttype:
            # pos[0] is the base at the current position
            for k, base in enumerate(seqlib.allOtherBases(pos[0])):
                listAnnt.append(consensus + ':1:0:0:' + pos + base + ':::')
                # Only the middle bar of the group carries a label: the
                # mutant name with its last character stripped
                listName.append((pos + base)[:-1] if k % 3 == 1 else ' ')
                listColor.append(allColors.get(base, defaultColor))

        # Add insertions, one per base
        if 'i' in muttype:
            for base in allBases:
                listAnnt.append(consensus + ':0:1:0::+' + pos[1:] + base +
                                '::')
                listName.append('+' + pos[1:] + base)
                listColor.append(allColors.get(base, defaultColor))

        # Add the only possible deletion at the current position
        if 'd' in muttype:
            listAnnt.append(consensus + ':0:0:1:::' + pos + 'x:')
            listName.append(pos + 'x')
            listColor.append(allColors.get('x', defaultColor))

        # One consecutive tick per bar actually added, so ticks always stay
        # aligned with the annotations
        firstTick = stride * posIdx + 1
        xtickspos.extend(
            range(firstTick, firstTick + len(listAnnt) - numBefore))

    # Check if annotations exist in df, if not remove from listAnnt, listName, listColor, and xtickspos
    if df.index.name == 'annotation':
        knownAnnt = df.index
    else:
        knownAnnt = df.set_index('annotation').index
    isValid = [annt in knownAnnt for annt in listAnnt]

    listAnnt = [item for item, ok in zip(listAnnt, isValid) if ok]
    listName = [item for item, ok in zip(listName, isValid) if ok]
    listColor = [item for item, ok in zip(listColor, isValid) if ok]
    xtickspos = np.array(
        [item for item, ok in zip(xtickspos, isValid) if ok])

    # Call plotVariants to plot
    if not colorbymut:
        listColor = None
    if fullname:
        listName = None
    if collapse:
        xtickspos = None

    return plotVariants(df,
                        listAnnt,
                        listName=listName,
                        color=listColor,
                        _xticks=xtickspos,
                        xticklabelrot=90,
                        **kwargs)
示例#13
0
def plotTertSMcoop(
        dfUnqClusters,
        listConsensus,
        listSeqPos,
        refConsensus,
        field='params2.median',
        listConsensusName=None,
        vmin=None,
        vmax=None,
        cmap='RdBu',
        c_bad='0.55',
        robust=True,
        figsize=None,
        figunitheight=1,
        figunitwidth=1,
        maxfigwidth=32,
        actLabel=r'$\mathrm{\mathsf{\Delta\Delta G^{\ddag}\ (kcal\ mol^{-1})}}$',
        show=True,
        **kwargs):
    """Plot tertiary contact-single point mutant cooperativity heatmap.

    For each consensus in listConsensus a rate k = 60 / field is looked up
    for the unmutated consensus and for every single point mismatch at the
    positions in listSeqPos. The value drawn for consensus c and mutant m is

        ddG = -R * T * ln((k[c, m] * k[ref]) / (k[ref, m] * k[c]))

    where ref is refConsensus. Non-finite cells are masked and shown in
    c_bad.

    Args:
        dfUnqClusters: DataFrame indexed by annotation, or carrying an
            'annotation' column; must contain the column named by field.
            Annotations are presumably unique -- TODO confirm.
        listConsensus: consensus names; one heatmap row per entry.
        listSeqPos: position strings whose first character is the reference
            base (presumably of the form 'A12' -- TODO confirm).
        refConsensus: entry of listConsensus used as the reference.
        field: column of dfUnqClusters the rates are derived from.
        listConsensusName: optional display names replacing listConsensus.
        vmin, vmax, robust: forwarded to seaborn.heatmap.
        cmap: colormap name or colormap object.
        c_bad: color used for masked cells.
        figsize: figure size; derived from the figunit*/maxfigwidth
            arguments when not given.
        figunitheight, figunitwidth: size per heatmap row / column.
        maxfigwidth: upper bound on the derived figure width.
        actLabel: label of the color bar.
        show: call plt.show(block=False) when True.
        **kwargs: accepted but not used.

    Returns:
        None. The figure is drawn as a side effect.
    """
    # Gas constant in kcal/(mol K) and temperature (presumably Kelvin)
    R = 1.9872041e-3
    T = 293.0

    # Make sure the frame is indexed by annotation; it is only read from
    if dfUnqClusters.index.name == 'annotation':
        dfUnqClusters2 = dfUnqClusters
    else:
        dfUnqClusters2 = dfUnqClusters.set_index('annotation')

    # Empty frames for the TC-SPM matrices, one column per consensus
    mat_kobs = pd.DataFrame(columns=listConsensus)
    mat_ddG = pd.DataFrame(columns=listConsensus)

    # Rates of the unmutated consensus variants; the dtype is given
    # explicitly because an empty Series no longer defaults to float
    seriesConsensusAct = pd.Series(index=listConsensus, dtype=float)

    for con in listConsensus:

        # Annotations and short names of every mismatch at every position
        listAnnt = []
        listName = []
        for pos in listSeqPos:
            # pos[0] is the base at the current position
            for base in seqlib.allOtherBases(pos[0]):
                listAnnt.append(con + ':1:0:0:' + pos + base + ':::')
                listName.append(pos + base)

        # Get the activity of the consensus variant
        seriesConsensusAct[con] = 1. / dfUnqClusters2.loc[
            con + ':0:0:0::::'][field] * 60
        # reindex yields NaN for annotations missing from the data (masked
        # below) instead of raising like .loc does on current pandas
        mat_kobs[con] = (1. / dfUnqClusters2[field].reindex(listAnnt) *
                         60).tolist()

    # Compute delta delta G from k_obs
    for con in listConsensus:
        mat_ddG[con] = -R * T * np.log(
            (mat_kobs[con] * seriesConsensusAct[refConsensus]) /
            (mat_kobs[refConsensus] * seriesConsensusAct[con]))

    # Use the display names for the consensus variants if given
    if listConsensusName is not None:
        mat_ddG.columns = listConsensusName
    mat_ddG.columns.name = 'Tertiary contact knockouts'

    # Label rows by single mutant name, then put consensus variants on rows
    mat_ddG['Single point mutants'] = listName
    mat_ddG = mat_ddG.set_index('Single point mutants').transpose()

    # Define colormap; the tuple covers str and unicode on Python 2 and is
    # just str on Python 3, where basestring no longer exists
    if isinstance(cmap, (str, type(u''))):
        cmap = plt.get_cmap(cmap)
    # NOTE(review): set_bad modifies the colormap object in place, including
    # one passed in by the caller
    cmap.set_bad(c_bad, 0.8)

    # Make mask for nan data
    mask = ~np.isfinite(mat_ddG)

    # Plot heatmap
    if not figsize:
        figwidth = min(figunitwidth * len(listName), maxfigwidth)
        figheight = figunitheight * len(listConsensus) + 0.5
        figsize = (figwidth, figheight)
    fig, ax = plt.subplots(1, 1, figsize=figsize)
    cbar_ax = fig.add_axes([.78, .83, .2, .05])
    sns.heatmap(mat_ddG,
                ax=ax,
                square=True,
                mask=mask,
                robust=robust,
                vmin=vmin,
                vmax=vmax,
                center=0,
                cmap=cmap,
                cbar_ax=cbar_ax,
                cbar_kws={'orientation': 'horizontal'})
    setproperties(ax=cbar_ax, fontsize=21, xlabel=actLabel)
    setproperties(ax=ax,
                  tickfontsize=22,
                  yticklabelrot=0,
                  labelfontsize=28,
                  tight=True,
                  pad=1.0)

    if show:
        plt.show(block=False)

    return