def plotErrorByDeltaGBin(self, binedges=np.arange(-13, -6.1, 0.1), ylim=[0,0.8], min_n=5, xlim=None, ax=None, color='r', marker='>'):
        """Plot the dG confidence-interval half-width as a function of binned dG.

        The half-width ``(dG_ub - dG_lb)/2`` of every variant is grouped by the
        dG bin the variant falls in and passed to ``returnFractionGroupedBy``
        (defined elsewhere), whose three return values are drawn as points
        with error bars at the bin centers.

        Side effect: the columns 'ci_width' and 'dG_bin' are added to (or
        overwritten on) ``self.variant_table``.

        Args:
            binedges: evenly spaced dG bin edges; the bin width is taken from
                the first two entries.
            ylim: y-axis limits.
            min_n: only variants with ``numTests >= min_n`` are included.
            xlim: x-axis limits; ``None`` keeps the current limits.
            ax: axes to draw on. A new 3x3 inch figure is created when None.
            color: marker face color.
            marker: marker style.
        """
        variant_table = self.variant_table

        variant_table.loc[:, 'ci_width'] = (variant_table.dG_ub - variant_table.dG_lb)/2
        variant_table.loc[:, 'dG_bin'] = np.digitize(variant_table.dG, binedges)

        x, y, yerr = returnFractionGroupedBy(variant_table.loc[variant_table.numTests >= min_n], 'dG_bin', 'ci_width')

        # np.digitize index i means binedges[i-1] <= dG < binedges[i], so the
        # center of bin i is binedges[0] + (i - 0.5)*binwidth; index 0 and
        # index len(binedges) are the under- and overflow bins.
        binwidth = (binedges[1] - binedges[0])
        bincenters = np.arange(binedges[0]-binwidth*.5, binedges[-1]+2*binwidth, binwidth)
        x = bincenters[np.array(x).astype(int)]

        if ax is None:
            fig = plt.figure(figsize=(3,3))
            ax = fig.add_subplot(111)
            fig.subplots_adjust(left=0.2, bottom=0.2, top=0.95, right=0.95)
        ax.scatter(x, y, c=color, edgecolor='k', linewidth=0.5, marker=marker, s=20)
        ax.errorbar(x, y, yerr, fmt='-', elinewidth=1, capsize=0, capthick=1,
                    color=color, linestyle='', ecolor='k', linewidth=0.5)

        # Decorate the axes that were drawn on; the pyplot functions act on
        # the *current* axes, which need not be a caller-supplied `ax`.
        ax.set_xlabel(r'$\Delta G$ (kcal/mol)')
        ax.set_ylabel('average error (kcal/mol)')
        ax.set_ylim(ylim)
        ax.set_xlim(xlim)
        fix_axes(ax)
 def plotHex(self, xlim=[0, 800], min_fluorescence=None):
     """Hexbin ``self.y`` against ``self.x`` and overlay the median-ratio line.

     Both axes share the limits ``xlim``. The dotted black line has the
     median of y/x over the clusters selected by ``getGoodClusters`` as its
     slope; the grey line is the identity.

     Returns:
         The tuple ``(x, y, index)`` where ``index`` is the result of
         ``self.getGoodClusters(min_fluorescence=min_fluorescence)``.
     """
     signal1 = self.x
     signal2 = self.y

     fig = plt.figure(figsize=(4,3))
     ax = fig.add_subplot(111, aspect='equal')
     hexes = ax.hexbin(signal1, signal2, cmap='Spectral_r', mincnt=1,
                       bins='log', extent=xlim+xlim)
     plt.colorbar(hexes)

     # same range on both axes
     for set_limits in (ax.set_xlim, ax.set_ylim):
         set_limits(xlim)

     index = self.getGoodClusters(min_fluorescence=min_fluorescence)
     ratios = signal2.loc[index]/signal1.loc[index]
     slope = ratios.median()

     plt.plot(xlim, np.array(xlim)*slope, 'k:')
     plt.plot(xlim, xlim, color='0.5', alpha=0.5)
     plt.xlabel('signal in image 1')
     plt.ylabel('signal in image 2')
     plt.annotate('slope (median)=%4.2f'%(slope), xy=(.05, .95),
                  xycoords='axes fraction',
                  horizontalalignment='left', verticalalignment='top')
     plt.tight_layout()
     fix_axes(ax)
     return (signal1, signal2, index)
 def plotFractionNotDifferentByN(self, offset=0, xlim=[0,25], rep=1):
     """Plot the fraction not different between replicates by measurement count.

     The table from ``findCombinedTable(offset=offset)`` is grouped on the
     column ``'rep<rep>_n'`` and its 'within_bound' column is handed to
     ``returnFractionGroupedBy`` (defined elsewhere); the three values that
     helper returns are drawn as points with error bars.
     """
     # get data to plot
     group_column = 'rep%d_n'%rep
     counts, fraction, fraction_err = returnFractionGroupedBy(
         self.findCombinedTable(offset=offset), group_column, 'within_bound')

     ax = plt.figure(figsize=(4,3)).add_subplot(111)
     ax.scatter(counts, fraction, c='r', edgecolor='k', linewidth=0.5, marker='>')
     ax.errorbar(counts, fraction, fraction_err, fmt='-', elinewidth=1,
                 capsize=2, capthick=1, color='r', linestyle='', ecolor='k',
                 linewidth=1)

     # major ticks every 5 with integer labels, minor ticks every 1
     xaxis = ax.xaxis
     xaxis.set_major_locator(mpl.ticker.MultipleLocator(5))
     xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%d'))
     xaxis.set_minor_locator(mpl.ticker.MultipleLocator(1))

     plt.xlabel('number of measurements')
     plt.ylabel('fraction not different from replicate')
     plt.ylim(0, 1)
     plt.xlim(xlim)
     fix_axes(ax)
     plt.tight_layout()
 def plotBootstrappedDist(self, variant, param, log_axis=False):
     """Histogram the per-cluster values of `param` for one variant.

     The clusters are those in the index of
     ``self.getVariantBindingSeries(variant)``. Vertical lines mark the
     variant-table entry for `param` (solid) and its ``param + '_lb'`` /
     ``param + '_ub'`` entries (dotted). With ``log_axis`` everything is
     shown as log10.
     """
     fits = self.variant_table
     clusters = self.cluster_table
     cluster_ids = self.getVariantBindingSeries(variant).index

     values = clusters.loc[cluster_ids, param].dropna()
     center = fits.loc[variant, param]
     upper = fits.loc[variant, param+'_ub']
     lower = fits.loc[variant, param+'_lb']
     xlabel = param

     if log_axis:
         values, center, upper, lower = [np.log10(item)
                                         for item in (values, center, upper, lower)]
         xlabel = 'log '+param

     plt.figure(figsize=(4,3))
     sns.distplot(values, color='r', kde=False)
     for position, style in ((center, '-'), (lower, ':'), (upper, ':')):
         plt.axvline(position, color='0.5', linestyle=style)
     plt.xlabel(xlabel)
     fix_axes(plt.gca())
     plt.tight_layout()
 def plotKdVersusKoff(self, ):
     """Hexbin koff (log-scaled y axis) against dG for ``self.all_variants``.

     koff is read from ``self.offRate.variant_table`` and dG from
     ``self.affinityData.variant_table``.
     """
     results_off = self.offRate.variant_table.loc[self.all_variants]
     dG = self.affinityData.variant_table.dG.loc[self.all_variants]

     plt.figure(figsize=(4,4))
     plt.hexbin(dG, results_off.koff, yscale='log', mincnt=1, cmap='Spectral_r')
     # raw string: '\D' is not a valid escape sequence in a normal literal
     plt.xlabel(r'$\Delta G$ (kcal/mol)')
     # NOTE(review): the label gives koff in s; an off-rate would normally be
     # in 1/s -- confirm before changing the label text.
     plt.ylabel('$k_{off}$ (s)')
     fix_axes(plt.gca())
     plt.tight_layout()
 def plotNumberOfMeasurments(self, xlim=[1,100], ax=None):
     """Plot a histogram of ``numTests`` (measurements per variant).

     Args:
         xlim: ``[start, stop]``; used both as the x-axis limits and, via
             ``np.arange(*xlim)``, as the histogram bin edges.
         ax: axes to draw on. A new 4x3 inch figure is created when None.
     """
     variant_table = self.variant_table
     if ax is None:
         fig = plt.figure(figsize=(4,3))
         ax = fig.add_subplot(111)
         fig.subplots_adjust(left=0.15, bottom=0.15, top=0.95, right=0.95)

     # Draw on and decorate `ax` explicitly; without `ax=ax` seaborn and the
     # pyplot functions act on the current axes, which need not be a
     # caller-supplied `ax`.
     sns.distplot(variant_table.numTests, bins=np.arange(*xlim), kde=False,
                  hist_kws={'histtype':'stepfilled', 'linewidth':1},
                  color='0.5', ax=ax)
     ax.set_xlabel('number of measurements')
     ax.set_ylabel('number of variants')
     ax.set_xlim(xlim)
     fix_axes(ax)
 def plotSlopeVersusSignal(self, min_fluorescence=None):
     """Hexbin log2(y/x) against the mean of x and y per cluster.

     Only the clusters selected by
     ``self.getGoodClusters(min_fluorescence=min_fluorescence)`` are shown.
     """
     first, second = self.x, self.y
     good = self.getGoodClusters(min_fluorescence=min_fluorescence)

     ratio = second.loc[good]/first.loc[good]
     average = (first + second).loc[good]/2.

     ax = plt.figure(figsize=(4,3)).add_subplot(111)
     ax.hexbin(average, np.log2(ratio), cmap='Spectral_r', mincnt=1, bins='log')
     plt.xlabel('mean signal per cluster')
     plt.ylabel('signal in image2/image1')
     plt.tight_layout()
     fix_axes(plt.gca())
 def findOptimalOffset(self):
     """Scan offsets and return the one with the largest within-bound fraction.

     For 100 offsets evenly spaced in [-1, 1], the sum of the 'within_bound'
     column of ``self.findCombinedTable(offset=offset)`` is divided by the
     number of rows of ``self.findCombinedTable()`` (default offset). The
     resulting curve is plotted.

     Returns:
         The offset (index label) at which that fraction is maximal.
     """
     offsets = np.linspace(-1, 1, 100)
     total_number = float(len(self.findCombinedTable()))

     # Build the series in one go with an explicit float dtype: an empty
     # Series created without a dtype is object-typed on recent pandas.
     number = pd.Series(
         [self.findCombinedTable(offset=offset).within_bound.sum()/total_number
          for offset in offsets],
         index=offsets, dtype=float)

     plt.figure(figsize=(4,3))
     plt.plot(offsets, number)
     plt.xlabel('offset')
     plt.ylabel('number within bound')
     plt.tight_layout()
     fix_axes(plt.gca())
     return number.idxmax()
 def plotEquilibrationTimes(self, concentration, wait_time, initial=1E-9):
     """Overlay the variants' (log koff, log concentration*koff/Kd) on contours.

     The contours are ``100*fraction_equilibrated(10**x + 10**y, wait_time)``
     evaluated on a log10 grid (``fraction_equilibrated`` is defined
     elsewhere). The hexbin shows the variants returned by
     ``self.getGoodVariants()``; Kd comes from
     ``parameters.find_Kd_from_dG(dG)`` scaled by 1E-9.
     """
     parameters = fitting.fittingParameters()
     good = self.getGoodVariants()
     koff = self.offRate.variant_table.loc[good].koff
     dG = self.affinityData.variant_table.loc[good].dG
     kds = parameters.find_Kd_from_dG(dG.astype(float))*1E-9

     # log10 ranges of the grid the contours are evaluated on
     kobs_bounds = [-5, 0]
     koff_bounds = [-4, -2]
     grid_x = np.linspace(koff_bounds[0], koff_bounds[1], num=200)
     grid_y = np.linspace(kobs_bounds[0], kobs_bounds[1], num=200)
     xx, yy = np.meshgrid(grid_x, grid_y)

     # contour levels, as fractions; drawn in percent
     labels = np.array([0, 0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99, 1-1E-12])
     min_kobs = kobs_bounds[0] + np.log10(concentration/initial)
     y_range = [min_kobs, min_kobs+2]

     plt.figure(figsize=(4,4))
     surface = 100*fraction_equilibrated(np.power(10, xx)+np.power(10, yy),
                                         wait_time)
     cs = plt.contour(xx, yy, surface, labels*100, colors='k', linewidths=1)
     plt.clabel(cs, inline=1, fontsize=10, fmt='%1.0f')

     plt.hexbin(np.log10(koff), np.log10(concentration*koff/kds), mincnt=1,
                cmap='Spectral_r', extent=koff_bounds+y_range, gridsize=150)
     plt.xlabel('log$(k_{off})$')
     plt.ylabel('log$([$flow$] k_{on})$')
     plt.title('%.2e'%(concentration*1E9))
     plt.ylim(y_range[0], y_range[1])
     fix_axes(plt.gca())
     plt.tight_layout()
 def plotErrorByNumberofMeasurements(self, xlim=[1,100], ylim=[0,1], ax=None, color='r', marker='>'):
     """Plot the dG confidence-interval half-width against number of measurements.

     The half-width ``(dG_ub - dG_lb)/2`` is grouped by 'numTests' and passed
     to ``returnFractionGroupedBy`` (defined elsewhere); its three return
     values are drawn as points with error bars.

     Side effect: the column 'ci_width' is added to (or overwritten on)
     ``self.variant_table``.

     Args:
         xlim: x-axis limits.
         ylim: y-axis limits.
         ax: axes to draw on. A new 4x3 inch figure is created when None.
         color: marker face color.
         marker: marker style.
     """
     variant_table = self.variant_table
     variant_table.loc[:, 'ci_width'] = (variant_table.dG_ub - variant_table.dG_lb)/2
     x, y, yerr = returnFractionGroupedBy(variant_table, 'numTests', 'ci_width')
     if ax is None:
         fig = plt.figure(figsize=(4,3))
         ax = fig.add_subplot(111)
         fig.subplots_adjust(left=0.15, bottom=0.15, top=0.95, right=0.95)
     ax.scatter(x, y, c=color, edgecolor='k', linewidth=0.5, marker=marker, s=5)
     ax.errorbar(x, y, yerr, fmt='-', elinewidth=1, capsize=0, capthick=1,
                 color=color, linestyle='', ecolor='k', linewidth=0.5)

     # Decorate the axes that were drawn on; the pyplot functions act on the
     # *current* axes, which need not be a caller-supplied `ax`.
     ax.set_xlabel('number of measurements')
     ax.set_ylabel('average error (kcal/mol)')
     ax.set_xlim(xlim)
     ax.set_ylim(ylim)
     fix_axes(ax)
 def plotFractionFit(self):
     """Overlay histograms of 'fitFraction' split at a p-value of 0.01.

     Variants with ``pvalue <= 0.01`` are drawn in red, those with
     ``pvalue > 0.01`` in grey; bins are 0.01 wide on [0, 1].
     """
     table = self.variant_table
     pvalue_cutoff = 0.01
     binwidth = 0.01
     bins = np.arange(0, 1+binwidth, binwidth)

     # Two explicit comparisons (not a mask and its negation) so rows whose
     # comparison is False both ways fall in neither group.
     groups = (
         (table.pvalue <= pvalue_cutoff, 'red', 'passing cutoff'),
         (table.pvalue > pvalue_cutoff, 'grey', 'fails cutoff'),
     )

     plt.figure(figsize=(4, 3.5))
     for selected, shade, name in groups:
         plt.hist(table.loc[selected].fitFraction.values,
                  alpha=0.5, color=shade, bins=bins, label=name)
     plt.ylabel('number of variants')
     plt.xlabel('fraction fit')
     plt.legend(loc='upper left')
     plt.tight_layout()
     fix_axes(plt.gca())
 def plotFractionNotDifferentByDeltaG(self, offset=0, binedges=np.linspace(-12, -6, 12), min_n=0):
     """Plot the fraction not different between replicates per dG bin.

     The table from ``findCombinedTable(offset=offset, binedges=binedges)``
     is grouped on 'rep1_bin' and its 'within_bound' column is passed to
     ``returnFractionGroupedBy`` (defined elsewhere); the three values that
     helper returns are drawn as points with error bars.

     Args:
         offset: forwarded to ``findCombinedTable``.
         binedges: dG bin edges; forwarded to ``findCombinedTable`` and used
             as the tick labels.
         min_n: accepted for interface compatibility; not used here.
     """
     combined = self.findCombinedTable(offset=offset, binedges=binedges)
     x, y, yerr = returnFractionGroupedBy(combined, 'rep1_bin', 'within_bound')

     fig = plt.figure(figsize=(4,3))
     ax = fig.add_subplot(111)

     ax.scatter(x, y, c='r', edgecolor='k', linewidth=0.5, marker='o')
     ax.errorbar(x, y, yerr, fmt='-', elinewidth=1, capsize=2, capthick=1,
                 color='r', linestyle='', ecolor='k', linewidth=1)

     # raw string: '\D' is not a valid escape sequence in a normal literal
     plt.xlabel(r'$\Delta G$')
     plt.ylabel('fraction not different from replicate')
     plt.ylim(0, 1)
     # one tick per edge, placed half a unit below the integer positions 1..len(binedges)
     plt.xticks(np.arange(1, len(binedges)+1)-0.5, ['%.2f'%i for i in binedges], rotation=90)
     plt.xlim([-0.5, len(binedges)+0.5])
     fix_axes(ax)
     plt.tight_layout()
    def compareParam(self, param, log_axes=False, filter_pvalue=False, min_n=0,
                     max_dG=None, variants=None):
        """Hexbin `param` of experiment 2 against experiment 1 and fit a line.

        By default every variant whose value is non-null in both experiments
        is used. Optional cutoffs narrow that set: ``filter_pvalue`` (keeps
        ``self.getGoodVariants()``), ``min_n`` (numTests in both experiments)
        and ``max_dG`` (dG in both experiments). If `variants` is given it is
        used as the selection directly and all cutoffs are ignored.

        The solid black line is the linear regression (on log values when
        ``log_axes``); the dotted red line is a unit-slope reference through
        the means.

        Returns:
            ``(slope, r_value**2)`` of the regression.
        """
        table1 = self.expt1.variant_table
        table2 = self.expt2.variant_table
        x = table1.loc[self.all_variants, param]
        y = table2.loc[self.all_variants, param]

        if variants is None:
            # start from the variants measured in both experiments
            variants = pd.concat([x, y], axis=1).notnull().all(axis=1)
            if filter_pvalue:
                variants = variants&self.getGoodVariants()

            if min_n > 0:
                enough_tests = pd.concat([table1.numTests >= min_n,
                                          table2.numTests >= min_n], axis=1).all(axis=1)
                variants = variants&enough_tests
            if max_dG is not None:
                below_cutoff = pd.concat([table1.dG <= max_dG,
                                          table2.dG <= max_dG], axis=1).all(axis=1)
                variants = variants&below_cutoff

        x = x.loc[variants]
        y = y.loc[variants]

        scale_kws = {'yscale': 'log', 'xscale': 'log'} if log_axes else {}
        plt.figure(figsize=(4,4))
        plt.hexbin(x, y, cmap='Spectral_r', mincnt=1, **scale_kws)
        plt.xlabel('expt1 %s'%param)
        plt.ylabel('expt2 %s'%param)
        ax = fix_axes(plt.gca())
        plt.tight_layout()

        # plot linear fit across the current x range
        xlim = np.array(ax.get_xlim())

        if log_axes:
            slope, intercept, r_value = st.linregress(np.log(x), np.log(y))[:3]
            fit_line = np.power(xlim, slope)*np.exp(intercept)
            reference_line = xlim/np.mean(x)*np.mean(y)
        else:
            slope, intercept, r_value = st.linregress(x, y)[:3]
            fit_line = xlim*slope+intercept
            reference_line = xlim-np.mean(x)+np.mean(y)

        plt.plot(xlim, fit_line, 'k', linewidth=1)
        plt.plot(xlim, reference_line, 'r:', linewidth=1)

        return slope, r_value**2
 def findAlpha(self, n):
     """Estimate the median of (y/x)**(1/n) and plot its distribution.

     Only entries with both ``x > 0`` and ``y > 0`` are used. The interval
     comes from ``bootstrap.ci`` with ``np.median`` and 1000 samples.

     Returns:
         ``(alpha, lb, ub)``: the median and the two bounds from
         ``bootstrap.ci``.
     """
     first, second = self.x, self.y
     positive = (first>0)&(second>0)
     ratios = np.power(second/first, 1/float(n)).loc[positive]

     alpha = ratios.median()
     lb, ub = bootstrap.ci(ratios, statfunction=np.median, n_samples=1000)

     xlim = [0.9 ,1.1]
     bins = np.arange(0.898, 1.1, 0.001)
     plt.figure(figsize=(4,3))
     sns.distplot(ratios, bins=bins, hist_kws={'histtype':'stepfilled'},
                  kde_kws={'clip':xlim})

     # solid line at the median, dotted lines at the interval bounds
     plt.axvline(alpha, color='k', linewidth=0.5)
     for bound in (lb, ub):
         plt.axvline(bound, color='k', linestyle=':', linewidth=0.5)

     plt.xlabel('photobleach fraction per image')
     plt.ylabel('probability')
     plt.xlim(xlim)
     fix_axes(plt.gca())
     plt.tight_layout()
     return alpha, lb, ub
    def bootstrapSlope(self, min_fluorescence=None, log_axis=False):
        """Plot the distribution of per-cluster y/x and mark its median and mode.

        Only clusters selected by
        ``self.getGoodClusters(min_fluorescence=min_fluorescence)`` are used.
        With ``log_axis`` the natural log of the ratio is shown instead.

        NOTE(review): despite the name, no bootstrap is computed here; only
        the median (black) and the most populated histogram bin (dotted red)
        are drawn.

        Returns:
            The plotted ratios (or log-ratios) as computed from x and y.
        """
        x = self.x
        y = self.y
        index = self.getGoodClusters(min_fluorescence=min_fluorescence)

        if log_axis:
            slopes = np.log(y.loc[index]/x.loc[index])
            bins = np.linspace(-3, 3, 100)
            xlim = [-3, 3]
        else:
            slopes = y.loc[index]/x.loc[index]
            bins = np.linspace(0, 5, 100)
            xlim = [0, 2]

        plt.figure(figsize=(4,4))
        sns.distplot(slopes, bins=bins, kde_kws={'clip':xlim},
                     hist_kws={'histtype':'stepfilled'}, color='0.5')

        plt.axvline(slopes.median(), color='k', linewidth=1)

        # Find the most populated bin. np.digitize yields 0 for values below
        # bins[0] and len(bins) for values at/above bins[-1] (and NaN); those
        # two overflow slots have no bin center, so only indices
        # 1..len(bins)-1 compete. argmax returns the lowest index on ties.
        # (np.bincount is used because the shape of scipy.stats.mode's result
        # differs between SciPy versions.)
        digitized = np.digitize(slopes, bins)
        counts = np.bincount(digitized, minlength=len(bins)+1)
        mode = 1 + counts[1:len(bins)].argmax()
        max_prob = (bins[mode] + bins[mode-1])*0.5
        plt.axvline(max_prob, color='r', linewidth=1, linestyle=':', label='max probability')

        annotation_text = ('slope (median)=%4.3f\nslope (max prob)=%4.5f'%(slopes.median(), max_prob))
        plt.annotate(annotation_text, xy=(.05, .95), xycoords='axes fraction',
                        horizontalalignment='left', verticalalignment='top')

        plt.xlabel('slope')
        plt.ylabel('probability distribution')
        plt.xlim(xlim)
        plt.tight_layout()
        fix_axes(plt.gca())
        return slopes
    def plotDeltaGDoubleDagger(self, variant=None, dG_cutoff=None, plot_on=False, params=['koff', 'dG'], variants=None):
        """Hexbin a rate-derived free-energy difference against ddG.

        All values are taken relative to a reference `variant`. x is
        ``dG - dG[variant]``. y is built from
        ``parameters.find_dG_from_Kd(koff)`` or, with ``plot_on``, from the
        negated ``parameters.find_dG_from_Kd(koff/Kd)``, each relative to the
        reference. A linear regression of y on x is drawn and annotated.

        Args:
            variant: reference variant; defaults to the hard-coded 34936.
            dG_cutoff: if given (and `variants` is None), keep only variants
                with ``dG < dG_cutoff``.
            plot_on: choose the koff/Kd-based y instead of the koff-based one.
            params: column names ``[off-rate column, affinity column]`` read
                from the off-rate and affinity variant tables.
            variants: explicit selection; overrides ``getGoodVariants()`` and
                `dG_cutoff`.

        Returns:
            ``(x, y)`` as plotted.
        """
        parameters = fitting.fittingParameters()
        koff = self.offRate.variant_table.loc[self.all_variants, params[0]].astype( float)
        dG = self.affinityData.variant_table.loc[self.all_variants, params[1]].astype(float)

        if variant is None:
            variant = 34936

        # find dG predicted from off and on rates
        dG_dagger = parameters.find_dG_from_Kd(koff.astype(float))
        kds = parameters.find_Kd_from_dG(dG)
        dG_dagger_on = parameters.find_dG_from_Kd((koff/kds).astype(float))

        # find the variants to plot based on goodness of fit and dG cutoff if given
        if variants is None:
            variants = self.getGoodVariants()
            if dG_cutoff is not None:
                variants = variants&(dG < dG_cutoff)

        # decide which y to plot based on user input
        x = (dG - dG.loc[variant]).loc[variants]
        if plot_on:
            y = -(dG_dagger_on - dG_dagger_on.loc[variant]).loc[variants]
        else:
            y = (dG_dagger - dG_dagger.loc[variant]).loc[variants]

        # plot
        fig = plt.figure(figsize=(3,3))
        ax = fig.add_subplot(111, aspect='equal')

        # same data range on both axes
        xlim = [min(x.min(), y.min()), max(x.max(), y.max())]
        plt.hexbin(x, y,  extent=xlim+xlim, gridsize=100, cmap='Spectral_r', mincnt=1)
        slope, intercept, r_value, p_value, std_err = st.linregress(x,y)

        xlim = np.array(ax.get_xlim())
        plt.plot(xlim, xlim*slope + intercept, 'k--', linewidth=1)
        if plot_on:
            plt.plot(xlim, [y.mean()]*2, 'r', linewidth=1)
        else:
            plt.plot(xlim, xlim, 'r', linewidth=1)
        # raw strings: '\D' and '\d' are not valid escape sequences
        plt.xlabel(r'$\Delta \Delta G$')
        plt.ylabel(r'$\Delta \Delta G_{off}\dagger$')

        plt.tight_layout()
        fix_axes(ax)

        annotationText = ['slope = %4.2f '%(slope),
                          'intercept = %4.2f'%intercept,
                          'pvalue = %4.1e'%p_value
                          ]
        ax.annotate('\n'.join(annotationText), xy=(.05, .95), xycoords='axes fraction',
                    horizontalalignment='left', verticalalignment='top')

        return x, y