Exemplo n.º 1
0
def plotDocUsageForProposal(docUsageByUID, savefilename=None, **kwargs):
    ''' Make trace plot of doc usage for each component.
    '''
    pylab.figure()
    L = 0
    maxVal = 0
    for k, uid in enumerate(docUsageByUID):
        ys = np.asarray(docUsageByUID[uid])
        xs = np.arange(0, ys.size)
        if k < 6: # only a few labels fit well on a legend
            pylab.plot(xs, ys, label=uid)
        else:
            pylab.plot(xs, ys)
        L = np.maximum(L, ys.size)
        maxVal = np.maximum(maxVal, ys.max())
    # Use big chunk of left-hand side of plot for legend display
    xlims = np.asarray([-0.75*L, L-0.5])
    pylab.xlim(xlims)
    pylab.xticks(np.arange(1, L))
    pylab.ylim([0, 1.1*maxVal])
    pylab.xlabel('num proposal steps')
    pylab.ylabel('num docs using each comp')
    pylab.legend(loc='upper left', fontsize=12)
    pylab.subplots_adjust(left=0.2)
    if savefilename is not None:
        pylab.savefig(savefilename, pad_inches=0)
        pylab.close('all')
Exemplo n.º 2
0
def plotGauss1D(mu, sigma2, color='b', ax_handle=None, **kwargs):
    if ax_handle is not None:
        pylab.sca(ax_handle)

    mu = np.squeeze(mu)
    sigma = np.sqrt(np.squeeze(sigma2))

    assert mu.size == 1 and mu.ndim == 0
    assert sigma.size == 1 and sigma.ndim == 0

    xs = mu + sigma * np.arange(-4, 4, 0.01)
    ps = 1. / np.sqrt(2 * np.pi) * 1. / sigma * \
        np.exp(-0.5 * (xs - mu)**2 / sigma**2)
    pylab.plot(xs, ps, '.', markerfacecolor=color, markeredgecolor=color)
Exemplo n.º 3
0
def plotGauss2DContour(
    mu,
    Sigma,
    color='b',
    radiusLengths=[1.0, 3.0],
    markersize=3.0,
    ax_handle=None,
):
    ''' Plot elliptical contours for provided mean mu, covariance Sigma.

    Uses only the first 2 dimensions.

    Post Condition
    --------------
    Plot created on current axes
    '''
    if ax_handle is not None:
        pylab.sca(ax_handle)

    mu = np.asarray(mu)
    Sigma = np.asarray(Sigma)

    mu = mu[:2]
    Sigma = Sigma[:2, :2]
    D, V = np.linalg.eig(Sigma)
    sqrtSigma = np.dot(V, np.sqrt(np.diag(D)))

    # Prep for plotting elliptical contours
    # by creating grid of (x,y) points along perfect circle
    ts = np.arange(-np.pi, np.pi, 0.03)
    x = np.sin(ts)
    y = np.cos(ts)
    Zcirc = np.vstack([x, y])

    # Warp circle into ellipse defined by Sigma's eigenvectors
    Zellipse = np.dot(sqrtSigma, Zcirc)

    # plot contour lines across several radius lengths
    # TODO: instead, choose radius by percentage of prob mass contained within
    for r in radiusLengths:
        Z = r * Zellipse + mu[:, np.newaxis]
        pylab.plot(Z[0],
                   Z[1],
                   '.',
                   markersize=markersize,
                   markerfacecolor=color,
                   markeredgecolor=color)
Exemplo n.º 4
0
def makePlot(muVals=[(0.01,0), (0.1,0), (1,0), (10,0)], doCorrection=1):
    pylab.figure()
    xgrid = np.linspace(-8, 8, 2000)
    pylab.hold('on')
    pylab.plot(xgrid, np.zeros_like(xgrid), ':', alpha=0.2)
    for mu1, mu2 in muVals:
        ygrid = calcBregDiv_Gauss1D(xgrid, mu1, mu2, doCorrection=doCorrection)
        print ygrid.min()
        pylab.plot(xgrid, ygrid, label='mu1=% 6.2f mu2=% 6.2f' % (mu1, mu2))
    pylab.legend(loc='lower right')
    pylab.xlim([xgrid.min(), xgrid.max()])
    pylab.ylim([xgrid.min(), xgrid.max()])
    pylab.xlabel('x')
    if doCorrection:
        pylab.ylabel('D(x, \mu) + correction')
    else:
        pylab.ylabel('D(x, \mu)')
Exemplo n.º 5
0
def makePlot(muVals=[0.01, 0.1, 1, 10], B=1e-10, nu=2, justMahalTerm=0):
    pylab.figure()
    xgrid = np.linspace(0, 8, 2000)
    pylab.hold('on')
    for mu in muVals:
        ygrid = calcBregDiv_ZeroMean(xgrid,
                                     mu,
                                     B=B,
                                     nu=nu,
                                     justMahalTerm=justMahalTerm)
        pylab.plot(xgrid, ygrid, linewidth=2, label='\mu=%6.2f' % (mu))
    pylab.legend(loc='upper right')
    pylab.xlim([-0.1, xgrid.max()])
    pylab.ylim([-0.1, xgrid.max()])
    pylab.xlabel('x')
    pylab.ylabel('D(x, \mu)')
    pylab.title('B=%s  nu=%s' % (str(B), str(nu)))
Exemplo n.º 6
0
def plotELBOtermsForProposal(
        curLdict, propLdictList,
        xs=None,
        ymin=-0.5,
        ymax=0.5,
        savefilename=None,
        **kwargs):
    ''' Create trace plot of ELBO gain/loss relative to current model.
    '''
    pylab.figure()
    L = len(propLdictList)
    if xs is None:
        xs = np.arange(0, L)
    legendKeys = []
    for key in curLdict:
        if key.count('_') == 0:
            legendKeys.append(key)
    for key in legendKeys:
        if key.count('total'):
            linewidth= 4
            alpha = 1
            style = '-'
        else:
            linewidth = 3
            alpha = 0.5
            style = '--'
        ys = np.asarray([propLdictList[i][key] for i in range(L)])
        ys -= curLdict[key]
        pylab.plot(xs, ys, style,
                   color=_getLineColorFromELBOKey(key),
                   linewidth=linewidth,
                   alpha=alpha,
                   label=key)
    L = L + 1
    xlims = np.asarray([-0.75*L, L-0.5])
    pylab.xlim(xlims)
    pylab.xticks(xs)
    pylab.plot(xlims, np.zeros_like(xlims), 'k:')
    pylab.xlabel('num proposal steps')
    pylab.ylabel('L gain (prop - current)')
    pylab.legend(loc='lower left', fontsize=12)
    pylab.subplots_adjust(left=0.2)
    if savefilename is not None:
        pylab.savefig(savefilename, pad_inches=0)
        pylab.close('all')
Exemplo n.º 7
0
def plotSingleLineAcrossJobsByXVar(jpathPattern,
                                   label='',
                                   xvar=None,
                                   xvals=None,
                                   xlabel=None,
                                   yvar='evidence',
                                   lineStyle='.-',
                                   taskids='all',
                                   lineID=0,
                                   lvar='',
                                   **kwargs):
    ''' Create line plot in current figure for job matching the pattern

    Iterates over each xval in provided list of values.
    Each one corresponds to a single saved job.

    Post Condition
    --------------
    Current axes have one line added.
    '''
    prefixfilepath = os.path.sep.join(jpathPattern.split(os.path.sep)[:-1])
    PPListMap = makePPListMapFromJPattern(jpathPattern)
    if xvals is None:
        xvals = PPListMap[xvar]

    xs = np.zeros(len(xvals))
    ys = np.zeros(len(xvals))
    jpathList = makeListOfJPatternsWithSpecificVals(
        PPListMap,
        prefixfilepath=prefixfilepath,
        key=xvar,
        vals=xvals,
        **kwargs)

    plotargs = copy.deepcopy(DefaultLinePlotKwArgs)
    # Plot all tasks as faint points with no connections
    for i, jobpath in enumerate(jpathList):
        if not os.path.exists(jobpath):
            raise ValueError("PATH NOT FOUND: %s" % (jobpath))
        x = float(xvals[i])

        for key in plotargs:
            if key in kwargs:
                plotargs[key] = kwargs[key]
        plotargs['markeredgecolor'] = plotargs['color']

        alltaskids = BNPYArgParser.parse_task_ids(jobpath, taskids)
        for tid in alltaskids:
            y = loadYValFromDisk(jobpath, tid, yvar=yvar)
            pylab.plot(x, y, '.', **plotargs)

    # Plot top-ranked tasks as solid points connected by line
    for i, jobpath in enumerate(jpathList):
        rankTasksForSingleJobOnDisk(os.path.join(jobpath))
        x = float(xvals[i])
        y = loadYValFromDisk(jobpath, '.best', yvar=yvar)
        assert isinstance(x, float)
        assert isinstance(y, float)
        xs[i] = x
        ys[i] = y

    plotargs = copy.deepcopy(DefaultLinePlotKwArgs)
    for key in plotargs:
        if key in kwargs:
            plotargs[key] = kwargs[key]
    plotargs['markeredgecolor'] = plotargs['color']
    plotargs['label'] = label
    pylab.plot(xs, ys, lineStyle, **plotargs)

    if lineID == 0:
        if xlabel is None:
            xlabel = xvar
        pylab.xlabel(xlabel)
        pylab.ylabel(LabelMap[yvar])
Exemplo n.º 8
0
def plotGauss2DFromHModel(hmodel,
                          compListToPlot=None,
                          compsToHighlight=None,
                          activeCompIDs=None,
                          MaxKToDisplay=50,
                          proba_thr=0.0001,
                          ax_handle=None,
                          dataset=None,
                          Colors=Colors,
                          **kwargs):
    ''' Plot 2D contours for components in hmodel in current pylab figure

    Args
    ----
    hmodel : bnpy HModel object
    compListToPlot : array-like of integer IDs of components within hmodel
    compsToHighlight : int or array-like
        integer IDs to highlight
        if None, all components get unique colors
        if not None, only highlighted components get colors.
    proba_thr : float
        Minimum weight assigned to component in order to be plotted.
        All components with weight below proba_thr are ignored.
    '''
    if ax_handle is not None:
        pylab.sca(ax_handle)

    if compsToHighlight is not None:
        compsToHighlight = np.asarray(compsToHighlight)
        if compsToHighlight.ndim == 0:
            compsToHighlight = np.asarray([compsToHighlight])
    else:
        compsToHighlight = list()
    if compListToPlot is None:
        compListToPlot = np.arange(0, hmodel.obsModel.K)
    if activeCompIDs is None:
        activeCompIDs = np.arange(0, hmodel.obsModel.K)

    # Load appearance probabilities as single vector
    if hmodel.allocModel.K == hmodel.obsModel.K:
        w = hmodel.allocModel.get_active_comp_probs()
    else:
        w = np.ones(hmodel.obsModel.K)

    if dataset is not None and hasattr(dataset, 'X'):
        pylab.plot(dataset.X[:, 0],
                   dataset.X[:, 1],
                   '.',
                   color=(.3, .3, .3),
                   alpha=0.5)

    nSkip = 0
    nGood = 0
    for ii, compID in enumerate(compListToPlot):
        if compID not in activeCompIDs:
            continue

        kk = np.flatnonzero(activeCompIDs == compID)
        assert kk.size == 1
        kk = kk[0]

        if w[kk] < proba_thr and compID not in compsToHighlight:
            nSkip += 1
            continue

        mu = hmodel.obsModel.get_mean_for_comp(kk)
        Sigma = hmodel.obsModel.get_covar_mat_for_comp(kk)

        if len(compsToHighlight) == 0 or compID in compsToHighlight:
            color = Colors[ii % len(Colors)]
            plotGauss2DContour(mu, Sigma, color=color)
        elif kk not in compsToHighlight:
            plotGauss2DContour(mu, Sigma, color='k')

        nGood += 1
        if nGood >= MaxKToDisplay:
            print('DISPLAY LIMIT EXCEEDED. Showing %d/%d components' \
                % (nGood, len(activeCompIDs)))
            break
    if nSkip > 0:
        print('SKIPPED %d comps with size below %.2f' % (nSkip, proba_thr))
Exemplo n.º 9
0
def tryDeleteProposalForSpecificTarget_HDPTopicModel(
        Data,
        hmodel,
        LPkwargs=dict(),
        ktarget=0,
        kabsorbList=[1],
        verbose=True,
        doPlotComps=True,
        doPlotELBO=True,
        doPlotDocTopicCount=False,
        nELBOSteps=3,
        nUpdateSteps=5,
        d_initTargetDocTopicCount='warm_start',
        d_initWordCounts='none',
        **kwargs):
    ''' Execute merge for specific whole dataset

    Returns
    -------
    propModel : HModel
    propSS : SuffStatBag
    propLscore : scalar real
        ELBO score of proposed model
    curModel : HModel
    curSS : SuffStatBag
    curLscore : scalar real
        ELBO score of current model
    '''
    kabsorbList = parse_list_of_absorbing_comps(
        kabsorbList, ktarget, hmodel.obsModel.K)

    from bnpy.allocmodel.topics.HDPTopicRestrictedLocalStep2 \
        import summarizeRestrictedLocalStep_HDPTopicModel
    curModel = hmodel.copy()
    propModel = hmodel.copy()

    # Update current model
    if verbose:
        print ""
        print "Loading model from disk and performing local step..."
    starttime = time.time()
    curLP = curModel.calc_local_params(Data, **LPkwargs)
    curSS = curModel.get_global_suff_stats(Data, curLP, doPrecompEntropy=1)
    curModel.update_global_params(curSS)
    curLdict = curModel.calc_evidence(SS=curSS, todict=1)
    curLscore = curLdict['Ltotal']
    if verbose:
        print "%5.1f sec to obtain current model, LP, and SS" % (
            time.time() - starttime)

    nontrivialdocIDs = np.flatnonzero(curLP['DocTopicCount'][:, ktarget] > .01)
    sort_mask = np.argsort(-1*curLP['DocTopicCount'][nontrivialdocIDs, ktarget]) 
    nontrivialdocIDs = nontrivialdocIDs[sort_mask]
    docIDs = nontrivialdocIDs[:5]
    if verbose:
        print ""
        print "Proposing deletion of cluster %d" % (ktarget)
        print "    total mass N_k = %.1f" % (curSS.getCountVec()[ktarget])
        print "    %d docs with non-trivial mass" % (nontrivialdocIDs.size)
        print ""
        print "Absorbing into %d/%d remaining clusters" % (
            len(kabsorbList), curSS.K-1)
        print " ".join(['%3d' % (kk) for kk in kabsorbList])
        print ""

    # Create init observation model for absorbing states
    xObsModel = propModel.obsModel.copy()
    xinitSS = curSS.copy(includeELBOTerms=False, includeMergeTerms=False)
    for k in reversed(np.arange(xObsModel.K)):
        if k not in kabsorbList:
            xinitSS.removeComp(k)
    # Find clusters correlated in appearance with the target
    if curModel.getObsModelName().count('Mult') and d_initWordCounts.count('bycorr'):
        corrVec = calcCorrelationFromTargetToAbsorbingSet(
            curLP['DocTopicCount'], ktarget, kabsorbList)
        bestAbsorbIDs = np.flatnonzero(corrVec >= .001)
        print "absorbIDs with best correlation:"
        print bestAbsorbIDs
        for k in bestAbsorbIDs:
            xinitSS.WordCounts[k,:] += curSS.WordCounts[ktarget,:]
    xObsModel.update_global_params(xinitSS)

    # Create init pi vector for absorbing states
    curPiVec = propModel.allocModel.get_active_comp_probs()
    xPiVec = curPiVec[kabsorbList].copy()
    xPiVec /= xPiVec.sum()
    xPiVec *= (curPiVec[kabsorbList].sum() +  curPiVec[ktarget])
    assert np.allclose(np.sum(xPiVec),
        curPiVec[ktarget] + np.sum(curPiVec[kabsorbList]))

    if verbose:
        print "Reassigning target mass among absorbing set..."
    starttime = time.time()
    propLscoreList = list()
    for ELBOstep in range(nELBOSteps):
        xSS, Info = summarizeRestrictedLocalStep_HDPTopicModel(
            Dslice=Data,
            curModel=curModel,
            curLPslice=curLP,
            ktarget=ktarget,
            kabsorbList=kabsorbList,
            curPiVec=curPiVec,
            xPiVec=xPiVec,
            xObsModel=xObsModel,
            nUpdateSteps=nUpdateSteps,
            d_initTargetDocTopicCount=d_initTargetDocTopicCount,
            LPkwargs=LPkwargs)

        if ELBOstep < nELBOSteps - 1:
            # Update the xObsModel
            xObsModel.update_global_params(xSS)
            # TODO: update xPiVec???

        print " completed step %d/%d after %5.1f sec" % (
            ELBOstep+1, nELBOSteps, time.time() - starttime)

        propSS = curSS.copy()
        propSS.replaceCompsWithContraction(
            replaceSS=xSS,
            replaceUIDs=[curSS.uids[k] for k in kabsorbList],
            removeUIDs=[curSS.uids[ktarget]],
            )
        assert np.allclose(propSS.getCountVec().sum(),
            curSS.getCountVec().sum(),
            atol=0.01,
            rtol=0)
        propModel.update_global_params(propSS)
        propLdict = propModel.calc_evidence(SS=propSS, todict=1)
        propLscore = propModel.calc_evidence(SS=propSS)
        propLscoreList.append(propLscore)

    if verbose:
        print ""
        print "Proposal result:"
        if propLscore - curLscore > 0:
            print "  ACCEPTED"
        else:
            print "  REJECTED"
        print "%.4e  cur ELBO score" % (curLscore)
        print "%.4e prop ELBO score" % (propLscore)
        print "% .4e change in ELBO score" % (propLscore - curLscore)
        print ""
        for key in sorted(curLdict.keys()):
            if key.count('_') or key.count('total'):
                continue
            print "  gain %8s % .3e" % (
                key, propLdict[key] - curLdict[key])
        print ""
        if docIDs.size > 0:
            np.set_printoptions(suppress=1, precision=2, linewidth=120)
            xLPslice = Info['xLPslice']

            print "BEFORE"
            print "-----"
            print np.hstack([
                curLP['DocTopicCount'][docIDs,:][:,kabsorbList],
                curLP['DocTopicCount'][docIDs,:][:,ktarget][:,np.newaxis]
                ])
            print "AFTER"
            print "-----"
            print xLPslice['DocTopicCount'][docIDs,:]

    if doPlotELBO:
        import bnpy.viz
        from bnpy.viz.PlotUtil import pylab
        bnpy.viz.PlotUtil.ConfigPylabDefaults(pylab)
        iters = np.arange(len(propLscoreList))
        pylab.plot(iters, propLscoreList, 'b-')
        pylab.plot(iters, curLscore*np.ones_like(iters), 'k--')
        pylab.show()

    if doPlotDocTopicCount:
        import bnpy.viz
        from bnpy.viz.PlotUtil import pylab
        bnpy.viz.PlotUtil.ConfigPylabDefaults(pylab)

        kplotList = [x for x in kabsorbList]
        kplotList.append(ktarget)
        for d in docIDs:
            curDTClabels = ['%.1f' % (x) for x in 
                curLP['DocTopicCount'][d, kplotList]]
            bnpy.viz.PlotComps.plotCompsFromHModel(
                curModel,
                compListToPlot=kplotList,
                compsToHighlight=[ktarget],
                xlabels=curDTClabels,
                vmin=0,
                vmax=.01)
            fig = pylab.gcf()
            fig.canvas.set_window_title('doc %d BEFORE' % (d))

            propLP = Info['xLPslice']
            propDTClabels = ['%.1f' % (x) for x in 
                propLP['DocTopicCount'][d, :]]
            bnpy.viz.PlotComps.plotCompsFromHModel(
                propModel,
                xlabels=propDTClabels,
                vmin=0,
                vmax=.01)
            fig = pylab.gcf()
            fig.canvas.set_window_title('doc %d AFTER' % (d))
            pylab.show(block=False)

        # Plot docs        
        dIm = np.zeros((docIDs.size*2, 900))
        dImLabels = list()
        tImLabels = list()
        row = 0
        for ii,d in enumerate(docIDs):
            start = Data.doc_range[d]
            stop = Data.doc_range[d+1]
            wid = Data.word_id[start:stop]
            wct = Data.word_count[start:stop]
            dIm[row, wid] = wct
            dImLabels.append('doc %d' % (d))

            tmask = np.flatnonzero(curLP['resp'][start:stop, ktarget] > .01)
            targetDoc = np.zeros(900)
            dIm[row+docIDs.size, wid[tmask]] = wct[tmask] \
                * curLP['resp'][start + 1*tmask, ktarget]
            tImLabels.append('trgt doc %d' % (d))
            row += 1

        bnpy.viz.BarsViz.showTopicsAsSquareImages(
            dIm,
            ncols=2,
            vmin=0,
            vmax=1,
            xlabels=dImLabels.extend(tImLabels),
            cmap='jet')
        pylab.show()

    if doPlotComps:
        import bnpy.viz
        from bnpy.viz.PlotUtil import pylab
        bnpy.viz.PlotUtil.ConfigPylabDefaults(pylab)

        bnpy.viz.PlotComps.plotCompsFromSS(
                curModel,
                curSS,
                compsToHighlight=[ktarget],
                vmin=0,
                vmax=.01)
        fig = pylab.gcf()
        fig.canvas.set_window_title('BEFORE')

        bnpy.viz.PlotComps.plotCompsFromSS(
                propModel,
                propSS,
                vmin=0,
                vmax=.01)
        fig = pylab.gcf()
        fig.canvas.set_window_title('AFTER')
        pylab.show()
    return (
        propModel,
        propSS,
        propLscoreList,
        curModel,
        curSS,
        curLscore)
Exemplo n.º 10
0
def plot_all_tasks_for_job(jobpath, label, taskids=None,
                           lineType='.-',
                           spreadLineType='--',
                           color=None,
                           yvar='avgLikScore',
                           xvar='laps',
                           markersize=10,
                           linewidth=2,
                           minLap=0,
                           showFinalPt=0,
                           fileSuffix='PredLik.mat',
                           xjitter=None,
                           prefix='predlik',
                           colorID=0,
                           **kwargs):
    ''' Create line plot in current figure for each task/run of jobpath
    '''
    if not os.path.exists(jobpath):
        print('PATH NOT FOUND', jobpath)
        return None
    if not yvar.startswith('avg') and yvar.count('Kactive') == 0:
        yvar = 'avg' + yvar
    if not yvar.endswith('Score') and yvar.count('Kactive') == 0:
        yvar = yvar + 'Score'

    if color is None:
        color = Colors[colorID % len(Colors)]
    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)

    for tt, taskid in enumerate(taskids):
        taskoutpath = os.path.join(jobpath, taskid)
        hpaths = glob.glob(os.path.join(taskoutpath, '*' + fileSuffix))
        txtpaths = glob.glob(os.path.join(taskoutpath, 'predlik-*.txt'))
        ys_hi = None
        ys_lo = None
        if len(txtpaths) > 0:
            if fileSuffix.endswith('.txt'):
                suffix = '-' + fileSuffix
            else:
                suffix = '.txt'
            if xvar.count('lap'):
                xs = np.loadtxt(
                    os.path.join(taskoutpath, prefix + '-lapTrain.txt'))
            elif xvar.count('K'):
                xs = np.loadtxt(os.path.join(taskoutpath, prefix + '-K.txt'))
            elif xvar.count('time'):
                xs = np.loadtxt(os.path.join(
                    taskoutpath, prefix + '-timeTrain.txt'))
            else:
                raise ValueError("Unrecognized xvar: " + xvar)
            if yvar.count('Kactive') and not yvar.count('Percentile'):
                ys = np.loadtxt(os.path.join(taskoutpath,
                        prefix + '-' + yvar + 'Percentile50.txt'))
                ys_lo = np.loadtxt(os.path.join(taskoutpath,
                    prefix + '-' + yvar + 'Percentile10.txt'))
                ys_hi = np.loadtxt(os.path.join(taskoutpath,
                    prefix + '-' + yvar + 'Percentile90.txt'))
            else:
                ys = np.loadtxt(
                    os.path.join(taskoutpath, prefix + '-' + yvar + suffix))

            if minLap > 0 and taskoutpath.count('fix'):
                mask = laps > minLap
                xs = xs[mask]
                ys = ys[mask]
        elif len(hpaths) > 0:
            hpaths.sort()
            basenames = [x.split(os.path.sep)[-1] for x in hpaths]
            xs = np.asarray([float(x[3:11]) for x in basenames])
            ys = np.zeros_like(xs)
            for ii, hpath in enumerate(hpaths):
                MatVars = scipy.io.loadmat(hpath)
                ys[ii] = float(MatVars['avgPredLL'])
        else:
            raise ValueError(
                'Pred Lik data unavailable for job\n' + taskoutpath)

        plotargs = dict(markersize=markersize, linewidth=linewidth, label=None,
                        color=color, markeredgecolor=color,
                        )
        plotargs.update(kwargs)

        if tt == 0:
            plotargs['label'] = label
        if xjitter is not None:
            xs = xs + xjitter
        pylab.plot(xs, ys, lineType, **plotargs)
        if ys_lo is not None:
            del plotargs['label']
            pylab.plot(xs, ys_lo, spreadLineType, **plotargs)
            pylab.plot(xs, ys_hi, spreadLineType, **plotargs)

        if showFinalPt:
            pylab.plot(xs[-1], ys[-1], '.', **plotargs)
    pylab.xlabel(XLabelMap[xvar])
    pylab.ylabel(YLabelMap[yvar])
Exemplo n.º 11
0
def plot_all_tasks_for_job(jobpath,
                           label,
                           taskids=None,
                           color=None,
                           colorID=0,
                           density=2,
                           yvar='evidence',
                           markersize=10,
                           linewidth=2,
                           linestyle='-',
                           drawLineToXMax=None,
                           showOnlyAfterLap=0,
                           xvar='laps',
                           **kwargs):
    ''' Create line plot in current figure for each task/run of jobpath
    '''
    if not os.path.exists(jobpath):
        if not jobpath.startswith(os.path.sep):
            jobpath_tmp = os.path.join(os.environ['BNPYOUTDIR'], jobpath)
            if not os.path.exists(jobpath_tmp):
                raise ValueError("PATH NOT FOUND: %s" % (jobpath))
            jobpath = jobpath_tmp
    if color is None:
        color = Colors[colorID % len(Colors)]
    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)

    if yvar == 'hamming-distance':
        yspfile = os.path.join(jobpath, taskids[0], yvar + '-saved-params.txt')
        if xvar == 'laps' and os.path.isfile(yspfile):
            xvar = 'laps-saved-params'

    for tt, taskid in enumerate(taskids):
        xs = None
        ys = None
        laps = None

        try:
            var_ext = ''
            ytxtfile = os.path.join(jobpath, taskid, yvar + '.txt')
            if not os.path.isfile(ytxtfile):
                var_ext = '-saved-params'
                ytxtfile = os.path.join(jobpath, taskid,
                                        yvar + var_ext + '.txt')
            ys = np.loadtxt(ytxtfile)

            if ytxtfile.count('saved-params'):
                laptxtfile = os.path.join(jobpath, taskid,
                                          'laps-saved-params.txt')
            else:
                laptxtfile = os.path.join(jobpath, taskid, 'laps.txt')
        except IOError as e:
            # TODO: when is this code needed?
            # xs, ys = loadXYFromTopicModelFiles(jobpath, taskid)
            try:
                if isinstance(xs, np.ndarray) and yvar.count('Keff'):
                    ys = loadKeffForTask(os.path.join(jobpath, taskid),
                                         **kwargs)
                    assert xs.size == ys.size
                else:
                    # Heldout metrics
                    xs, ys = loadXYFromTopicModelSummaryFiles(jobpath,
                                                              taskid,
                                                              xvar=xvar,
                                                              yvar=yvar)
                    if showOnlyAfterLap and showOnlyAfterLap > 0:
                        laps, _ = loadXYFromTopicModelSummaryFiles(jobpath,
                                                                   taskid,
                                                                   xvar='laps',
                                                                   yvar=yvar)
            except ValueError:
                try:
                    xs, ys = loadXYFromTopicModelSummaryFiles(jobpath, taskid)
                except ValueError:
                    raise e
        if yvar == 'hamming-distance' or yvar == 'Keff':
            if xvar == 'laps-saved-params':
                # fix off-by-one error, if we save an extra dist on final lap
                if xs.size == ys.size - 1:
                    ys = ys[:-1]
                elif ys.size == xs.size - 1:
                    xs = xs[:-1]  # fix off-by-one error, if we quit early
            elif xs.size != ys.size:
                # Try to subsample both time series at laps where they
                # intersect
                laps_x = np.loadtxt(os.path.join(jobpath, taskid, 'laps.txt'))
                laps_y = np.loadtxt(
                    os.path.join(jobpath, taskid, 'laps-saved-params.txt'))
                assert xs.size == laps_x.size
                if ys.size == laps_y.size - 1:
                    laps_y = laps_y[:-1]
                xs = xs[np.in1d(laps_x, laps_y)]
                ys = ys[np.in1d(laps_y, laps_x)]

        if xs.size != ys.size:
            raise ValueError('Dimension mismatch. len(xs)=%d, len(ys)=%d' %
                             (xs.size, ys.size))

        # Cleanup laps data. Verify that it is sorted, with no collisions.
        if xvar == 'laps':
            diff = xs[1:] - xs[:-1]
            goodIDs = np.flatnonzero(diff >= 0)
            if len(goodIDs) < xs.size - 1:
                print(
                    'WARNING: looks like multiple runs writing to this file!')
                print(jobpath)
                print('Task: ', taskid)
                print(len(goodIDs), xs.size - 1)
                xs = np.hstack([xs[goodIDs], xs[-1]])
                ys = np.hstack([ys[goodIDs], ys[-1]])

        if xvar == 'laps' and yvar == 'evidence':
            mask = xs >= 1.0
            xs = xs[mask]
            ys = ys[mask]
        elif showOnlyAfterLap:
            # print "Filtering for data recorded at lap >= %s" % (
            #    showOnlyAfterLap)
            if laps is None:
                laps = np.loadtxt(laptxtfile)
            mask = laps >= showOnlyAfterLap
            xs = xs[mask]
            ys = ys[mask]

        # Force plot density (data points per lap) to desired specification
        # This avoids making plots that have huge file sizes,
        # due to too much content in the given display space
        if xvar == 'laps' and xs.size > 20 and np.sum(xs > 5) > 10:
            if (xs[-1] - xs[9]) != 0:
                curDensity = (xs.size - 10) / (xs[-1] - xs[9])
            else:
                curDensity = density
            while curDensity > density and xs.size > 11:
                # Thin xs and ys data by a factor of 2
                # while preserving the first 10 data points
                xs = np.hstack([xs[:10], xs[10::2]])
                ys = np.hstack([ys[:10], ys[10::2]])
                curDensity = (xs.size - 10) / (xs[-1] - xs[9])

        plotargs = dict(markersize=markersize,
                        linewidth=linewidth,
                        linestyle=linestyle,
                        label=None,
                        color=color,
                        markeredgecolor=color)
        for key in kwargs:
            if key in plotargs:
                plotargs[key] = kwargs[key]
        if tt == 0:
            plotargs['label'] = label

        pylab.plot(xs, ys, **plotargs)
        if drawLineToXMax:
            xs_dashed = np.asarray([xs[-1], drawLineToXMax])
            ys_dashed = np.asarray([ys[-1], ys[-1]])
            plotargs['label'] = None
            pylab.plot(xs_dashed, ys_dashed, '--', **plotargs)

    pylab.xlabel(LabelMap[xvar])
    if yvar in LabelMap:
        yLabelStr = LabelMap[yvar]
        if yvar == 'Keff' and 'effCountThr' in kwargs:
            effCountThr = float(kwargs['effCountThr'])
            yLabelStr = yLabelStr + ' > %s' % (str(effCountThr))
        pylab.ylabel(yLabelStr)