示例#1
0
def plotDataAsSquareImages(Data,
                           unitIDsToPlot=None,
                           figID=None,
                           nPlots=16,
                           doShowNow=False,
                           seed=0,
                           randstate=np.random.RandomState(0),
                           **kwargs):
    ''' Show observations from Data as a grid of square images.

    Each selected row of Data.X is reshaped to (sqrt(dim), sqrt(dim)) and
    drawn with pylab.imshow using the module-level imshowArgs.

    Args
    ----
    Data : object providing attributes dim, nObs and row-indexable X
    unitIDsToPlot : optional sequence of row ids to show.
        When None, ids are drawn without replacement using randstate.
    figID : when None, a new figure is opened; otherwise no figure is
        created or selected here and drawing goes to the current figure.
    nPlots : max number of randomly chosen rows (ignored when
        unitIDsToPlot is given).
    doShowNow : if true, call pylab.show() before returning.
    seed : if not None, a fresh RandomState(seed) replaces randstate.
    randstate : random state used only when seed is None.
    **kwargs : accepted but not used by this function.

    Raises
    ------
    AssertionError if Data.dim is not a perfect square.
    '''
    if seed is not None:
        randstate = np.random.RandomState(seed)
    if figID is None:
        pylab.figure()

    V = Data.dim
    assert isPerfectSquare(V)
    sqrtV = int(np.sqrt(V))
    if unitIDsToPlot is not None:
        nPlots = len(unitIDsToPlot)
    else:
        # Never ask for more units than exist, and size the grid to match.
        nPlots = int(np.minimum(Data.nObs, nPlots))
        unitIDsToPlot = randstate.choice(
            Data.nObs, size=nPlots, replace=False)

    # subplot() needs integer grid dimensions; keep at least a 1x1 grid so
    # an empty selection does not divide by zero.
    nRows = max(1, int(np.floor(np.sqrt(nPlots))))
    nCols = max(1, int(np.ceil(nPlots / float(nRows))))

    for plotPos, unitID in enumerate(unitIDsToPlot):
        squareIm = np.reshape(Data.X[unitID], (sqrtV, sqrtV))
        pylab.subplot(nRows, nCols, plotPos + 1)
        pylab.imshow(squareIm, **imshowArgs)
        pylab.axis('image')
        pylab.xticks([])
        pylab.yticks([])
    pylab.tight_layout()
    if doShowNow:
        pylab.show()
示例#2
0
def plotBarsFromHModel(
        hmodel,
        Data=None,
        doShowNow=False,
        figH=None,
        doSquare=1,
        xlabels=[],
        compsToHighlight=None,
        compListToPlot=None,
        activeCompIDs=None,
        Kmax=50,
        width=6,
        height=3,
        vmax=None,
        block=0,  # unused
        jobname='',  # unused
        **kwargs):
    ''' Plot the topic-word parameters of hmodel's observation model.

    Topics come from the row-normalized Post.lam when the observation
    model has a Post attribute, else from a copy of EstParams.phi.

    Args
    ----
    hmodel : model whose obsModel holds Post.lam or EstParams.phi
    Data : not used by this function
    doShowNow : if true, call pylab.show() before returning
    figH : existing handle to draw into, or None to create one
    doSquare : if true, draw each topic as a square image;
        otherwise draw all topics in one image
    vmax : upper intensity limit. When None, the shared imshowArgs uses
        1.5 times the 95th percentile of the topic values.
    **kwargs : forwarded to the chosen plotting helper

    Returns
    -------
    figH : handle from the square-image helper, or the figure
        created/received in the single-image case

    Side effect: updates the 'vmax' entry of module-level imshowArgs.
    '''
    global imshowArgs

    obsModel = hmodel.obsModel
    if hasattr(obsModel, 'Post'):
        lam = obsModel.Post.lam
        rowTotals = lam.sum(axis=1)
        topics = lam / rowTotals[:, np.newaxis]
    else:
        topics = obsModel.EstParams.phi.copy()

    # Determine intensity scale for topic-word image
    if vmax is None:
        imshowArgs['vmax'] = 1.5 * np.percentile(topics, 95)
    else:
        kwargs['vmax'] = vmax
        imshowArgs['vmax'] = vmax

    if not doSquare:
        if figH is None:
            figH = pylab.figure(figsize=(width, height))
        else:
            # NOTE(review): figH is handed to pylab.axes here, so it is
            # presumably an axes-like handle in this branch -- confirm.
            pylab.axes(figH)
        showAllTopicsInSingleImage(topics, compsToHighlight, **kwargs)
    else:
        figH = showTopicsAsSquareImages(
            topics,
            activeCompIDs=activeCompIDs,
            compsToHighlight=compsToHighlight,
            compListToPlot=compListToPlot,
            Kmax=Kmax,
            figH=figH,
            xlabels=xlabels,
            **kwargs)

    if doShowNow:
        pylab.show()
    return figH
示例#3
0
def plotCompsFromHModel(
        hmodel,
        doShowNow=False,
        block=0,  # unused
        jobname='',  # unused
        vocabList=None,  # catchall
        **kwargs):
    ''' Plot the component parameters phi of hmodel's observation model.

    If the observation model has a Post attribute, its EstParams are
    refreshed from Post first. Components are drawn as square images
    when the feature dimension exceeds 9 and is a perfect square;
    otherwise they are drawn as rows of a single image.

    Args
    ----
    hmodel : model whose obsModel exposes EstParams.phi
    doShowNow : if true, call pylab.show() before returning
    vocabList : absorbed here so it is not forwarded in kwargs
    **kwargs : forwarded to the chosen plotting helper

    Returns
    -------
    figH : whatever the chosen plotting helper returns
    '''
    obsModel = hmodel.obsModel
    if hasattr(obsModel, 'Post'):
        obsModel.setEstParamsFromPost()
    phi = obsModel.EstParams.phi.copy()

    nFeatures = phi.shape[1]
    useSquareLayout = nFeatures > 9 and isPerfectSquare(nFeatures)
    if useSquareLayout:
        figH = plotCompsAsSquareImages(phi, **kwargs)
    else:
        figH = plotCompsAsRowsInSingleImage(phi, **kwargs)

    if doShowNow:
        pylab.show()
    return figH
示例#4
0
def plotCompsForJob(jobpath='', taskids=[1], lap=None, **kwargs):
    ''' Show plot of learned clusters from run(s) saved results on disk

    Args
    ----
    jobpath : path to a job directory. If it is not an existing
        directory, it is retried relative to the BNPYOUTDIR environment
        variable (when that variable is set).
    taskids : task id specification, resolved by
        BNPYArgParser.parse_task_ids
    lap : forwarded to plotCompsForTask
    **kwargs : forwarded to plotCompsForTask. If it contains 'block',
        pylab.show(block=...) is called after all tasks are plotted.

    Raises
    ------
    ValueError if no valid directory is found for jobpath.
    '''
    # Verify given absolute path is valid.
    jobpath_originalarg = jobpath
    if not os.path.isdir(jobpath):
        # Fallback: try to prepend BNPYOUTDIR to handle "shortcut" names.
        # A missing variable must end in the ValueError below rather than
        # an unrelated KeyError.
        outdir = os.environ.get('BNPYOUTDIR')
        if outdir is not None:
            jobpath = os.path.join(outdir, jobpath)
    if not os.path.isdir(jobpath):
        raise ValueError('Not valid path: ' + jobpath_originalarg)

    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)
    for tt, taskid in enumerate(taskids):
        # A leading '.'-prefixed string id triggers ranking of the job's
        # tasks once, before anything is plotted.
        if tt == 0 and isinstance(taskid, str) and taskid.startswith('.'):
            rankTasksForSingleJobOnDisk(jobpath)
        taskpath = os.path.join(jobpath, str(taskid))
        plotCompsForTask(taskpath, lap=lap, **kwargs)
    if 'block' in kwargs:
        pylab.show(block=kwargs['block'])
示例#5
0
def plotExampleBarsDocs(Data,
                        docIDsToPlot=None,
                        figID=None,
                        vmax=None,
                        nDocToPlot=16,
                        doShowNow=False,
                        seed=0,
                        randstate=np.random.RandomState(0),
                        xlabels=None,
                        W=1,
                        H=1,
                        **kwargs):
    ''' Show word-count histograms of example documents as square images.

    Each selected document's counts are scattered into a length-V vector
    (V = Data.vocab_size) and drawn as a (sqrt(V), sqrt(V)) image on a
    grid with 5 columns.

    Args
    ----
    Data : object providing vocab_size, nDoc, doc_range, word_id,
        word_count and getDocTypeCountMatrix()
    docIDsToPlot : optional sequence of document ids to show.
        When None, ids are drawn without replacement using randstate.
    figID : when None, a new figure grid is created; otherwise no figure
        is created or selected here.
    vmax : if not None, passed to imshow as the upper intensity limit.
    nDocToPlot : max number of randomly chosen documents (ignored when
        docIDsToPlot is given).
    doShowNow : if true, call pylab.show() before returning.
    seed : if not None, a fresh RandomState(seed) replaces randstate.
    randstate : random state used only when seed is None.
    xlabels : optional per-document x-axis labels, indexed by plot position.
    W, H : per-panel width and height used to size a newly created figure.
    **kwargs : forwarded to pylab.imshow; 'vmin' and 'interpolation'
        are always overwritten with 0 and 'nearest'.

    Raises
    ------
    AssertionError if Data.vocab_size is not a perfect square.
    '''
    kwargs['vmin'] = 0
    kwargs['interpolation'] = 'nearest'
    if vmax is not None:
        kwargs['vmax'] = vmax
    if seed is not None:
        randstate = np.random.RandomState(seed)
    V = Data.vocab_size
    sqrtV = int(np.sqrt(V))
    assert np.allclose(sqrtV * sqrtV, V)
    if docIDsToPlot is not None:
        nDocToPlot = len(docIDsToPlot)
    else:
        size = np.minimum(Data.nDoc, nDocToPlot)
        docIDsToPlot = randstate.choice(Data.nDoc, size=size, replace=False)
    ncols = 5
    nrows = int(np.ceil(nDocToPlot / float(ncols)))
    if vmax is None:
        # NOTE(review): this data-driven vmax is computed but never placed
        # into kwargs, so it does not influence imshow below -- confirm
        # whether it was meant to be forwarded.
        DocWordArr = Data.getDocTypeCountMatrix()
        vmax = int(np.max(np.percentile(DocWordArr, 98, axis=0)))

    if figID is None:
        pylab.subplots(nrows=nrows,
                       ncols=ncols,
                       figsize=(ncols * W, nrows * H))

    for plotPos, docID in enumerate(docIDsToPlot):
        start = Data.doc_range[docID]
        stop = Data.doc_range[docID + 1]
        wIDs = Data.word_id[start:stop]
        wCts = Data.word_count[start:stop]
        docWordHist = np.zeros(V)
        docWordHist[wIDs] = wCts
        squareIm = np.reshape(docWordHist, (sqrtV, sqrtV))
        pylab.subplot(nrows, ncols, plotPos + 1)
        pylab.imshow(squareIm, **kwargs)
        pylab.axis('image')
        pylab.xticks([])
        pylab.yticks([])
        if xlabels is not None:
            pylab.xlabel(xlabels[plotPos])

    # Disable empty plots!
    # Count the panels actually drawn instead of relying on the loop
    # variable, which is undefined when no documents were plotted.
    nUsed = len(docIDsToPlot)
    for kdel in range(nUsed + 1, nrows * ncols + 1):
        aH = pylab.subplot(nrows, ncols, kdel)
        aH.axis('off')

    # Fix margins between subplots
    pylab.subplots_adjust(wspace=0.04,
                          hspace=0.04,
                          left=0.01,
                          right=0.99,
                          top=0.99,
                          bottom=0.01)
    if doShowNow:
        pylab.show()
示例#6
0
        print('Wrote: %s' % (outfilepath))


def parseArgs(**kwargs):
    ''' Read command-line args into defined dict fields

    Args
    ----
    **kwargs : accepted but not used by this function.

    Returns
    -------
    arg_dict : dict with keys 'task_output_path', 'lap', 'taskids' and
        'vocabfile'. When --vocabfile is given, 'vocabList' is added as a
        list holding each line of that file with surrounding whitespace
        stripped.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('task_output_path')
    parser.add_argument('--lap', default=None, type=float)
    parser.add_argument('--taskids',
                        type=str,
                        default=None,
                        help=taskidsHelpMsg)
    parser.add_argument('--vocabfile', type=str, default=None)
    args = parser.parse_args()
    arg_dict = vars(args)
    if args.vocabfile is not None:
        with open(args.vocabfile, 'r') as f:
            # Build a real list: map() would be a one-shot iterator on
            # Python 3.
            arg_dict['vocabList'] = [line.strip() for line in f]
    return arg_dict


if __name__ == "__main__":
    arg_dict = parseArgs()
    # Only the single-task path is wired up: when --taskids is supplied,
    # nothing is plotted before the final show().
    if arg_dict.get('taskids') is None:
        plotCompsForTask(**arg_dict)
    pylab.show()
示例#7
0
def plotManyPanelsByPVar(jpathPattern='/tmp/',
                         pvar=None,
                         pvals=None,
                         W=5,
                         H=4,
                         savefilename=None,
                         doShowNow=False,
                         **kwargs):
    ''' Draw one panel of line plots per value of the panel variable.

    Args
    ----
    jpathPattern : job path pattern. Used as the single panel's pattern
        when pvar is None.
    pvar : name of the variable that distinguishes panels, or None for
        a single panel.
    pvals : optional values of pvar to show. Values absent from the
        pattern's value list are dropped; None means use all of them.
    W, H : per-panel width and height used to size the figure.
    savefilename : if not None, the figure is saved to this path.
    doShowNow : if true (and nothing is being saved), show the figure
        in blocking mode.
    **kwargs : forwarded to makeListOfJPatternsWithSpecificVals and to
        plotMultipleLinesByLVar.

    Returns
    -------
    Info : dict with keys 'nrows' and 'ncols' describing the panel grid.
    '''
    if pvar is not None:
        parentDirParts = jpathPattern.split(os.path.sep)[:-1]
        prefixfilepath = os.path.sep.join(parentDirParts)
        PPListMap = makePPListMapFromJPattern(jpathPattern)
        if pvals is None:
            pvals = PPListMap[pvar]
        else:
            pvals = [p for p in pvals if p in PPListMap[pvar]]
        jpathList = makeListOfJPatternsWithSpecificVals(
            PPListMap,
            prefixfilepath=prefixfilepath,
            key=pvar,
            vals=pvals,
            **kwargs)
    else:
        jpathList = [jpathPattern]
        pvals = [None]

    nrows, ncols = 1, len(pvals)
    pylab.subplots(nrows=nrows, ncols=ncols, figsize=(ncols * W, nrows * H))

    # Individual panels must never trigger their own show().
    kwargs['doShowNow'] = False
    sharedAx = None
    for panelID, panel_jobPattern in enumerate(jpathList):
        sharedAx = pylab.subplot(
            nrows, ncols, panelID + 1, sharey=sharedAx, sharex=sharedAx)
        # Only show legend on first plot
        if 'loc' in kwargs and panelID > 0:
            kwargs['loc'] = None
        plotMultipleLinesByLVar(panel_jobPattern, **kwargs)
        if pvar is not None:
            pylab.title('%s=%s' % (pvar, pvals[panelID]))

    pylab.subplots_adjust(bottom=0.15, wspace=0.5)

    isSaving = savefilename is not None
    if isSaving or doShowNow:
        try:
            # Non-blocking show before saving; blocking show otherwise.
            pylab.show(block=not isSaving)
        except TypeError:
            pass  # when using IPython notebook
    if isSaving:
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)

    return {'nrows': nrows, 'ncols': ncols}
示例#8
0
def plotMultipleLinesByLVar(jpathPattern,
                            lvar=None,
                            lvals=None,
                            ColorMap=DefaultColorList,
                            loc=None,
                            bbox_to_anchor=None,
                            savefilename=None,
                            tickfontsize=None,
                            doShowNow=False,
                            **kwargs):
    ''' Create line plots for provided jobs.

    One line is drawn per value of the line variable lvar.

    Args
    ----
    jpathPattern : job path pattern describing the jobs to plot
    lvar : name of the variable that distinguishes lines
    lvals : value or list of values of lvar to plot; None means all
        values found for lvar. Values other than '.best' that are not
        found for lvar are dropped.
    ColorMap : either a dict keyed by job pattern or by the line label
        '<lvar>=<lval>', or a sequence indexed by line position.
        For a dict, the job-pattern key takes precedence, then the line
        label, then DefaultColorList[lineID].
    loc, bbox_to_anchor : legend placement; a legend is drawn only when
        loc is not None and more than one line is plotted.
    savefilename : if not None, the figure is saved to this path.
    tickfontsize : if not None, label size for major ticks.
    doShowNow : if true (and nothing is being saved), show the figure
        in blocking mode.
    **kwargs : forwarded to makeListOfJPatternsWithSpecificVals and to
        plotSingleLineAcrossJobsByXVar.
    '''
    prefixfilepath = os.path.sep.join(jpathPattern.split(os.path.sep)[:-1])
    PPListMap = makePPListMapFromJPattern(jpathPattern)
    if lvals is None:
        lvals = PPListMap[lvar]
    elif not isinstance(lvals, list):
        lvals = [lvals]
    # Make sure all lval values are street legal (aka exist on disk)
    lvals = [ll for ll in lvals if ll == '.best' or ll in PPListMap[lvar]]

    # NOTE: ranking jobs in advance for '.best' is currently disabled.

    # Create list of jobs with corresponding pattern
    jpathList = makeListOfJPatternsWithSpecificVals(
        PPListMap,
        prefixfilepath=prefixfilepath,
        key=lvar,
        vals=lvals,
        **kwargs)
    for lineID, line_jobPattern in enumerate(jpathList):
        line_label = '%s=%s' % (lvar, lvals[lineID])
        if isinstance(ColorMap, dict):
            # Stop at the first key that is present, so a color found
            # under one key is not clobbered by the default when the
            # other key is missing.
            for colorKey in (line_jobPattern, line_label):
                try:
                    line_color = ColorMap[colorKey]
                    break
                except KeyError:
                    continue
            else:
                line_color = DefaultColorList[lineID]
        else:
            # Access next elt in ColorMap list
            line_color = ColorMap[lineID]
        plotSingleLineAcrossJobsByXVar(line_jobPattern,
                                       label=line_label,
                                       color=line_color,
                                       lineID=lineID,
                                       lvar=lvar,
                                       **kwargs)

    if loc is not None and len(jpathList) > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)
    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major', labelsize=tickfontsize)

    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    elif doShowNow:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook
示例#9
0
def plotJobs(jpaths, legNames, styles=None, density=2,
             xvar='laps', yvar='evidence', loc='upper right',
             xmin=None, xmax=None,
             taskids=None, savefilename=None, tickfontsize=None,
             bbox_to_anchor=None, **kwargs):
    ''' Create line plots for provided jobs.

    Args
    ----
    jpaths : sequence of job paths, one plotted line group per entry
    legNames : legend names, indexed in parallel with jpaths
    styles : optional sequence of per-job style dicts; when None each
        job gets dict(colorID=<its position>)
    density, xvar, yvar, taskids : forwarded to plot_all_tasks_for_job
    loc, bbox_to_anchor : legend placement; a legend is drawn only when
        loc is not None and more than one job is plotted
    xmin, xmax : recomputed from the plotted data when xvar == 'laps'
        and yvar == 'evidence'; not otherwise used by this function
    savefilename : if not None, the figure is saved to this path;
        otherwise the figure is shown in blocking mode
    tickfontsize : if not None, label size for major ticks
    **kwargs : merged with each job's style dict and forwarded to
        plot_all_tasks_for_job (style entries win on key clashes)

    Raises
    ------
    ValueError if jpaths is empty.
    '''
    nLines = len(jpaths)
    if nLines == 0:
        raise ValueError('Empty job list. Nothing to plot.')

    for lineID in range(nLines):
        if styles is None:
            curStyle = dict(colorID=lineID)
        else:
            curStyle = styles[lineID]

        task_kwargs = dict(**kwargs)
        task_kwargs.update(curStyle)
        plot_all_tasks_for_job(jpaths[lineID], legNames[lineID],
                               xvar=xvar, yvar=yvar,
                               taskids=taskids, density=density, **task_kwargs)

    # Y-axis limit determination
    # If we have "enough" data about the run beyond two full passes of dataset,
    # we zoom in on the region of data beyond lap 2
    if xvar == 'laps' and yvar == 'evidence':
        xmax = 0
        ymin = np.inf
        ymin2 = np.inf
        ymax = -np.inf
        allRunsHaveXBeyond1 = True
        for line in pylab.gca().get_lines():
            xd = line.get_xdata()
            yd = line.get_ydata()
            if xd.size < 3:
                allRunsHaveXBeyond1 = False
                continue
            posLap1 = np.searchsorted(xd, 1.0)
            posLap2 = np.searchsorted(xd, 2.0)
            if posLap1 < xd.size:
                ymin = np.minimum(ymin, yd[posLap1])
                ymax = np.maximum(ymax, yd[posLap1:].max())
            if posLap2 < xd.size:
                ymin2 = np.minimum(ymin2, yd[posLap2])
            xmax = np.maximum(xmax, xd.max())
            if xd.max() <= 1:
                allRunsHaveXBeyond1 = False
        if allRunsHaveXBeyond1 and xmax > 1.5:
            # If all relevant curves extend beyond x=1, only show that part
            xmin = 1.0 - 1e-5
        else:
            xmin = 0
        if allRunsHaveXBeyond1 and ymin2 < ymax:
            range1 = ymax - ymin
            range2 = ymax - ymin2
            if 10 * range2 < range1:
                # Y values jump from lap1 to lap2 is enormous,
                # so let's just show y values from lap2 onward...
                ymin = ymin2
        # Skip the y-limits when they were never updated from data (still
        # +/-inf), since axis limits must be finite.
        haveFiniteY = np.isfinite(ymin) and np.isfinite(ymax)
        if (haveFiniteY and allRunsHaveXBeyond1
                and not np.allclose(ymax, ymin)):
            pylab.ylim([ymin, ymax + 0.1 * (ymax - ymin)])
        pylab.xlim([xmin, xmax + .05 * (xmax - xmin)])

    if loc is not None and len(jpaths) > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)
    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major', labelsize=tickfontsize)

    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    else:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook
示例#10
0
def plotJobs(jpaths,
             legNames,
             styles=None,
             fileSuffix='PredLik.mat',
             xvar='laps',
             yvar='avgLikScore',
             loc='upper right',
             minLap=0,
             showFinalPt=0,
             prefix='predlik',
             taskids=None,
             savefilename=None,
             tickfontsize=None,
             xjitter=None,
             bbox_to_anchor=None,
             **kwargs):
    ''' Create line plots for provided jobs

    Args
    ----
    jpaths : sequence of job paths, one plotted line group per entry
    legNames : legend names, indexed in parallel with jpaths; must have
        at least as many entries as jpaths
    styles : optional sequence of per-job style dicts; when None each
        job gets dict(colorID=<its position>)
    fileSuffix, xvar, yvar, minLap, showFinalPt, prefix, taskids :
        forwarded to plot_all_tasks_for_job
    loc, bbox_to_anchor : legend placement; a legend is drawn only when
        loc is not None and more than one job is plotted
    savefilename : if not None, the figure is saved to this path;
        otherwise the figure is shown in blocking mode
    tickfontsize : if not None, label size for major ticks
    xjitter : if not None, each job is given its own jitter value taken
        from an even spacing of [-0.5, 0.5] across jobs; the value
        passed in only acts as an on/off switch
    **kwargs : accepted but not forwarded by this function

    Raises
    ------
    AssertionError if there are fewer legend names than job paths.
    '''
    nLines = len(jpaths)
    nLeg = len(legNames)
    assert nLines <= nLeg, 'Need a legend name for every job path'

    jitterByJob = np.linspace(-.5, .5, nLines)

    for lineID in range(nLines):
        if styles is None:
            curStyle = dict(colorID=lineID)
        else:
            curStyle = styles[lineID]

        # Use a per-line local rather than overwriting the parameter.
        if xjitter is not None:
            lineJitter = jitterByJob[lineID]
        else:
            lineJitter = None
        plot_all_tasks_for_job(jpaths[lineID],
                               legNames[lineID],
                               minLap=minLap,
                               xvar=xvar,
                               yvar=yvar,
                               fileSuffix=fileSuffix,
                               showFinalPt=showFinalPt,
                               prefix=prefix,
                               taskids=taskids,
                               xjitter=lineJitter,
                               **curStyle)

    if loc is not None and nLines > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)

    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major', labelsize=tickfontsize)

    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    else:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook