def plotDataAsSquareImages(Data, unitIDsToPlot=None, figID=None,
                           nPlots=16, doShowNow=False,
                           seed=0, randstate=np.random.RandomState(0),
                           **kwargs):
    ''' Show selected rows of Data.X as a grid of square images.

    Each plotted row is reshaped to (sqrt(Data.dim), sqrt(Data.dim)),
    so Data.dim must be a perfect square (checked via isPerfectSquare).

    Args
    ----
    Data : object exposing attributes dim, nObs and X
    unitIDsToPlot : optional sequence of row ids of Data.X to display.
        If None, min(Data.nObs, nPlots) ids are drawn at random
        without replacement.
    figID : if None, a new figure is created first.
        Otherwise no figure is created here.
    nPlots : number of grid cells to lay out.
        Overwritten by len(unitIDsToPlot) when ids are provided.
    doShowNow : if true, call pylab.show() after drawing.
    seed : if not None, a fresh RandomState(seed) replaces randstate.
    randstate : random generator used only when seed is None.
    **kwargs : accepted but not used by this function.

    Image display options come from the module-level imshowArgs dict.
    '''
    if seed is not None:
        randstate = np.random.RandomState(seed)
    if figID is None:
        pylab.figure()

    V = Data.dim
    assert isPerfectSquare(V)
    sqrtV = int(np.sqrt(V))

    if unitIDsToPlot is not None:
        nPlots = len(unitIDsToPlot)
    else:
        size = np.minimum(Data.nObs, nPlots)
        unitIDsToPlot = randstate.choice(Data.nObs, size=size, replace=False)

    # subplot() requires integer grid dimensions, so cast explicitly.
    # max(1, ...) avoids dividing by zero when there is nothing to plot.
    nRows = max(1, int(np.floor(np.sqrt(nPlots))))
    nCols = int(np.ceil(nPlots / float(nRows)))

    for plotPos, unitID in enumerate(unitIDsToPlot):
        squareIm = np.reshape(Data.X[unitID], (sqrtV, sqrtV))
        pylab.subplot(nRows, nCols, plotPos + 1)
        pylab.imshow(squareIm, **imshowArgs)
        pylab.axis('image')
        pylab.xticks([])
        pylab.yticks([])
    pylab.tight_layout()
    if doShowNow:
        pylab.show()
def plotBarsFromHModel(
        hmodel,
        Data=None,
        doShowNow=False,
        figH=None,
        doSquare=1,
        xlabels=[],
        compsToHighlight=None,
        compListToPlot=None,
        activeCompIDs=None,
        Kmax=50,
        width=6,
        height=3,
        vmax=None,
        block=0,  # unused
        jobname='',  # unused
        **kwargs):
    ''' Plot the topic-word parameters of hmodel's observation model.

    Topics are read from obsModel.Post.lam (each row normalized to sum
    to one) when a 'Post' attribute exists, and otherwise copied from
    obsModel.EstParams.phi.

    Side effect: sets the 'vmax' entry of the module-level imshowArgs
    dict, either to the provided vmax or to 1.5 times the 95th
    percentile of the topic values.

    Args
    ----
    hmodel : model object exposing attribute obsModel
    Data : not used by this function
    doShowNow : if true, call pylab.show() before returning
    figH : existing handle to draw into; created if None in the
        non-square branch, forwarded as-is in the square branch
    doSquare : if true, delegate to showTopicsAsSquareImages;
        otherwise delegate to showAllTopicsInSingleImage
    vmax : optional upper limit of the intensity scale
    width, height : figure size used only when a new figure is made
    **kwargs : forwarded to the chosen plotting helper

    Returns
    -------
    figH : handle of the figure that was drawn into
    '''
    global imshowArgs

    vmaxProvided = vmax is not None
    if vmaxProvided:
        kwargs['vmax'] = vmax

    obsModel = hmodel.obsModel
    if not hasattr(obsModel, 'Post'):
        topics = obsModel.EstParams.phi.copy()
    else:
        lam = obsModel.Post.lam
        rowSums = lam.sum(axis=1)
        topics = lam / rowSums[:, np.newaxis]

    # Determine intensity scale for topic-word image
    if vmaxProvided:
        imshowArgs['vmax'] = vmax
    else:
        imshowArgs['vmax'] = 1.5 * np.percentile(topics, 95)

    if not doSquare:
        if figH is not None:
            pylab.axes(figH)
        else:
            figH = pylab.figure(figsize=(width, height))
        showAllTopicsInSingleImage(topics, compsToHighlight, **kwargs)
    else:
        figH = showTopicsAsSquareImages(
            topics,
            activeCompIDs=activeCompIDs,
            compsToHighlight=compsToHighlight,
            compListToPlot=compListToPlot,
            Kmax=Kmax,
            figH=figH,
            xlabels=xlabels,
            **kwargs)

    if doShowNow:
        pylab.show()
    return figH
def plotCompsFromHModel(
        hmodel,
        doShowNow=False,
        block=0,  # unused
        jobname='',  # unused
        vocabList=None,  # catchall
        **kwargs):
    ''' Plot the estimated component parameters phi of hmodel.

    If the observation model has a 'Post' attribute, its point
    estimates are refreshed via setEstParamsFromPost() before phi is
    read. Components are drawn as square images when the number of
    columns of phi exceeds 9 and is a perfect square; otherwise they
    are drawn as rows of a single image.

    Args
    ----
    hmodel : model object exposing attribute obsModel
    doShowNow : if true, call pylab.show() before returning
    block, jobname, vocabList : accepted but not used here
    **kwargs : forwarded to the selected plotting helper

    Returns
    -------
    figH : whatever the selected plotting helper returns
    '''
    obsModel = hmodel.obsModel
    if hasattr(obsModel, 'Post'):
        obsModel.setEstParamsFromPost()
    phi = obsModel.EstParams.phi.copy()

    nCol = phi.shape[1]
    drawAsSquares = nCol > 9 and isPerfectSquare(nCol)
    if drawAsSquares:
        figH = plotCompsAsSquareImages(phi, **kwargs)
    else:
        figH = plotCompsAsRowsInSingleImage(phi, **kwargs)

    if doShowNow:
        pylab.show()
    return figH
def plotCompsForJob(jobpath='', taskids=[1], lap=None, **kwargs):
    ''' Show plot of learned clusters from run(s) saved results on disk

    Args
    ----
    jobpath : path to a job directory, either usable as given or
        relative to the directory named by env var BNPYOUTDIR
    taskids : task id specification, interpreted by
        BNPYArgParser.parse_task_ids
    lap : forwarded to plotCompsForTask
    **kwargs : forwarded to plotCompsForTask. If it contains 'block',
        pylab.show(block=...) is called after all tasks are plotted.

    Raises
    ------
    ValueError
        if jobpath is not a directory, neither as given nor under
        BNPYOUTDIR (including when BNPYOUTDIR is not set).
    '''
    # Verify given absolute path is valid.
    jobpath_originalarg = jobpath
    if not os.path.isdir(jobpath):
        # Fallback: try to prepend BNPYOUTDIR to handle "shortcut" names.
        # Use .get() so an unset variable leads to the ValueError below
        # rather than an unrelated KeyError.
        outdir = os.environ.get('BNPYOUTDIR')
        if outdir is not None:
            jobpath = os.path.join(outdir, jobpath)
    if not os.path.isdir(jobpath):
        raise ValueError('Not valid path: ' + jobpath_originalarg)

    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)
    for tt, taskid in enumerate(taskids):
        # A leading string id starting with '.' triggers a ranking of
        # the job's tasks on disk before anything is plotted.
        if tt == 0 and isinstance(taskid, str):
            if taskid.startswith('.'):
                rankTasksForSingleJobOnDisk(jobpath)
        taskpath = os.path.join(jobpath, str(taskid))
        plotCompsForTask(taskpath, lap=lap, **kwargs)
    if 'block' in kwargs:
        pylab.show(block=kwargs['block'])
def plotExampleBarsDocs(Data, docIDsToPlot=None, figID=None,
                        vmax=None, nDocToPlot=16, doShowNow=False,
                        seed=0, randstate=np.random.RandomState(0),
                        xlabels=None,
                        W=1, H=1,
                        **kwargs):
    ''' Show word-count histograms of selected documents as square images.

    Each document's counts are scattered into a length-V vector and
    reshaped to (sqrt(V), sqrt(V)), where V = Data.vocab_size must be
    a perfect square. Images are laid out on a grid with 5 columns.

    Args
    ----
    Data : object exposing vocab_size, nDoc, doc_range, word_id,
        word_count and getDocTypeCountMatrix()
    docIDsToPlot : optional sequence of document ids to display.
        If None, min(Data.nDoc, nDocToPlot) ids are drawn at random
        without replacement.
    figID : if None, a new figure with the full grid is created.
        Otherwise no figure is created here.
    vmax : optional upper intensity limit passed to imshow
    nDocToPlot : number of documents to sample when ids are not given
    doShowNow : if true, call pylab.show() after drawing
    seed : if not None, a fresh RandomState(seed) replaces randstate
    randstate : random generator used only when seed is None
    xlabels : optional sequence of x-axis labels, one per plotted doc
    W, H : width and height of each grid cell, used for figsize
    **kwargs : forwarded to pylab.imshow; 'vmin' and 'interpolation'
        are always overwritten here
    '''
    kwargs['vmin'] = 0
    kwargs['interpolation'] = 'nearest'
    if vmax is not None:
        kwargs['vmax'] = vmax
    if seed is not None:
        randstate = np.random.RandomState(seed)

    V = Data.vocab_size
    sqrtV = int(np.sqrt(V))
    assert np.allclose(sqrtV * sqrtV, V)

    if docIDsToPlot is not None:
        nDocToPlot = len(docIDsToPlot)
    else:
        size = np.minimum(Data.nDoc, nDocToPlot)
        docIDsToPlot = randstate.choice(Data.nDoc, size=size, replace=False)

    ncols = 5
    nrows = int(np.ceil(nDocToPlot / float(ncols)))

    if vmax is None:
        # NOTE(review): this data-driven limit is computed but never
        # handed to imshow (kwargs['vmax'] stays unset), so images are
        # auto-scaled. Left as-is to avoid changing rendering; confirm
        # whether it was meant to be applied.
        DocWordArr = Data.getDocTypeCountMatrix()
        vmax = int(np.max(np.percentile(DocWordArr, 98, axis=0)))

    if figID is None:
        figH, ha = pylab.subplots(nrows=nrows, ncols=ncols,
                                  figsize=(ncols * W, nrows * H))

    for plotPos, docID in enumerate(docIDsToPlot):
        start = Data.doc_range[docID]
        stop = Data.doc_range[docID + 1]
        wIDs = Data.word_id[start:stop]
        wCts = Data.word_count[start:stop]
        docWordHist = np.zeros(V)
        docWordHist[wIDs] = wCts
        squareIm = np.reshape(docWordHist, (sqrtV, sqrtV))
        pylab.subplot(nrows, ncols, plotPos + 1)
        pylab.imshow(squareIm, **kwargs)
        pylab.axis('image')
        pylab.xticks([])
        pylab.yticks([])
        if xlabels is not None:
            pylab.xlabel(xlabels[plotPos])

    # Disable empty plots!
    # Start from the first unused cell, counted from the number of
    # plotted docs so an empty selection does not hit an undefined
    # loop variable.
    nUsed = len(docIDsToPlot)
    for kdel in range(nUsed + 1, nrows * ncols + 1):
        aH = pylab.subplot(nrows, ncols, kdel)
        aH.axis('off')

    # Fix margins between subplots
    pylab.subplots_adjust(wspace=0.04, hspace=0.04, left=0.01, right=0.99,
                          top=0.99, bottom=0.01)
    if doShowNow:
        pylab.show()
# NOTE(review): the statement below is the tail of a definition whose
# header lies outside this view; it is kept verbatim.
print('Wrote: %s' % (outfilepath))


def parseArgs(**kwargs):
    ''' Parse command-line arguments into a dict of named fields.

    Recognized arguments: positional task_output_path, plus optional
    --lap (float), --taskids (str) and --vocabfile (str).

    Args
    ----
    **kwargs : accepted but not used by this function.

    Returns
    -------
    arg_dict : dict with keys task_output_path, lap, taskids, vocabfile.
        When --vocabfile is given, also holds key 'vocabList': a list
        with one whitespace-stripped string per line of that file.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('task_output_path')
    parser.add_argument('--lap', default=None, type=float)
    parser.add_argument('--taskids', type=str, default=None,
                        help=taskidsHelpMsg)
    parser.add_argument('--vocabfile', type=str, default=None)
    args = parser.parse_args()
    arg_dict = vars(args)
    if args.vocabfile is not None:
        with open(args.vocabfile, 'r') as f:
            # Build a real list (not a lazy map object) so the result
            # can be indexed and reused on any Python version.
            arg_dict['vocabList'] = [line.strip() for line in f]
    return arg_dict


if __name__ == "__main__":
    arg_dict = parseArgs()
    #plotCompsForJob(block=1, **argDict)
    if 'taskids' in arg_dict and arg_dict['taskids'] is not None:
        # Nothing is plotted when explicit task ids are requested.
        pass
    else:
        plotCompsForTask(**arg_dict)
        pylab.show()
def plotManyPanelsByPVar(jpathPattern='/tmp/',
                         pvar=None,
                         pvals=None,
                         W=5, H=4,
                         savefilename=None,
                         doShowNow=False,
                         **kwargs):
    ''' Create line plots for jobs matching pattern and provided kwargs

    Draws one row of panels, one panel per retained value of pvar, and
    fills each panel via plotMultipleLinesByLVar. With pvar=None, a
    single panel is drawn for jpathPattern itself.

    Args
    ----
    jpathPattern : job path pattern understood by
        makePPListMapFromJPattern
    pvar : name of the variable that differs across panels, or None
    pvals : optional values of pvar to show; values absent from the
        pattern's value list are dropped. If None, all are used.
    W, H : width and height of each panel, used for figsize
    savefilename : if not None, the figure is saved to this path
    doShowNow : if true (and not saving), show the figure and block
    **kwargs : forwarded to makeListOfJPatternsWithSpecificVals and
        plotMultipleLinesByLVar. 'doShowNow' is forced to False, and
        an existing 'loc' is set to None for every panel after the
        first.

    Returns
    -------
    Info : dict with keys nrows and ncols
    '''
    if pvar is not None:
        pathPieces = jpathPattern.split(os.path.sep)
        prefixfilepath = os.path.sep.join(pathPieces[:-1])
        PPListMap = makePPListMapFromJPattern(jpathPattern)
        if pvals is not None:
            pvals = [p for p in pvals if p in PPListMap[pvar]]
        else:
            pvals = PPListMap[pvar]
        jpathList = makeListOfJPatternsWithSpecificVals(
            PPListMap,
            prefixfilepath=prefixfilepath,
            key=pvar,
            vals=pvals,
            **kwargs)
    else:
        jpathList = [jpathPattern]
        pvals = [None]

    nrows = 1
    ncols = len(pvals)
    pylab.subplots(nrows=nrows, ncols=ncols, figsize=(ncols * W, nrows * H))

    axH = None
    for panelNum, panel_jobPattern in enumerate(jpathList, start=1):
        axH = pylab.subplot(nrows, ncols, panelNum, sharey=axH, sharex=axH)
        # Only show legend on first plot
        if panelNum > 1 and 'loc' in kwargs:
            kwargs['loc'] = None
        kwargs['doShowNow'] = False
        plotMultipleLinesByLVar(panel_jobPattern, **kwargs)
        if pvar is not None:
            pylab.title('%s=%s' % (pvar, pvals[panelNum - 1]))
    pylab.subplots_adjust(bottom=0.15, wspace=0.5)

    doSave = savefilename is not None
    if doSave or doShowNow:
        try:
            # Non-blocking show when saving, blocking show otherwise.
            pylab.show(block=not doSave)
        except TypeError:
            pass  # when using IPython notebook
        if doSave:
            pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)

    return dict(nrows=nrows, ncols=ncols)
def plotMultipleLinesByLVar(jpathPattern,
                            lvar=None, lvals=None,
                            ColorMap=DefaultColorList,
                            loc=None, bbox_to_anchor=None,
                            savefilename=None, tickfontsize=None,
                            doShowNow=False,
                            **kwargs):
    ''' Create line plots for provided jobs.

    Draws one line per retained value of lvar, each produced by
    plotSingleLineAcrossJobsByXVar.

    Args
    ----
    jpathPattern : job path pattern understood by
        makePPListMapFromJPattern
    lvar : name of the variable that differs across lines
    lvals : value or list of values of lvar to show. Values other than
        '.best' that are absent from the pattern's value list are
        dropped. If None, all values found for lvar are used.
    ColorMap : either a dict mapping a line's job pattern or its
        '<lvar>=<lval>' label to a color, or a sequence indexed by
        line position
    loc, bbox_to_anchor : legend placement; a legend is drawn only
        when loc is not None and there is more than one line
    savefilename : if not None, the figure is saved to this path
    tickfontsize : optional label size for major ticks
    doShowNow : if true (and not saving), show the figure and block
    **kwargs : forwarded to makeListOfJPatternsWithSpecificVals and
        plotSingleLineAcrossJobsByXVar
    '''
    prefixfilepath = os.path.sep.join(jpathPattern.split(os.path.sep)[:-1])
    PPListMap = makePPListMapFromJPattern(jpathPattern)
    if lvals is None:
        lvals = PPListMap[lvar]
    elif not isinstance(lvals, list):
        lvals = [lvals]
    # Make sure all lval values are street legal (aka exist on disk)
    lvals = [ll for ll in lvals if ll == '.best' or ll in PPListMap[lvar]]

    # Do ranking in advance for each relevant job
    # (disabled; kept for reference)
    # if lvals[0] == '.best':
    #     xvar = kwargs['xvar']
    #     if 'xvals' in kwargs:
    #         xvals = kwargs['xvals']
    #     else:
    #         xvals = PPListMap[xvar]
    #     for xval in xvals:
    #         keyValDict = dict()
    #         keyValDict[xvar] = xval
    #         jpatternForXVal = makeJPatternWithSpecificVals(
    #             PPListMap,
    #             prefixfilepath=prefixfilepath,
    #             **keyValDict)
    #         TaskRanker.markBestAmongJobPatternOnDisk(jpatternForXVal)

    # Create list of jobs with corresponding pattern
    jpathList = makeListOfJPatternsWithSpecificVals(
        PPListMap,
        prefixfilepath=prefixfilepath,
        key=lvar,
        vals=lvals,
        **kwargs)

    for lineID, line_jobPattern in enumerate(jpathList):
        line_label = '%s=%s' % (lvar, lvals[lineID])
        if isinstance(ColorMap, dict):
            # Prefer the color keyed by job pattern, then the one keyed
            # by label. Use the default color only when neither key is
            # present, so a label match is not discarded.
            for colorKey in (line_jobPattern, line_label):
                try:
                    line_color = ColorMap[colorKey]
                    break
                except KeyError:
                    continue
            else:
                line_color = DefaultColorList[lineID]
        else:
            # Access next elt in ColorMap list
            line_color = ColorMap[lineID]
        plotSingleLineAcrossJobsByXVar(line_jobPattern,
                                       label=line_label,
                                       color=line_color,
                                       lineID=lineID,
                                       lvar=lvar,
                                       **kwargs)

    if loc is not None and len(jpathList) > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)
    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major', labelsize=tickfontsize)

    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    elif doShowNow:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook
def plotJobs(jpaths, legNames, styles=None, density=2,
             xvar='laps', yvar='evidence', loc='upper right',
             xmin=None, xmax=None,
             taskids=None, savefilename=None, tickfontsize=None,
             bbox_to_anchor=None, **kwargs):
    ''' Create line plots for provided jobs.

    Args
    ----
    jpaths : sequence of job paths, one per line group to draw
    legNames : sequence of legend names, indexed in step with jpaths
    styles : optional sequence of dicts of style options, one per job.
        If None, each job gets dict(colorID=<its position>).
    density, taskids : forwarded to plot_all_tasks_for_job
    xvar, yvar : names of the quantities on the x and y axes
    loc, bbox_to_anchor : legend placement; a legend is drawn only
        when loc is not None and there is more than one job
    xmin, xmax : recomputed from the plotted lines when xvar is 'laps'
        and yvar is 'evidence'; otherwise not used
    savefilename : if not None, the figure is saved to this path;
        otherwise the figure is shown and the call blocks
    tickfontsize : optional label size for major ticks
    **kwargs : forwarded to plot_all_tasks_for_job; entries of the
        per-job style dict take precedence on key clashes

    Raises
    ------
    ValueError
        if jpaths is empty
    '''
    nLines = len(jpaths)
    if nLines == 0:
        raise ValueError('Empty job list. Nothing to plot.')

    for lineID in range(nLines):
        if styles is None:
            curStyle = dict(colorID=lineID)
        else:
            curStyle = styles[lineID]

        task_kwargs = dict(**kwargs)
        task_kwargs.update(curStyle)
        plot_all_tasks_for_job(jpaths[lineID], legNames[lineID],
                               xvar=xvar, yvar=yvar,
                               taskids=taskids, density=density,
                               **task_kwargs)

    # Y-axis limit determination
    # If we have "enough" data about the run beyond two full passes of dataset,
    # we zoom in on the region of data beyond lap 2
    if xvar == 'laps' and yvar == 'evidence':
        xmax = 0
        ymin = np.inf
        ymin2 = np.inf
        ymax = -np.inf
        allRunsHaveXBeyond1 = True
        for line in pylab.gca().get_lines():
            xd = line.get_xdata()
            yd = line.get_ydata()
            if xd.size < 3:
                # Too few points to judge this curve; skip it entirely.
                allRunsHaveXBeyond1 = False
                continue
            # Positions of first points at or after x=1 and x=2.
            posLap1 = np.searchsorted(xd, 1.0)
            posLap2 = np.searchsorted(xd, 2.0)
            if posLap1 < xd.size:
                ymin = np.minimum(ymin, yd[posLap1])
                ymax = np.maximum(ymax, yd[posLap1:].max())
            if posLap2 < xd.size:
                ymin2 = np.minimum(ymin2, yd[posLap2])
            xmax = np.maximum(xmax, xd.max())
            if xd.max() <= 1:
                allRunsHaveXBeyond1 = False

        if allRunsHaveXBeyond1 and xmax > 1.5:
            # If all relevant curves extend beyond x=1, only show that part
            xmin = 1.0 - 1e-5
        else:
            xmin = 0

        if allRunsHaveXBeyond1 and ymin2 < ymax:
            range1 = ymax - ymin
            range2 = ymax - ymin2
            if 10 * range2 < range1:
                # Y values jump from lap1 to lap2 is enormous,
                # so let's just show y values from lap2 onward...
                ymin = ymin2

        if (not np.allclose(ymax, ymin)) and allRunsHaveXBeyond1:
            pylab.ylim([ymin, ymax + 0.1 * (ymax - ymin)])
        pylab.xlim([xmin, xmax + .05 * (xmax - xmin)])

    if loc is not None and len(jpaths) > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)
    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major', labelsize=tickfontsize)

    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    else:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook
def plotJobs(jpaths, legNames, styles=None, fileSuffix='PredLik.mat',
             xvar='laps', yvar='avgLikScore', loc='upper right',
             minLap=0, showFinalPt=0,
             prefix='predlik',
             taskids=None, savefilename=None, tickfontsize=None,
             xjitter=None, bbox_to_anchor=None, **kwargs):
    ''' Create line plots for provided jobs

    Args
    ----
    jpaths : sequence of job paths, one per line group to draw
    legNames : sequence of legend names; must have at least as many
        entries as jpaths
    styles : optional sequence of dicts of style options, one per job.
        If None, each job gets dict(colorID=<its position>).
    fileSuffix, minLap, showFinalPt, prefix, taskids :
        forwarded to plot_all_tasks_for_job
    xvar, yvar : names of the quantities on the x and y axes
    loc, bbox_to_anchor : legend placement; a legend is drawn only
        when loc is not None and there is more than one job
    savefilename : if not None, the figure is saved to this path;
        otherwise the figure is shown and the call blocks
    tickfontsize : optional label size for major ticks
    xjitter : if not None, each job gets its own horizontal offset,
        evenly spaced over [-0.5, 0.5]; the passed value only acts
        as an on/off switch
    **kwargs : accepted but not used by this function
    '''
    nLines = len(jpaths)
    nLeg = len(legNames)
    assert nLines <= nLeg

    useJitter = xjitter is not None
    jitterByJob = np.linspace(-.5, .5, len(jpaths))
    for lineID in range(nLines):
        if styles is None:
            curStyle = dict(colorID=lineID)
        else:
            curStyle = styles[lineID]
        # Per-job offset when jitter is enabled, None otherwise.
        curJitter = jitterByJob[lineID] if useJitter else None
        plot_all_tasks_for_job(jpaths[lineID], legNames[lineID],
                               minLap=minLap,
                               xvar=xvar, yvar=yvar,
                               fileSuffix=fileSuffix,
                               showFinalPt=showFinalPt,
                               prefix=prefix,
                               taskids=taskids,
                               xjitter=curJitter,
                               **curStyle)

    if loc is not None and len(jpaths) > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)
    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major', labelsize=tickfontsize)

    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    else:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook