def getMedian(self, inArticleDict):
    """Return a dict mapping each subject to a (median, 25th, 75th percentile) tuple."""
    medDict = {}
    for subject in inArticleDict:
        # The original indexed self.inArticleDict here; the parameter is what
        # is iterated, so use it consistently.
        medDict[subject] = (round(numpy.median(inArticleDict[subject])),
                            round(percentile(inArticleDict[subject], 25)),
                            round(percentile(inArticleDict[subject], 75)))
    return medDict
def calculateCI(Vr, years, nodata, minRecords, yrsPerSim=1,
                sample_size=50, prange=90):
    """
    Fit a GEV to the wind speed records for a 2-D extent of wind speed
    values, providing a confidence range by resampling at random from the
    input values.

    :param Vr: `numpy.ndarray` of wind speeds (3-D - event, lat, lon)
    :param years: `numpy.ndarray` of years for which to evaluate
                  return period values.
    :param float nodata: missing data value.
    :param int minRecords: minimum number of valid wind speed values
                           required to fit the distribution.
    :param int yrsPerSim: values represent block maxima - this value
                          indicates the time span of the block (default 1).
    :param int sample_size: number of records to randomly sample for
                            calculating the confidence interval of the fit.
    :param float prange: percentile range.

    :return: `numpy.ndarray` of return period wind speed values
    """
    lower = (100 - prange) / 2.
    upper = 100. - lower
    nrecords = Vr.shape[0]
    nsamples = nrecords / sample_size
    RpUpper = nodata * np.ones((len(years), Vr.shape[1], Vr.shape[2]), dtype='f')
    RpLower = nodata * np.ones((len(years), Vr.shape[1], Vr.shape[2]), dtype='f')
    w = np.zeros((len(years), nsamples), dtype='f')
    wUpper = np.zeros((len(years)), dtype='f')
    wLower = np.zeros((len(years)), dtype='f')
    for i in xrange(Vr.shape[1]):
        for j in xrange(Vr.shape[2]):
            if Vr[:, i, j].max() > 0.0:
                random.shuffle(Vr[:, i, j])
                for n in xrange(nsamples):
                    nstart = n * sample_size
                    nend = (n + 1) * sample_size - 1
                    vsub = Vr[nstart:nend, i, j]
                    vsub.sort()
                    if vsub.max() > 0.:
                        w[:, n], loc, scale, shp = evd.estimateEVD(
                            vsub, years, nodata, minRecords / 10, yrsPerSim)
                for n in range(len(years)):
                    wUpper[n] = percentile(w[n, :], upper)
                    wLower[n] = percentile(w[n, :], lower)
                RpUpper[:, i, j] = wUpper
                RpLower[:, i, j] = wLower
    return RpUpper, RpLower
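# The function above follows the percentile-bootstrap pattern: fit on many
# random subsamples, then take the (100 - prange)/2 and 100 - (100 - prange)/2
# percentiles of the fitted values as the confidence band. A minimal,
# self-contained sketch of that pattern; the sample mean stands in for the
# GEV fit done by evd.estimateEVD, which is not shown here.
import numpy as np

def bootstrap_ci(values, nsamples=20, sample_size=50, prange=90):
    lower = (100 - prange) / 2.0
    upper = 100.0 - lower
    rng = np.random.default_rng(0)
    # Fit the statistic of interest on each random subsample.
    fits = [rng.choice(values, size=sample_size, replace=True).mean()
            for _ in range(nsamples)]
    return np.percentile(fits, lower), np.percentile(fits, upper)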
def calculateMeans(self): """ Calculate the mean, median and percentiles of the synthetic values """ self.synHist = ma.masked_values(self.synHist, -9999.) self.synHistMean = ma.mean(self.synHist, axis=0) self.medSynHist = ma.median(self.synHist, axis=0) self.synHistUpper = percentile(self.synHist, per=95, axis=0) self.synHistLower = percentile(self.synHist, per=5, axis=0)
def calculateMeans(self): """ Calculate mean, median and percentiles of the :attr:`self.synHist` attribute. """ self.synHist = ma.masked_values(self.synHist, -9999.) self.synHistMean = ma.mean(self.synHist, axis=0) self.medSynHist = ma.median(self.synHist, axis=0) self.synHistUpper = percentile(self.synHist, per=95, axis=0) self.synHistLower = percentile(self.synHist, per=5, axis=0)
def calculateStats(self): """ Calculate mean and percentiels of landfall/offshore transition rates. Operates on the :attr:`self.synLandfall` and :attr:`self.synOffshore` attributes. """ self.synMeanLandfall = np.mean(self.synLandfall, axis=0) self.synMeanOffshore = np.mean(self.synOffshore, axis=0) self.synUpperLF = percentile(self.synLandfall, per=95, axis=0) self.synLowerLF = percentile(self.synLandfall, per=5, axis=0) self.synUpperOF = percentile(self.synOffshore, per=95, axis=0) self.synLowerOF = percentile(self.synOffshore, per=5, axis=0)
def get_player_lists():
    position_attribute_scores = {position: {} for position in POSITIONS}
    player_lists = {position: [] for position in POSITIONS}
    for position in position_attribute_scores:
        for attribute in ATTRIBUTES:
            position_attribute_scores[position][attribute] = []
    # First pass: collect every score, per position and attribute.
    with open('players.csv', mode='r') as csvfile:
        reader = csv.DictReader(csvfile)
        for player_row in reader:
            for attribute in ATTRIBUTES:
                score = int(player_row[attribute])
                position = player_row['position']
                position_attribute_scores[position][attribute].append(score)
    # Second pass: convert each player's raw scores to percentile ranks
    # within their position.
    with open('players.csv', mode='r') as csvfile:
        reader = csv.DictReader(csvfile)
        for player_row in reader:
            player = {}
            position = player_row['position']
            player['name'] = player_row['name']
            player['position'] = position
            for attribute in ATTRIBUTES:
                player_score = int(player_row[attribute])
                scores = position_attribute_scores[position][attribute]
                player[attribute] = percentile(scores, player_score, 'weak')
            player_lists[position].append(player)
    return player_lists
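# A quick check of the 'weak' rank convention used above, assuming
# `percentile` is bound to scipy.stats.percentileofscore (the import is not
# shown in the snippet):
from scipy.stats import percentileofscore

scores = [10, 20, 30, 40]
print(percentileofscore(scores, 30, kind='weak'))  # 75.0: share of scores <= 30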
def print_percentiles(label, values):
    try:
        from scipy.stats import scoreatpercentile as percentile
    except ImportError:
        print('WARN: no scipy means no percentile stats printed')
        return
    d = {
        0: min(values),
        5: percentile(values, 5),
        25: percentile(values, 25),
        50: percentile(values, 50),
        75: percentile(values, 75),
        95: percentile(values, 95),
        100: max(values),
    }
    for k, v in d.items():
        print('%s: %d percentile: %f' % (label, k, v))
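# Hedged usage sketch for print_percentiles; the latency numbers are
# illustrative only.
import random

latencies = [random.gauss(100, 15) for _ in range(1000)]
print_percentiles('latency_ms', latencies)  # prints one line per percentile level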
def get_method_bounds(X):
    sample_counts = array_sample(X)
    # `xlabels` is assumed to be defined at module scope in the original code.
    percentsat = lambda x, k: np.array(
        [percentile(x[:, i], k) for i, m in enumerate(xlabels)])
    median = percentsat(sample_counts, 50)
    # The original used the literal 05, which is an octal literal in
    # Python 2 and a syntax error in Python 3; plain 5 is intended.
    yerr = np.vstack([
        median - percentsat(sample_counts, 5),
        percentsat(sample_counts, 95) - median
    ]).transpose()
    return median, yerr
def method_errorbar(data, xlabels, line_color=default_color, med_color=None,
                    legend=None, y_offset=0.0, alpha=0.05):
    if not med_color:
        med_color = line_color
    ax.grid(axis='x', color='0.9', linestyle='-', linewidth=0.2)
    ax.set_axisbelow(True)
    n, m = data.shape
    medians = [percentile(data[:, i], 50) for i in range(m)]
    xerr = [[medians[i] - percentile(data[:, i], 100 * (alpha / 2.)),
             percentile(data[:, i], 100 * (1 - alpha / 2.)) - medians[i]]
            for i in range(m)]
    xerr = np.array(xerr).transpose()
    y_marks = np.array(range(len(xlabels))) - y_offset
    plt.errorbar(y=y_marks, x=medians, xerr=xerr, fmt='|', capsize=0,
                 color=line_color, ecolor=line_color, elinewidth=0.3,
                 markersize=2)
    plt.xlabel('% cases used', fontsize=8)
    ax.tick_params(axis='x', which='both', labelsize=8)
    ax.set_yticks(np.array(range(len(xlabels))))
    ax.set_yticklabels(xlabels, fontsize=6)
    plt.ylim((min(y_marks) - 0.5, max(y_marks) + 0.5))
    spines_to_remove = ['top', 'right', 'left']
    for spine in spines_to_remove:
        ax.spines[spine].set_visible(False)
    ppl.utils.remove_chartjunk(ax, ['top', 'right', 'bottom'],
                               show_ticks=False)
    if legend:
        rect = legend.get_frame()
        rect.set_facecolor(light_grey)
        rect.set_linewidth(0.0)
def plot_boolean_frequency(data, labels, **kwargs):
    alpha = 0.05
    boolean_percent = lambda X: np.count_nonzero(X) / float(len(X))
    boolean_sample = lambda x: np.array(
        [boolean_percent(resample(x)) for i in range(bootstrap_num)])
    medians = []
    yerr = []
    for d, l in zip(data, labels):
        d_samples = boolean_sample(d)
        low = percentile(d_samples, 100 * alpha / 2.)
        med = percentile(d_samples, 50)
        high = percentile(d_samples, 100 * (1 - alpha / 2.))
        print '[%.2f,%.2f,%.2f]:%s' % (low, med, high, l)
        medians.append(med)
        yerr.append([med - low, high - med])
    yerr = np.array(yerr)
    kwargs['width'] = 0.2
    kwargs['xfontsize'] = 10
    method_bar(medians, yerr, labels, **kwargs)
    plt.ylim(0, 1)
def plotStatistics(self, output_file):
    p = stats.statRemoveNum(np.array(self.param), self.missingValue)
    a = p - np.mean(p)
    pmin = p.min()
    pmax = p.max()
    amin = a.min()
    amax = a.max()
    abins = np.linspace(amin, amax, 50)
    bins = np.linspace(pmin, pmax, 50)
    hist = np.empty((len(bins) - 1, self.maxCell))
    ahist = np.empty((len(abins) - 1, self.maxCell))
    x = np.arange(11)
    alpha = np.empty((11, self.maxCell))
    aalpha = np.empty((11, self.maxCell))
    for i in xrange(self.maxCell + 1):
        p = self.extractParameter(i, 0)
        a = p - np.mean(p)
        hist[:, i - 1], b = np.histogram(p, bins, normed=True)
        ahist[:, i - 1], b = np.histogram(a, abins, normed=True)
        alpha[:, i - 1] = acf(p, 10)
        aalpha[:, i - 1] = acf(a, 10)
    mhist = np.mean(hist, axis=1)
    uhist = percentile(hist, per=95, axis=1)
    lhist = percentile(hist, per=5, axis=1)
    mahist = np.mean(ahist, axis=1)
    uahist = percentile(ahist, per=95, axis=1)
    lahist = percentile(ahist, per=5, axis=1)
    malpha = np.mean(alpha, axis=1)
    ualpha = percentile(alpha, per=95, axis=1)
    lalpha = percentile(alpha, per=5, axis=1)
    maalpha = np.mean(aalpha, axis=1)
    uaalpha = percentile(aalpha, per=95, axis=1)
    laalpha = percentile(aalpha, per=5, axis=1)
    fig = RangeCurve()
    fig.add(bins[:-1], mhist, uhist, lhist, "Values", "Probability", "")
    fig.add(abins[:-1], mahist, uahist, lahist, "Anomalies", "Probability", "")
    fig.add(x, malpha, ualpha, lalpha, "Lag", "Autocorrelation",
            "ACF of values")
    fig.add(x, maalpha, uaalpha, laalpha, "Lag", "Autocorrelation",
            "ACF of anomalies")
    fig.plot()
    saveFigure(fig, output_file + '.png')
def calculateMeans(self, synMean, synMin, synMed, synMax, synMinCP): """ Calculate mean, median, minimum, maximum and percentiles of pressure values from synthetic events. :param synMean: `numpy.ndarray` :param synMin: `numpy.ndarray` :param synMed: `numpy.ndarray` :param synMax: `numpy.ndarray` :param synMinCP: `numpy.ndarray` """ synMean = ma.masked_values(synMean, -9999.) synMin = ma.masked_values(synMin, -9999.) synMed = ma.masked_values(synMed, -9999.) synMax = ma.masked_values(synMax, -9999.) self.synMean = ma.mean(synMean, axis=0) self.synMed = ma.mean(synMed, axis=0) self.synMin = ma.mean(synMin, axis=0) self.synMax = ma.mean(synMax, axis=0) self.synMeanUpper = percentile(ma.compressed(synMean), per=95, axis=0) self.synMeanLower = percentile(ma.compressed(synMean), per=5, axis=0) self.synMinUpper = percentile(ma.compressed(synMin), per=95, axis=0) self.synMinLower = percentile(ma.compressed(synMin), per=5, axis=0) self.synMinCPDist = np.mean(synMinCP, axis=0) self.synMinCPLower = percentile(synMinCP, per=5, axis=0) self.synMinCPUpper = percentile(synMinCP, per=95, axis=0) r = list(np.random.uniform(high=synMean.shape[0], size=3).astype(int)) self.synRandomMinima = synMean[r, :, :]
def median(map): """Function to calculate median of a map map Input PCRaster map""" return percentile(map, 50) OrderMap = order(map) Mid = roundoff(mean(OrderMap)) MidMap = ifthenelse(OrderMap == Mid, map, 0) Median = cellvalue(mapmaximum(MidMap), 0, 0) assert Median[0] > 0.0 return Median[0]
def calcStats(self, lonCrossHist, lonCrossEW, lonCrossWE):
    """Calculate means and percentiles of synthetic event sets"""
    self.synCrossMean = np.mean(lonCrossHist, axis=0)
    self.synCrossEW = np.mean(lonCrossEW, axis=0)
    self.synCrossWE = np.mean(lonCrossWE, axis=0)
    self.synCrossUpper = percentile(lonCrossHist, per=95, axis=0)
    self.synCrossEWUpper = percentile(lonCrossEW, per=95, axis=0)
    self.synCrossWEUpper = percentile(lonCrossWE, per=95, axis=0)
    self.synCrossLower = percentile(lonCrossHist, per=5, axis=0)
    self.synCrossEWLower = percentile(lonCrossEW, per=5, axis=0)
    self.synCrossWELower = percentile(lonCrossWE, per=5, axis=0)
def getTeamPercentile(season):
    team_percentile = defaultdict(lambda: [])
    stat_keys = [
        "goals_for", "goals_against", "shots_for", "shots_against",
        "hits_for", "hits_against", "giveaways", "takeaways",
        "pim_for", "pim_against", "power_plays", "power_play_goals",
        "penalty_kills", "penalty_kill_goals", "power_play_percentage",
        "penalty_kill_percentage", "shooting_percentage",
        "save_percentage", "PDO",
    ]
    for idx, game in enumerate(season):
        game_stats = game["stats"]["game"]
        # Accumulate every stat seen so far this season...
        for key in stat_keys:
            team_percentile[key].append(game_stats[key])
        # ...then rank this game's stats against that running distribution.
        game["stats"]["team_percentile"] = {
            i: float(percentile(team_percentile[i], game_stats[i],
                                kind='mean') / 100)
            for i in dict(team_percentile)
        }
    return season
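# The running-percentile trick above relies on scipy.stats.percentileofscore
# with kind='mean' (the average of the 'strict' and 'weak' ranks), scaled
# into [0, 1]. A toy check:
from scipy.stats import percentileofscore

history = [1, 2, 2, 3]
print(percentileofscore(history, 2, kind='mean') / 100)  # 0.5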
def calculateMeans(self, synMean, synMin, synMed, synMax, synMinCP):
    synMean = ma.masked_values(synMean, -9999.)
    synMin = ma.masked_values(synMin, -9999.)
    synMed = ma.masked_values(synMed, -9999.)
    synMax = ma.masked_values(synMax, -9999.)
    self.synMean = ma.mean(synMean, axis=0)
    self.synMed = ma.mean(synMed, axis=0)
    self.synMin = ma.mean(synMin, axis=0)
    self.synMax = ma.mean(synMax, axis=0)
    self.synMeanUpper = percentile(synMean, per=95, axis=0)
    self.synMeanLower = percentile(synMean, per=5, axis=0)
    self.synMinUpper = percentile(synMin, per=95, axis=0)
    self.synMinLower = percentile(synMin, per=5, axis=0)
    self.synMinCPDist = np.mean(synMinCP, axis=0)
    self.synMinCPLower = percentile(synMinCP, per=5, axis=0)
    self.synMinCPUpper = percentile(synMinCP, per=95, axis=0)
    r = list(np.random.uniform(high=synMean.shape[0], size=3).astype(int))
    self.synRandomMinima = synMean[r, :, :]
def get_rank(x):
    return percentile(bg, x) / 100.0
def main(args):
    # Parse the command line
    ## Baseline list
    if args.baseline is not None:
        ## Fill the baseline list with the conjugates, if needed
        newBaselines = []
        for pair in args.baseline:
            newBaselines.append((pair[1], pair[0]))
        args.baseline.extend(newBaselines)
    ## Polarization
    if args.xx:
        args.polToPlot = 'XX'
    elif args.xy:
        args.polToPlot = 'XY'
    elif args.yx:
        args.polToPlot = 'YX'
    elif args.yy:
        args.polToPlot = 'YY'
    elif args.stokes_i:
        args.polToPlot = 'I'
    elif args.stokes_v:
        args.polToPlot = 'V'

    filenames = args.filename
    filenames.sort()
    if args.limit != -1:
        filenames = filenames[:args.limit]

    nInt = len(filenames)
    dataDict = numpy.load(filenames[0])
    tInt = dataDict['tInt']
    nBL, nchan = dataDict['vis1XX'].shape
    freq = dataDict['freq1']
    junk0, refSrc, junk1, junk2, junk3, junk4, antennas = \
        read_correlator_configuration(dataDict)
    dataDict.close()

    # Make sure the reference antenna is in there
    if args.ref_ant is not None:
        found = False
        for ant in antennas:
            if ant.stand.id == args.ref_ant:
                found = True
                break
        if not found:
            raise RuntimeError("Cannot find reference antenna %i in the data"
                               % args.ref_ant)

    bls = []
    l = 0
    cross = []
    for i in xrange(0, len(antennas), 2):
        ant1 = antennas[i].stand.id
        for j in xrange(i, len(antennas), 2):
            ant2 = antennas[j].stand.id
            if args.include_auto or ant1 != ant2:
                if args.baseline is not None:
                    if (ant1, ant2) in args.baseline:
                        bls.append((ant1, ant2))
                        cross.append(l)
                elif args.ref_ant is not None:
                    if ant1 == args.ref_ant or ant2 == args.ref_ant:
                        bls.append((ant1, ant2))
                        cross.append(l)
                else:
                    bls.append((ant1, ant2))
                    cross.append(l)
            l += 1
    nBL = len(cross)

    if args.decimate > 1:
        if nchan % args.decimate != 0:
            raise RuntimeError(
                "Invalid frequency decimation factor: %i %% %i = %i"
                % (nchan, args.decimate, nchan % args.decimate))
        nchan //= args.decimate
        freq.shape = (freq.size // args.decimate, args.decimate)
        freq = freq.mean(axis=1)

    times = numpy.zeros(nInt, dtype=numpy.float64)
    visToPlot = numpy.zeros((nInt, nBL, nchan), dtype=numpy.complex64)
    visToMask = numpy.zeros((nInt, nBL, nchan), dtype=numpy.bool)

    for i, filename in enumerate(filenames):
        dataDict = numpy.load(filename)
        tStart = dataDict['tStart']
        if args.polToPlot == 'I':
            cvis = dataDict['vis1XX'][cross, :] + dataDict['vis1YY'][cross, :]
        elif args.polToPlot == 'V':
            cvis = dataDict['vis1XY'][cross, :] - dataDict['vis1YX'][cross, :]
            cvis /= 1j
        else:
            cvis = dataDict['vis1%s' % args.polToPlot][cross, :]
        if args.decimate > 1:
            cvis.shape = (cvis.shape[0], cvis.shape[1] // args.decimate,
                          args.decimate)
            cvis = cvis.mean(axis=2)
        visToPlot[i, :, :] = cvis

        if not args.drop:
            try:
                delayStepApplied = dataDict['delayStepApplied']
                try:
                    len(delayStepApplied)
                except TypeError:
                    delayStepApplied = [
                        delayStepApplied if ant.stand.id > 50 else False
                        for ant in antennas if ant.pol == 0
                    ]
            except KeyError:
                delayStepApplied = [False for ant in antennas if ant.pol == 0]
            delayStepAppliedBL = []
            for j in xrange(len(delayStepApplied)):
                for k in xrange(j, len(delayStepApplied)):
                    delayStepAppliedBL.append(delayStepApplied[j]
                                              or delayStepApplied[k])
            visToMask[i, :, :] = [[delayStepAppliedBL[c], ] for c in cross]

        times[i] = tStart
        dataDict.close()

    print("Got %i files from %s to %s (%.1f s)"
          % (len(filenames),
             datetime.utcfromtimestamp(times[0]).strftime("%Y/%m/%d %H:%M:%S"),
             datetime.utcfromtimestamp(times[-1]).strftime("%Y/%m/%d %H:%M:%S"),
             (times[-1] - times[0])))
    iTimes = numpy.zeros(nInt - 1, dtype=times.dtype)
    for i in xrange(1, len(times)):
        iTimes[i - 1] = times[i] - times[i - 1]
    print(" -> Interval: %.3f +/- %.3f seconds (%.3f to %.3f seconds)"
          % (iTimes.mean(), iTimes.std(), iTimes.min(), iTimes.max()))
    print("Number of frequency channels: %i (~%.1f Hz/channel)"
          % (len(freq), freq[1] - freq[0]))

    dTimes = times - times[0]

    delay = numpy.linspace(-350e-6, 350e-6, 301)    # s
    drate = numpy.linspace(-150e-3, 150e-3, 301)    # Hz

    good = numpy.arange(freq.size // 8, freq.size * 7 // 8)  # Inner 75% of the band

    fig1 = plt.figure()
    fig2 = plt.figure()
    fig3 = plt.figure()
    fig4 = plt.figure()
    fig5 = plt.figure()

    k = 0
    nRow = int(numpy.sqrt(len(bls)))
    nCol = int(numpy.ceil(len(bls) * 1.0 / nRow))
    for b in xrange(len(bls)):
        i, j = bls[b]
        vis = numpy.ma.array(visToPlot[:, b, :], mask=visToMask[:, b, :])

        ax = fig1.add_subplot(nRow, nCol, k + 1)
        ax.imshow(numpy.ma.angle(vis),
                  extent=(freq[0] / 1e6, freq[-1] / 1e6, dTimes[0], dTimes[-1]),
                  origin='lower', vmin=-numpy.pi, vmax=numpy.pi,
                  interpolation='nearest')
        ax.axis('auto')
        ax.set_xlabel('Frequency [MHz]')
        ax.set_ylabel('Elapsed Time [s]')
        ax.set_title("%i,%i - %s" % (i, j, args.polToPlot))
        ax.set_xlim((freq[0] / 1e6, freq[-1] / 1e6))
        ax.set_ylim((dTimes[0], dTimes[-1]))

        ax = fig2.add_subplot(nRow, nCol, k + 1)
        amp = numpy.ma.abs(vis)
        # Percentile-based color limits keep a few bright pixels from
        # washing out the display.
        vmin, vmax = percentile(amp, 1), percentile(amp, 99)
        ax.imshow(amp,
                  extent=(freq[0] / 1e6, freq[-1] / 1e6, dTimes[0], dTimes[-1]),
                  origin='lower', interpolation='nearest',
                  vmin=vmin, vmax=vmax)
        ax.axis('auto')
        ax.set_xlabel('Frequency [MHz]')
        ax.set_ylabel('Elapsed Time [s]')
        ax.set_title("%i,%i - %s" % (i, j, args.polToPlot))
        ax.set_xlim((freq[0] / 1e6, freq[-1] / 1e6))
        ax.set_ylim((dTimes[0], dTimes[-1]))

        ax = fig3.add_subplot(nRow, nCol, k + 1)
        ax.plot(freq / 1e6, numpy.ma.abs(vis.mean(axis=0)))
        ax.set_xlabel('Frequency [MHz]')
        ax.set_ylabel('Mean Vis. Amp. [lin.]')
        ax.set_title("%i,%i - %s" % (i, j, args.polToPlot))
        ax.set_xlim((freq[0] / 1e6, freq[-1] / 1e6))

        ax = fig4.add_subplot(nRow, nCol, k + 1)
        ax.plot(numpy.ma.angle(vis[:, good].mean(axis=1)) * 180 / numpy.pi,
                dTimes, linestyle='', marker='+')
        ax.set_xlim((-180, 180))
        ax.set_xlabel('Mean Vis. Phase [deg]')
        ax.set_ylabel('Elapsed Time [s]')
        ax.set_title("%i,%i - %s" % (i, j, args.polToPlot))
        ax.set_ylim((dTimes[0], dTimes[-1]))

        ax = fig5.add_subplot(nRow, nCol, k + 1)
        ax.plot(numpy.ma.abs(vis[:, good].mean(axis=1)) * 180 / numpy.pi,
                dTimes, linestyle='', marker='+')
        ax.set_xlabel('Mean Vis. Amp. [lin.]')
        ax.set_ylabel('Elapsed Time [s]')
        ax.set_title("%i,%i - %s" % (i, j, args.polToPlot))
        ax.set_ylim((dTimes[0], dTimes[-1]))

        k += 1

    for f in (fig1, fig2, fig3, fig4, fig5):
        f.suptitle("%s to %s UTC"
                   % (datetime.utcfromtimestamp(times[0]).strftime("%Y/%m/%d %H:%M"),
                      datetime.utcfromtimestamp(times[-1]).strftime("%Y/%m/%d %H:%M")))

    plt.show()
def main(args):
    # Parse the command line
    ## Baseline list
    if args.baseline is not None:
        ## Fill the baseline list with the conjugates, if needed
        newBaselines = []
        for pair in args.baseline:
            newBaselines.append((pair[1], pair[0]))
        args.baseline.extend(newBaselines)
    ## Polarization
    plot_pols = []
    if args.xx:
        plot_pols.append('XX')
    if args.xy:
        plot_pols.append('XY')
    if args.yx:
        plot_pols.append('YX')
    if args.yy:
        plot_pols.append('YY')
    filename = args.filename

    figs = {}
    first = True
    for filename in args.filename:
        print("Working on '%s'" % os.path.basename(filename))
        # Open the FITS IDI file and access the UV_DATA extension
        hdulist = astrofits.open(filename, mode='readonly')
        andata = hdulist['ANTENNA']
        fqdata = hdulist['FREQUENCY']
        fgdata = None
        for hdu in hdulist[1:]:
            if hdu.header['EXTNAME'] == 'FLAG':
                fgdata = hdu
        uvdata = hdulist['UV_DATA']

        # Pull out various bits of information we need to flag the file
        ## Antenna look-up table
        antLookup = {}
        for an, ai in zip(andata.data['ANNAME'], andata.data['ANTENNA_NO']):
            antLookup[an] = ai
        ## Frequency and polarization setup
        nBand, nFreq, nStk = (uvdata.header['NO_BAND'],
                              uvdata.header['NO_CHAN'],
                              uvdata.header['NO_STKD'])
        stk0 = uvdata.header['STK_1']
        ## Baseline list
        bls = uvdata.data['BASELINE']
        ## Time of each integration
        obsdates = uvdata.data['DATE']
        obstimes = uvdata.data['TIME']
        inttimes = uvdata.data['INTTIM']
        ## Source list
        srcs = uvdata.data['SOURCE']
        ## Band information
        fqoffsets = fqdata.data['BANDFREQ'].ravel()
        ## Frequency channels
        freq = (numpy.arange(nFreq) - (uvdata.header['CRPIX3'] - 1)) \
               * uvdata.header['CDELT3']
        freq += uvdata.header['CRVAL3']
        ## UVW coordinates
        try:
            u, v, w = uvdata.data['UU'], uvdata.data['VV'], uvdata.data['WW']
        except KeyError:
            u, v, w = (uvdata.data['UU---SIN'], uvdata.data['VV---SIN'],
                       uvdata.data['WW---SIN'])
        uvw = numpy.array([u, v, w]).T
        ## The actual visibility data
        flux = uvdata.data['FLUX'].astype(numpy.float32)

        # Convert the visibilities to something that we can easily work with
        nComp = flux.shape[1] // nBand // nFreq // nStk
        if nComp == 2:
            ## Case 1) - Just real and imaginary data
            flux = flux.view(numpy.complex64)
        else:
            ## Case 2) - Real, imaginary data + weights (drop the weights)
            flux = flux[:, 0::nComp] + 1j * flux[:, 1::nComp]
        flux.shape = (flux.shape[0], nBand, nFreq, nStk)

        # Find unique baselines, times, and sources to work with
        ubls = numpy.unique(bls)
        utimes = numpy.unique(obstimes)
        usrc = numpy.unique(srcs)

        # Convert times to real times
        times = utcjd_to_unix(obsdates + obstimes)
        times = numpy.unique(times)

        # Build a mask
        mask = numpy.zeros(flux.shape, dtype=numpy.bool)
        if fgdata is not None and not args.drop:
            reltimes = obsdates - obsdates[0] + obstimes
            maxtimes = reltimes + inttimes / 2.0 / 86400.0
            mintimes = reltimes - inttimes / 2.0 / 86400.0

            bls_ant1 = bls // 256
            bls_ant2 = bls % 256

            for row in fgdata.data:
                ant1, ant2 = row['ANTS']
                ## Only deal with flags that we need for the plots
                process_flag = False
                if args.include_auto or ant1 != ant2 or ant1 == 0 or ant2 == 0:
                    if ant1 == 0 and ant2 == 0:
                        process_flag = True
                    elif args.baseline is not None:
                        if ant2 == 0 and ant1 in [a0 for a0, a1 in args.baseline]:
                            process_flag = True
                        elif (ant1, ant2) in args.baseline:
                            process_flag = True
                    elif args.ref_ant is not None:
                        if ant1 == args.ref_ant or ant2 == args.ref_ant:
                            process_flag = True
                    else:
                        process_flag = True
                if not process_flag:
                    continue

                tStart, tStop = row['TIMERANG']
                band = row['BANDS']
                try:
                    len(band)
                except TypeError:
                    band = [band, ]
                cStart, cStop = row['CHANS']
                if cStop == 0:
                    cStop = -1
                pol = row['PFLAGS'].astype(numpy.bool)

                if ant1 == 0 and ant2 == 0:
                    btmask = numpy.where(((maxtimes >= tStart)
                                          & (mintimes <= tStop)))[0]
                elif ant1 == 0 or ant2 == 0:
                    ant1 = max([ant1, ant2])
                    btmask = numpy.where(((bls_ant1 == ant1) | (bls_ant2 == ant1))
                                         & ((maxtimes >= tStart)
                                            & (mintimes <= tStop)))[0]
                else:
                    btmask = numpy.where(((bls_ant1 == ant1) & (bls_ant2 == ant2))
                                         & ((maxtimes >= tStart)
                                            & (mintimes <= tStop)))[0]
                for b, v in enumerate(band):
                    if not v:
                        continue
                    mask[btmask, b, cStart - 1:cStop, :] |= pol

        plot_bls = []
        cross = []
        for i in xrange(len(ubls)):
            bl = ubls[i]
            ant1, ant2 = (bl >> 8) & 0xFF, bl & 0xFF
            if args.include_auto or ant1 != ant2:
                if args.baseline is not None:
                    if (ant1, ant2) in args.baseline:
                        plot_bls.append(bl)
                        cross.append(i)
                elif args.ref_ant is not None:
                    if ant1 == args.ref_ant or ant2 == args.ref_ant:
                        plot_bls.append(bl)
                        cross.append(i)
                else:
                    plot_bls.append(bl)
                    cross.append(i)
        nBL = len(cross)

        # Decimation, if needed
        if args.decimate > 1:
            if nFreq % args.decimate != 0:
                raise RuntimeError(
                    "Invalid frequency decimation factor: %i %% %i = %i"
                    % (nFreq, args.decimate, nFreq % args.decimate))
            nFreq //= args.decimate
            freq.shape = (freq.size // args.decimate, args.decimate)
            freq = freq.mean(axis=1)
            flux.shape = (flux.shape[0], flux.shape[1],
                          flux.shape[2] // args.decimate, args.decimate,
                          flux.shape[3])
            flux = flux.mean(axis=3)
            mask.shape = (mask.shape[0], mask.shape[1],
                          mask.shape[2] // args.decimate, args.decimate,
                          mask.shape[3])
            mask = mask.mean(axis=3)

        good = numpy.arange(freq.size // 8, freq.size * 7 // 8)  # Inner 75% of the band

        if first:
            ref_time = obsdates[0] + obstimes[0]

        # NOTE: Assumes that the Stokes parameters increment by -1
        namMapper = {}
        for i in xrange(nStk):
            stk = stk0 - i
            namMapper[i] = NUMERIC_STOKES[stk]
        polMapper = {'XX': 0, 'YY': 1, 'XY': 2, 'YX': 3}

        for b in xrange(len(plot_bls)):
            bl = plot_bls[b]
            valid = numpy.where(bls == bl)[0]
            i, j = (bl >> 8) & 0xFF, bl & 0xFF
            dTimes = obsdates[valid] + obstimes[valid]
            dTimes -= ref_time
            dTimes *= 86400.0

            for p in plot_pols:
                blName = (i, j)
                blName = '%s-%s - %s' % (
                    'EA%02i' % blName[0] if blName[0] < 51
                    else 'LWA%i' % (blName[0] - 50),
                    'EA%02i' % blName[1] if blName[1] < 51
                    else 'LWA%i' % (blName[1] - 50),
                    namMapper[polMapper[p]])

                if first or blName not in figs:
                    fig = plt.figure()
                    fig.suptitle('%s' % blName)
                    fig.subplots_adjust(hspace=0.001)
                    axA = fig.add_subplot(1, 2, 1)
                    axP = fig.add_subplot(1, 2, 2)
                    figs[blName] = (fig, axA, axP)
                fig, axA, axP = figs[blName]

                for band, offset in enumerate(fqoffsets):
                    frq = freq + offset
                    vis = numpy.ma.array(flux[valid, band, :, polMapper[p]],
                                         mask=mask[valid, band, :, polMapper[p]])

                    amp = numpy.ma.abs(vis)
                    vmin, vmax = percentile(amp, 1), percentile(amp, 99)
                    axA.imshow(amp,
                               extent=(frq[0] / 1e6, frq[-1] / 1e6,
                                       dTimes[0], dTimes[-1]),
                               origin='lower', interpolation='nearest',
                               vmin=vmin, vmax=vmax)
                    axP.imshow(numpy.ma.angle(vis),
                               extent=(frq[0] / 1e6, frq[-1] / 1e6,
                                       dTimes[0], dTimes[-1]),
                               origin='lower', vmin=-numpy.pi, vmax=numpy.pi,
                               interpolation='nearest')

        first = False

    for blName in figs:
        fig, axA, axP = figs[blName]
        fig.suptitle("%s UTC\n%s"
                     % (datetime.utcfromtimestamp(times[0]).strftime("%Y/%m/%d %H:%M"),
                        blName))
        axA.axis('auto')
        axA.set_title('Amp.')
        axA.set_xlabel('Frequency [MHz]')
        axA.set_ylabel('Amp. - Elapsed Time [s]')
        axP.axis('auto')
        axP.set_title('Phase')
        axP.set_xlabel('Frequency [MHz]')
        if args.save_images:
            fig.savefig('fringes-%s.png' % (blName.replace(' ', ''), ))

    if not args.save_images:
        plt.show()
def get_rank(x):
    return percentile(sample, x) / 100.0
def getTeamGameStats(nhl_seasons):
    nhl_seasons = {
        i: sorted(nhl_seasons[i], key=lambda x: x.get("date"))
        for i in nhl_seasons
    }
    team_seasons = defaultdict(lambda: defaultdict(lambda: []))
    for team_id in TEAMS:
        print(TEAMS[team_id])
        for season in nhl_seasons:
            team_season_games = sorted(
                [game for game in nhl_seasons[season]
                 if game["home"] == team_id or game["away"] == team_id],
                key=lambda x: x.get("date"))
            for idx, game in enumerate(team_season_games):
                game_df = TEAMSTATS[TEAMSTATS.game_id == game["id"]]
                for_df = game_df[game_df.team_id == team_id]
                against_df = game_df[game_df.team_id != team_id]
                game["won"] = int(for_df["won"].values[0])
                game["stats"]["travel"] = defaultdict(lambda: 0)
                game["stats"]["travel"]["home"] = int(
                    "home" == for_df["HoA"].values[0])
                game["stats"]["travel"]["game_day"] = datetime.strptime(
                    game["date"], "%Y-%m-%d").weekday()
                game["stats"]["travel"]["game_reg"] = int(
                    str(for_df["settled_in"]) == "REG")
                game["stats"]["travel"]["game_ot"] = int(
                    str(for_df["settled_in"]) == "OT")
                game["stats"]["travel"]["game_so"] = int(
                    str(for_df["settled_in"]) == "SO")
                if idx == 0:
                    game["stats"]["travel"]["rest_days"] = 1.0
                    if game["stats"]["travel"]["home"]:
                        game["stats"]["travel"]["timezone"] = 0
                        game["stats"]["travel"]["distance"] = 0
                    else:
                        prev_tz = ARENA_ZONES[TEAMS[team_id]]
                        curr_tz = ARENA_ZONES[TEAMS[game["home"]]]
                        game["stats"]["travel"]["timezone"] = abs(curr_tz - prev_tz)
                        prev_loc = ARENAS[TEAMS[team_id]]
                        curr_loc = ARENAS[TEAMS[game["home"]]]
                        game["stats"]["travel"]["distance"] = (
                            geodesic(prev_loc, curr_loc).miles / DST_MAX)
                else:
                    prev_game = team_season_games[idx - 1]
                    prev_date = prev_game["date"]
                    curr_date = game["date"]
                    d1 = datetime.strptime(prev_date, "%Y-%m-%d")
                    d2 = datetime.strptime(curr_date, "%Y-%m-%d")
                    game["stats"]["travel"]["rest_days"] = min(
                        abs((d1 - d2).days), 10) / DAY_MAX
                    prev_tz = ARENA_ZONES[TEAMS[prev_game["home"]]]
                    curr_tz = ARENA_ZONES[TEAMS[game["home"]]]
                    game["stats"]["travel"]["timezone"] = abs(curr_tz - prev_tz)
                    prev_loc = ARENAS[TEAMS[prev_game["home"]]]
                    curr_loc = ARENAS[TEAMS[game["home"]]]
                    game["stats"]["travel"]["distance"] = geodesic(
                        curr_loc, prev_loc).miles / DST_MAX
                game["stats"]["game"] = getGameStats(for_df, against_df)
            team_season_games = getCumulative(team_season_games)
            team_season_games = getTeamPercentile(team_season_games)
            team_seasons[team_id][season] = team_season_games
    for team_id in team_seasons:
        for season in team_seasons[team_id]:
            for idx, game in enumerate(team_seasons[team_id][season]):
                league_percentile = getLeagueDistribution(team_seasons,
                                                          season, idx)
                game["stats"]["league_percentile"] = {
                    i: float(percentile(league_percentile[i],
                                        game["stats"]["cumulative"][i]) / 100)
                    for i in dict(league_percentile)
                }
    return team_seasons
def quicklook(filename, flatten, ant='252A'):
    h5 = tb.open_file(filename)
    T_ant = apply_calibration(h5)
    f_leda = T_ant['f']
    ant_ids = [ant, ]
    print("Plotting %s..." % ant_ids[0])

    fig, axes = plt.subplots(figsize=(12, 6), nrows=1, ncols=1)
    # plt.suptitle(h5.filename)

    lst_stamps = T_ant['lst']
    utc_stamps = T_ant['utc']
    xlims = (f_leda[0], f_leda[-1])
    # ylims = mdates.date2num((T_ant['utc'][0], T_ant['utc'][-1]))
    # hfmt = mdates.DateFormatter('%m/%d %H:%M')
    ylims = (T_ant['lst'][0], T_ant['lst'][-1])

    T_flagged = T_ant[ant_ids[0]]
    # T_flagged = np.fft.fft(T_flagged, axis=0)
    # T_flagged -= T_flagged.mean(axis=0)
    # T_flagged = 10*np.log10(np.abs(np.fft.ifft(T_flagged)))
    T_flagged = rfi_flag(T_flagged, freqs=f_leda)

    if flatten:
        abp = np.ma.median(T_flagged.data, axis=0)
        abp /= np.ma.median(abp)
        T_flagged /= abp

    clim = (percentile(T_flagged.compressed(), 5),
            percentile(T_flagged.compressed(), 95))
    im = plt.imshow(T_flagged,  # / np.median(xx, axis=0),
                    cmap='jet', aspect='auto', interpolation='nearest',
                    clim=clim,
                    extent=(xlims[0], xlims[1], ylims[1], ylims[0]))
    plt.title(ant_ids[0])
    plt.xlabel("Frequency [MHz]")
    plt.ylabel("LST [hr]")
    plt.colorbar()
    plt.text(0.005, 0.005, get_repo_fingerprint(),
             transform=fig.transFigure, size=8)
    plt.savefig("figures/rfi-flagged.pdf")
    plt.show()

    plt.figure()
    # plt.plot(f_leda, np.sum(T_flagged.mask, axis=0).astype('float') / T_flagged.mask.shape[0], label='total')
    day = T_flagged[0:2000].mask
    night = T_flagged[2250:2750].mask
    plt.plot(f_leda, np.sum(night, axis=0).astype('float') / night.shape[0],
             label='night')
    plt.plot([0])
    plt.plot(f_leda, np.sum(day, axis=0).astype('float') / day.shape[0],
             label='day')
    plt.xlim(40, 85)
    plt.ylim(-0.025, 0.25)
    plt.title(ant_ids[0])
    plt.xlabel("Frequency [MHz]")
    plt.ylabel("Flagged fraction")
    plt.minorticks_on()
    plt.legend(frameon=True, loc=2)
    plt.tight_layout()
    plt.text(0.005, 0.005, get_repo_fingerprint(),
             transform=fig.transFigure, size=8)
    plt.savefig("figures/rfi-fraction.pdf")
    plt.show()

    plt.figure()
    plt.plot(f_leda, kurtosis(T_flagged, axis=0))
    plt.title(ant_ids[0])
    plt.ylabel("Kurtosis")
    plt.xlabel("Frequency [MHz]")
    plt.xlim(40, 85)
    plt.ylim(-50, 1600)
    plt.minorticks_on()
    plt.text(0.005, 0.005, get_repo_fingerprint(),
             transform=fig.transFigure, size=8)
    plt.show()
    plt.figure()
# Fragment: the enclosing classifier loop and the try block that this
# `finally:` closes sit outside this excerpt.
        Y_hat, Y_true, auc_scores, bottom_k_auc_scores = run_classifier(
            clf, classifier_features, cases, bottom_inds,
            optimize_hyperparams=False)
        # Save the data for next time
        print ' saving data...'
        pickle.dump((Y_hat, Y_true, auc_scores, bottom_k_auc_scores),
                    open(results_path + 'clf_results_%s.pickle' % clf_name, 'wb'))
    finally:
        if plot_output:
            print ' plotting data...'
            # Plot the Precision Recall Curve
            # Scikit's Precision Recall
            p, r, thresh = precision_recall_curve(Y_true.flatten(),
                                                  Y_hat.flatten())
            plt.plot(r, p, label=clf_name, **plot_ops[i])

    # Now get AUC bounds via bootstrap resampling
    print ' AUC bootstrap resampling...'
    print("[%.3f,%.3f]: %s AUC 95 bounds"
          % (percentile(auc_scores, 2.5), percentile(auc_scores, 97.5),
             clf_name))
    print("[%.3f,%.3f]: %s AUC 95 bounds - bottom %d methods"
          % (percentile(bottom_k_auc_scores, 2.5),
             percentile(bottom_k_auc_scores, 97.5), clf_name, k))

# Save and display the overall figure
if plot_output:
    # plt.legend(loc=1, fontsize=20)
    fix_legend(handlelength=7)
    fix_axes()
    plt.ylim(0, 1)
    plt.xlim(0, 1)
    plt.hold(False)
    plt.tight_layout()
    PR_fig.savefig(figure_path + 'precision_recall.pdf', bbox_inches=0)
    PR_fig.show()

# Now print out all the results
def getPercentile(self, inArticleDict, percent):
    """Return a dict mapping each subject to the requested percentile."""
    medDict = {}
    for subject in inArticleDict:
        # As in getMedian, index the parameter rather than self.inArticleDict.
        medDict[subject] = percentile(inArticleDict[subject], percent)
    return medDict
def stat(self, a):
    # if self.mask != None:
    #     a = np.compress(a.flatten(), self.mask.flatten() > 0)
    # vmin, vmax, vmid = a.min(), a.max(), a.mean()
    vmin, vmax, vmid = (percentile(a.flatten(), 1),
                        percentile(a.flatten(), 99),
                        percentile(a.flatten(), 50))
    return vmin, vmax, vmid
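# The 1st/99th-percentile limits above implement robust display clipping, the
# same pattern several plotting snippets here use for vmin/vmax. A minimal
# numpy sketch with made-up data:
import numpy as np

img = np.random.default_rng(0).normal(100.0, 10.0, size=(64, 64))
img[0, 0] = 1e6  # a single hot pixel
vmin, vmax = np.percentile(img, [1, 99])
clipped = np.clip(img, vmin, vmax)  # display range no longer driven by the outlier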
def plot(out_fd, data_sets, title='Expected bankroll change over time',
         xlabel='Roll number', ylabel='Change in bankroll',
         file_format='png', transparent=False):
    '''
    Plot the median value across many sets of data, along with shaded
    percentile bands around it.

    Each input data set should be a dictionary of x,y pairs as specified
    below. All input data sets must have the same x values.

    At each specified x value, plot the median of the y values across all
    data sets, and shade the bands between the min/5th, 5th/25th, 25th/75th,
    75th/95th, and 95th/max percentiles of those y values.

    out_fd: the file-like object to which to write the graph

    data_sets: an iterable, containing one or more data set dictionaries

    file_format: file format to output. Must be one of:
        - png
        - svg
        - svgz

    transparent: whether or not the file should have a transparent background

    An example data set dictionary:

        { 0: 1, 1: 4, 2: 2, 6: 7, 9: 10, ... }

    Where each key is an x value and the key's value is the corresponding
    y value. All data sets must have the same exact set of keys.
    '''
    assert file_format in 'png svg svgz'.split(' ')
    plt.figure()
    d = None
    for data_set in data_sets:
        if d is None:
            d = {}
            for x in data_set:
                d[x] = [data_set[x]]
            continue
        for x in data_set:
            d[x].append(data_set[x])
    stats_d = {}
    for x in d:
        stats_d[x] = (
            min(d[x]),
            percentile(d[x], 5),
            percentile(d[x], 25),
            percentile(d[x], 50),
            percentile(d[x], 75),
            percentile(d[x], 95),
            max(d[x]),
        )
    # colors selected to be good for colorblind people
    # http://www.somersault1824.com/tips-for-designing-scientific-figures-for-color-blind-readers/
    # http://mkweb.bcgsc.ca/biovis2012/
    # http://mkweb.bcgsc.ca/colorblind/
    dark_purple = rgb_conv(73, 0, 146)
    dark_blue = rgb_conv(0, 109, 219)
    purple = rgb_conv(182, 109, 255)
    blue = rgb_conv(109, 182, 255)
    light_blue = rgb_conv(182, 219, 255)
    uppest_color = *dark_purple, 0.9
    upper_color = *dark_blue, 0.9
    med_color = *purple, 1
    middle_color = *purple, 0.5
    lower_color = *blue, 0.9
    lowest_color = *light_blue, 0.9
    per_0 = [v[0] for v in stats_d.values()]
    per_5 = [v[1] for v in stats_d.values()]
    per_25 = [v[2] for v in stats_d.values()]
    per_50 = [v[3] for v in stats_d.values()]
    per_75 = [v[4] for v in stats_d.values()]
    per_95 = [v[5] for v in stats_d.values()]
    per_100 = [v[6] for v in stats_d.values()]
    # plt.plot(stats_d.keys(), per_100, color=max_color, label='max')
    plt.plot(stats_d.keys(), per_50, color=med_color, label='median')
    # plt.plot(stats_d.keys(), per_0, color=min_color, label='min')
    plt.fill_between(stats_d.keys(), per_100, per_95, color=uppest_color,
                     label='top 5%')
    plt.fill_between(stats_d.keys(), per_95, per_75, color=upper_color,
                     label='next 20%')
    plt.fill_between(stats_d.keys(), per_75, per_25, color=middle_color,
                     label='middle 50%')
    plt.fill_between(stats_d.keys(), per_25, per_5, color=lower_color,
                     label='next 20%')
    plt.fill_between(stats_d.keys(), per_5, per_0, color=lowest_color,
                     label='bottom 5%')
    plt.xlim(left=0, right=max(stats_d.keys()))
    ymag = max(max(per_100), -1 * min(per_0))
    plt.ylim(top=ymag, bottom=-1 * ymag)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend(loc='best', fontsize=8)
    plt.title(title)
    plt.savefig(out_fd, transparent=transparent, format=file_format)
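# Hedged usage sketch for plot() above; the random-walk bankrolls are
# illustrative data, and 'bankroll.png' is an arbitrary output name.
import random

sets = []
for _ in range(100):
    walk, total = {}, 0
    for x in range(200):
        total += random.choice((-1, 1))
        walk[x] = total
    sets.append(walk)
with open('bankroll.png', 'wb') as fd:
    plot(fd, sets)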
def quicklook(filename, save, dump, flag, merge, flatten, no_show, all_lsts,
              new_cal, sky=False, lfsm=False, emp=False):
    h5 = tb.open_file(filename)
    if new_cal:
        T_ant = apply_new_calibration(h5)
    else:
        T_ant = apply_calibration(h5)
    f_leda = T_ant['f']
    ant_ids = ['252', '254', '255']
    print("Plotting...")
    fig = plt.figure(figsize=(20, 20))
    # plt.suptitle(h5.filename)

    lst_stamps = T_ant['lst']
    indexes = np.arange(len(lst_stamps), dtype=np.int)
    if len(lst_stamps) == 0:
        raise RuntimeError("No LSTs in file")

    # Report discontinuities in time
    for i in range(1, len(lst_stamps)):
        if lst_stamps[i] - lst_stamps[i - 1] > 1 / 60.0:  # 1 minute
            print "Discontinuity at LST", lst_stamps[i], \
                (lst_stamps[i] - lst_stamps[i - 1]) * 60 * 60, "seconds"

    utc_stamps = T_ant['utc']
    xlims = (f_leda[0], f_leda[-1])
    # ylims = mdates.date2num((T_ant['utc'][0], T_ant['utc'][-1]))
    # hfmt = mdates.DateFormatter('%m/%d %H:%M')
    ylims = (T_ant['lst'][0], T_ant['lst'][-1])

    # Work out altitude of Gal center and Sun. Use whichever is highest
    # and put that in the padding, which is the stripe.
    pad_length = 70
    padding = np.full((len(lst_stamps), pad_length), 10000)
    timing = lst_timing.LST_Timing(lst_stamps, utc_stamps)
    border_bottom, night_bottom, night_top, border_top = timing.calc_night()
    padding[night_bottom:night_top, :] = 1000
    # for ant in ant_ids:
    #     lst_stamps, T_ant[ant+"A"] = timing.align(T_ant[ant+"A"])
    #     lst_stamps, T_ant[ant+"B"] = timing.align(T_ant[ant+"B"])
    if night_bottom:
        print "Night", lst_stamps[night_bottom], "-", lst_stamps[night_top - 1]
    else:
        print "Night 0 - 0"

    # Use night only
    if not all_lsts:
        if not border_top:
            raise RuntimeError(
                "No LSTs available at night time (use --all_lsts to see all)")
        lst_stamps = lst_stamps[night_bottom:night_top]
        utc_stamps = utc_stamps[night_bottom:night_top]
        indexes = indexes[night_bottom:night_top]
        padding = padding[night_bottom:night_top]
        ylims = (lst_stamps[0], lst_stamps[-1])
        print len(lst_stamps), "usable LSTs"
    else:
        print "Using all LSTs"
    if len(lst_stamps) == 0:
        raise RuntimeError("There are no data to display (number of LSTs is 0)")

    yloc = []
    ylabel = []
    try:
        for i in range(0, len(lst_stamps), len(lst_stamps) / 7):
            yloc.append(lst_stamps[i]), ylabel.append(("%.1f" % lst_stamps[i]))
    except:
        yloc.append(lst_stamps[0]), ylabel.append(("%.1f" % lst_stamps[0]))
        yloc.append(lst_stamps[-1]), ylabel.append(("%.1f" % lst_stamps[-1]))

    if all_lsts:
        new_x_high = xlims[1] + pad_length * (xlims[1] - xlims[0]) / len(f_leda)
    else:
        new_x_high = xlims[1]

    dump_data = {}

    if sky:
        if lfsm and emp:
            smdl = SkyModelLFSMEmp
            smlbl = 'LFSM+Emp'
        elif lfsm and not emp:
            smdl = SkyModelLFSM
            smlbl = 'LFSM'
        elif not lfsm and emp:
            smdl = SkyModelGSMEmp
            smlbl = 'GSM+Emp'
        else:
            smdl = SkyModelGSM
            smlbl = 'GSM'
        sy = smdl(pol='y')
        sx = smdl(pol='x')
        T_y_asm = sy.generate_tsky(lst_stamps, f_leda * 1e6)
        T_x_asm = sx.generate_tsky(lst_stamps, f_leda * 1e6)

    if flag and merge:
        # If we are going to merge the flags across antennas, we need to
        # flag them all now
        for p in (0, 1):
            for ii, key in enumerate(ant_ids):
                ant = key + ("B" if p else "A")
                T_flagged = T_ant[ant]
                if not all_lsts:
                    # Do flagging with a border around the data in time
                    masks = rfi_flag(T_flagged[border_bottom:border_top],
                                     freqs=f_leda)
                    new_mask = masks.combine(do_not_excise_dtv=True)
                    new_mask = new_mask[night_bottom - border_bottom:
                                        night_top - border_bottom]  # remove border
                else:
                    masks = rfi_flag(T_flagged, freqs=f_leda)
                    new_mask = masks.combine(do_not_excise_dtv=True)
                print ant, "Biggest DTV gap", \
                    lst_stamps[biggest_gap(masks.dtv_tms)[1]], "-", \
                    lst_stamps[biggest_gap(masks.dtv_tms)[0]], "waterfall"
                try:
                    merged_mask |= new_mask
                except NameError:
                    merged_mask = new_mask

    for p in [0, 1]:
        for ii, key in enumerate(ant_ids):
            if p == 0 and ii == 0:
                ax = fig.add_subplot(2, 3, 3 * p + ii + 1)
                origAX = ax
            else:
                ax = fig.add_subplot(2, 3, 3 * p + ii + 1,
                                     sharex=origAX, sharey=origAX)

            if p == 0:
                ant = key + "A"
            else:
                ant = key + "B"

            T_flagged = T_ant[ant]
            if not all_lsts:
                T_flagged = T_flagged[night_bottom:night_top]
            print "Max", np.max(T_flagged), "Min", np.min(T_flagged)

            masks = {}
            if flag:
                if merge:
                    ## Already done
                    T_flagged = np.ma.array(T_flagged, mask=merged_mask)
                else:
                    ## Need to do it now - there's probably a way to deal with
                    ## this all in one pass
                    if not all_lsts:
                        masks = rfi_flag(T_ant[ant][border_bottom:border_top],
                                         freqs=f_leda)
                        T_flagged = masks.apply_as_mask(
                            T_ant[ant][border_bottom:border_top],
                            do_not_excise_dtv=True)
                        T_flagged = T_flagged[night_bottom - border_bottom:
                                              night_top - border_bottom]  # Remove border
                        masks.chop(night_bottom - border_bottom,
                                   night_top - border_bottom)
                    else:
                        masks = rfi_flag(T_flagged, freqs=f_leda)
                        T_flagged = masks.apply_as_mask(T_flagged,
                                                        do_not_excise_dtv=True)
                    print ant, "Biggest DTV gap", \
                        lst_stamps[biggest_gap(masks.dtv_tms)[1]], "-", \
                        lst_stamps[biggest_gap(masks.dtv_tms)[0]], "waterfall"
                print "After flagging", "Max", np.ma.max(T_flagged), \
                    "Min", np.ma.min(T_flagged)

            try:
                T_asm = T_y_asm if p == 0 else T_x_asm
                scale_offset_asm = robust.mean(T_asm / T_flagged)
                T_flagged = T_flagged - T_asm / scale_offset_asm
            except NameError:
                pass

            T_flagged = pad_data(T_flagged)  # Up to 2400 channels

            if dump:
                if not all_lsts:
                    if masks:
                        dump_data[ant + "_flagged"] = masks.apply_as_nan(
                            T_ant[ant][night_bottom:night_top])
                    dump_data[ant] = T_ant[ant][night_bottom:night_top]
                else:
                    if masks:
                        dump_data[ant + "_flagged"] = masks.apply_as_nan(T_ant[ant])
                    dump_data[ant] = T_ant[ant]
                dump_data[ant + "_rms"] = add_uncertainties(T_flagged)
                av = np.ma.average(T_flagged, axis=0)
                weighted = av / dump_data[ant + "_rms"]**2
                dump_data[ant + "_weighted"] = weighted
                if masks:
                    dump_data[ant + "_dtv_times"] = np.array(masks.dtv_tms)
                    dump_data[ant + "_masks"] = masks.masks

            if flag:
                total = T_flagged.shape[0] * T_flagged.shape[1]
                num_in = np.ma.MaskedArray.count(T_flagged)
                print ant, ("%.1f%%" % (100 * float(total - num_in) / total)), \
                    "flagged.", "Count:", total - num_in

            # Add the stripe onto the right edge of the data and adjust the
            # extent of the x-axis (frequency) to cover the stripe.
            if all_lsts:
                T_flagged_plot = np.ma.concatenate((T_flagged, padding), axis=1)
            else:
                T_flagged_plot = T_flagged

            ax.set_yticks(yloc)
            ax.set_yticklabels(ylabel)
            ax.tick_params(axis='y', pad=2)

            if flatten:
                if type(T_flagged_plot) is np.ma.core.MaskedArray:
                    abp = np.ma.median(T_flagged_plot.data, axis=0)
                else:
                    abp = np.ma.median(T_flagged_plot, axis=0)
                abp /= np.ma.median(abp)
                T_flagged_plot /= abp
                try:
                    clim = (percentile(T_flagged_plot.compressed(), 5),
                            percentile(T_flagged_plot.compressed(), 95))
                except AttributeError:
                    clim = (percentile(T_flagged_plot, 5),
                            percentile(T_flagged_plot, 95))
            elif sky:
                clim = (-250, 500)
            else:
                clim = (1000, 10000)

            if ant != "252B":
                im = ax.imshow(T_flagged_plot,  # / np.median(xx, axis=0),
                               cmap="viridis", aspect='auto',
                               interpolation='nearest', clim=clim,
                               extent=(xlims[0], new_x_high, ylims[1], ylims[0]))

            ax.set_title(ant)
            if p == 1:
                ax.set_xlabel("Frequency [MHz]")
            if ii == 0:
                ax.set_ylabel("LST [hr]")
            # ax.yaxis_date()
            # ax.yaxis.set_major_formatter(hfmt)

    # if not flatten:
    fig.subplots_adjust(left=0.07)
    fig.subplots_adjust(right=0.875)
    cbar_ax = fig.add_axes([0.9, 0.125, 0.025, 0.75])
    cbar = fig.colorbar(im, cax=cbar_ax)
    # plt.subplot(2,3,3)
    # cbar = plt.colorbar()
    if sky:
        cbar.set_label("Temperature - %s [K]" % smlbl)
    else:
        cbar.set_label("Temperature [K]")
    cbar.ax.tick_params(axis='y', pad=2)
    # plt.tight_layout()
    plt.text(0.005, 0.005, get_repo_fingerprint(), transform=fig.transFigure,
             size=8)

    if save:
        plt.savefig(os.path.basename(filename)[:-3] + ".png")
    if not no_show:
        plt.show()

    if dump:
        dump_data["lsts"] = lst_stamps
        dump_data["utcs"] = np.array([str(pytime) for pytime in utc_stamps])
        dump_data["indexes"] = indexes
        dump_data["frequencies"] = pad_frequencies(f_leda)
        dump_data["options"] = "Flag="+str(flag) \
                               + " Filename="+filename \
                               + " New cal="+str(new_cal) \
                               + " Merge="+str(merge) \
                               + " Flatten="+str(flatten) \
                               + " All LSTs="+str(all_lsts) \
                               + " Sky Model Substract="+str(sky) \
                               + " Use LFSM="+str(lfsm) \
                               + " Apply empirical gain correction="+str(emp)
        dump_data["fingerprint"] = get_repo_fingerprint()

        import json

        def jdefault(o):
            return o.__dict__

        dump_data["params"] = json.dumps(params, default=jdefault)
        hickle.dump(dump_data, os.path.basename(filename)[:-3] + ".hkl")
def quicklook(filename, save, dump, flag, merge, flatten, no_show, all_lsts):
    h5 = tb.open_file(filename)
    T_ant = apply_calibration(h5)
    f_leda = T_ant['f']
    ant_ids = ['252', '254', '255']
    print("Plotting...")
    fig = plt.figure(figsize=(12, 12))
    # plt.suptitle(h5.filename)

    lst_stamps = T_ant['lst']
    if len(lst_stamps) == 0:
        print "No LSTS in file"
        exit(1)

    # Report discontinuities in time
    for i in range(1, len(lst_stamps)):
        if lst_stamps[i] - lst_stamps[i - 1] > 1 / 60.0:  # 1 minute
            print "Discontinuity at LST", lst_stamps[i], \
                (lst_stamps[i] - lst_stamps[i - 1]) * 60 * 60, "seconds"

    utc_stamps = T_ant['utc']
    xlims = (f_leda[0], f_leda[-1])
    # ylims = mdates.date2num((T_ant['utc'][0], T_ant['utc'][-1]))
    # hfmt = mdates.DateFormatter('%m/%d %H:%M')
    ylims = (T_ant['lst'][0], T_ant['lst'][-1])

    # Work out altitude of Gal center and Sun. Use whichever is highest
    # and put that in the padding, which is the stripe.
    unusable_lsts = []
    pad_length = 70
    padding = np.zeros((len(lst_stamps), pad_length))
    for i, d in enumerate(utc_stamps):
        ovro.date = d
        sun.compute(ovro)
        gal_center.compute(ovro)
        if sun.alt > -15 * np.pi / 180 or gal_center.alt > -15 * np.pi / 180:
            padding[i, :] = 10000
            unusable_lsts.append(i)
        else:
            padding[i, :] = 1000

    # Delete sun up LSTS
    if not all_lsts:
        print "Cutting out times when sun/galaxy up"
        padding = np.delete(padding, unusable_lsts, axis=0)
        lst_stamps = np.delete(lst_stamps, unusable_lsts, axis=0)
        utc_stamps = np.delete(utc_stamps, unusable_lsts, axis=0)
        if len(lst_stamps) == 0:
            print "No LSTs available at night time (use --all_lsts to see all)"
            exit(1)
        ylims = (lst_stamps[0], lst_stamps[-1])
        print len(lst_stamps), "usable LSTs"
    else:
        print "Using all LSTs"
    if len(lst_stamps) == 0:
        print "There is no data to display (number of LSTs is 0)"
        exit(1)

    yloc = []
    ylabel = []
    for i in range(0, len(lst_stamps), len(lst_stamps) / 7):
        yloc.append(lst_stamps[i]), ylabel.append(("%.1f" % lst_stamps[i]))

    if all_lsts:
        new_x_high = xlims[1] + pad_length * (xlims[1] - xlims[0]) / len(f_leda)
    else:
        new_x_high = xlims[1]

    dump_data = {}

    if flag and merge:
        # If we are going to merge the flags across antennas, we need to
        # flag them all now
        for p in (0, 1):
            for ii, key in enumerate(ant_ids):
                ant = key + ("B" if p else "A")
                T_flagged = T_ant[ant]
                if not all_lsts:
                    T_flagged = np.delete(T_flagged, unusable_lsts, axis=0)
                new_mask = rfi_flag(T_flagged, freqs=f_leda).mask
                try:
                    merged_mask |= new_mask
                except NameError:
                    merged_mask = new_mask

    for p in [0, 1]:
        for ii, key in enumerate(ant_ids):
            if p == 0 and ii == 0:
                ax = fig.add_subplot(2, 3, 3 * p + ii + 1)
                origAX = ax
            else:
                ax = fig.add_subplot(2, 3, 3 * p + ii + 1,
                                     sharex=origAX, sharey=origAX)

            if p == 0:
                ant = key + "A"
            else:
                ant = key + "B"

            T_flagged = T_ant[ant]
            if not all_lsts:
                T_flagged = np.delete(T_flagged, unusable_lsts, axis=0)
            print "Max", np.max(T_flagged), "Min", np.min(T_flagged)

            if flag:
                if merge:
                    ## Already done
                    T_flagged = np.ma.array(T_flagged, mask=merged_mask)
                else:
                    ## Need to do it now - there's probably a way to deal with
                    ## this all in one pass
                    T_flagged = rfi_flag(T_flagged, freqs=f_leda)
                print "After flagging", "Max", np.ma.max(T_flagged), \
                    "Min", np.ma.min(T_flagged)

            if dump:
                dump_data[ant] = T_flagged
                dump_data[ant + "_rms"] = add_uncertainties(T_flagged)
                av = np.ma.average(T_flagged, axis=0)
                weighted = av / dump_data[ant + "_rms"]**2
                dump_data[ant + "_weighted"] = weighted

            if flag:
                total = T_flagged.shape[0] * T_flagged.shape[1]
                num_in = np.ma.MaskedArray.count(T_flagged)
                # float() keeps the percentage from being truncated by
                # integer division under Python 2.
                print ant, ("%.1f%%" % (100 * float(total - num_in) / total)), \
                    "flagged.", "Count:", total - num_in

            # Add the stripe onto the right edge of the data and adjust the
            # extent of the x-axis (frequency) to cover the stripe.
            if all_lsts:
                T_flagged_plot = np.ma.concatenate((T_flagged, padding), axis=1)
            else:
                T_flagged_plot = T_flagged

            ax.set_yticks(yloc)
            ax.set_yticklabels(ylabel)
            ax.tick_params(axis='y', pad=2)

            if flatten:
                if type(T_flagged_plot) is np.ma.core.MaskedArray:
                    abp = np.ma.median(T_flagged_plot.data, axis=0)
                else:
                    abp = np.ma.median(T_flagged_plot, axis=0)
                abp /= np.ma.median(abp)
                T_flagged_plot /= abp
                try:
                    clim = (percentile(T_flagged_plot.compressed(), 5),
                            percentile(T_flagged_plot.compressed(), 95))
                except AttributeError:
                    clim = (percentile(T_flagged_plot, 5),
                            percentile(T_flagged_plot, 95))
            else:
                clim = (1000, 10000)

            im = ax.imshow(T_flagged_plot,  # / np.median(xx, axis=0),
                           cmap='jet', aspect='auto', interpolation='nearest',
                           clim=clim,
                           extent=(xlims[0], new_x_high, ylims[1], ylims[0]))
            ax.set_title(ant)
            if p == 1:
                ax.set_xlabel("Frequency [MHz]")
            if ii == 0:
                ax.set_ylabel("LST [hr]")
            # ax.yaxis_date()
            # ax.yaxis.set_major_formatter(hfmt)

    # if not flatten:
    fig.subplots_adjust(left=0.07)
    fig.subplots_adjust(right=0.875)
    cbar_ax = fig.add_axes([0.9, 0.125, 0.025, 0.75])
    cbar = fig.colorbar(im, cax=cbar_ax)
    # plt.subplot(2,3,3)
    # cbar = plt.colorbar()
    cbar.set_label("Temperature [K]")
    cbar.ax.tick_params(axis='y', pad=2)
    # plt.tight_layout()

    if save:
        plt.savefig(os.path.basename(filename)[:-3] + ".png")
    if not no_show:
        plt.show()

    if dump:
        dump_data["lsts"] = lst_stamps
        dump_data["utcs"] = np.array([str(pytime) for pytime in utc_stamps])
        dump_data["frequencies"] = f_leda
        dump_data["options"] = "Flag=" + str(flag) + " Merge=" + str(merge) \
                               + " Flatten=" + str(flatten) \
                               + " All LSTSs=" + str(all_lsts)
        hickle.dump(dump_data, os.path.basename(filename)[:-3] + ".hkl")
def main(args):
    # Parse the command line
    ## Baseline list
    if args.baseline is not None:
        ## Fill the baseline list with the conjugates, if needed
        newBaselines = []
        for pair in args.baseline:
            newBaselines.append((pair[1], pair[0]))
        args.baseline.extend(newBaselines)
    ## Polarization
    args.polToPlot = 'XX'
    if args.xy:
        args.polToPlot = 'XY'
    elif args.yx:
        args.polToPlot = 'YX'
    elif args.yy:
        args.polToPlot = 'YY'

    filename = args.filename

    print("Working on '%s'" % os.path.basename(filename))
    # Open the FITS IDI file and access the UV_DATA extension
    hdulist = astrofits.open(filename, mode='readonly')
    andata = hdulist['ANTENNA']
    fqdata = hdulist['FREQUENCY']
    fgdata = None
    for hdu in hdulist[1:]:
        if hdu.header['EXTNAME'] == 'FLAG':
            fgdata = hdu
    uvdata = hdulist['UV_DATA']

    # Pull out various bits of information we need to flag the file
    ## Antenna look-up table
    antLookup = {}
    for an, ai in zip(andata.data['ANNAME'], andata.data['ANTENNA_NO']):
        antLookup[an] = ai
    ## Frequency and polarization setup
    nBand, nFreq, nStk = uvdata.header['NO_BAND'], uvdata.header['NO_CHAN'], uvdata.header['NO_STKD']
    stk0 = uvdata.header['STK_1']
    ## Baseline list
    bls = uvdata.data['BASELINE']
    ## Time of each integration
    obsdates = uvdata.data['DATE']
    obstimes = uvdata.data['TIME']
    inttimes = uvdata.data['INTTIM']
    ## Source list
    srcs = uvdata.data['SOURCE']
    ## Band information
    fqoffsets = fqdata.data['BANDFREQ'].ravel()
    ## Frequency channels
    freq = (numpy.arange(nFreq) - (uvdata.header['CRPIX3'] - 1)) * uvdata.header['CDELT3']
    freq += uvdata.header['CRVAL3']
    ## UVW coordinates
    try:
        u, v, w = uvdata.data['UU'], uvdata.data['VV'], uvdata.data['WW']
    except KeyError:
        u, v, w = uvdata.data['UU---SIN'], uvdata.data['VV---SIN'], uvdata.data['WW---SIN']
    uvw = numpy.array([u, v, w]).T
    ## The actual visibility data
    flux = uvdata.data['FLUX'].astype(numpy.float32)

    # Convert the visibilities to something that we can easily work with
    nComp = flux.shape[1] // nBand // nFreq // nStk
    if nComp == 2:
        ## Case 1) - Just real and imaginary data
        flux = flux.view(numpy.complex64)
    else:
        ## Case 2) - Real, imaginary data + weights (drop the weights)
        flux = flux[:, 0::nComp] + 1j * flux[:, 1::nComp]
    flux.shape = (flux.shape[0], nBand, nFreq, nStk)

    # Find unique baselines, times, and sources to work with
    ubls = numpy.unique(bls)
    utimes = numpy.unique(obstimes)
    usrc = numpy.unique(srcs)

    # Convert times to real times
    times = utcjd_to_unix(obsdates + obstimes)
    times = numpy.unique(times)

    # Build a mask
    mask = numpy.zeros(flux.shape, dtype=bool)
    if fgdata is not None and not args.drop:
        reltimes = obsdates - obsdates[0] + obstimes
        maxtimes = reltimes + inttimes / 2.0 / 86400.0
        mintimes = reltimes - inttimes / 2.0 / 86400.0

        bls_ant1 = bls // 256
        bls_ant2 = bls % 256

        for row in fgdata.data:
            ant1, ant2 = row['ANTS']

            ## Only deal with flags that we need for the plots
            process_flag = False
            if args.include_auto or ant1 != ant2 or ant1 == 0 or ant2 == 0:
                if ant1 == 0 and ant2 == 0:
                    process_flag = True
                elif args.baseline is not None:
                    if ant2 == 0 and ant1 in [a0 for a0, a1 in args.baseline]:
                        process_flag = True
                    elif (ant1, ant2) in args.baseline:
                        process_flag = True
                elif args.ref_ant is not None:
                    if ant1 == args.ref_ant or ant2 == args.ref_ant:
                        process_flag = True
                else:
                    process_flag = True
            if not process_flag:
                continue

            tStart, tStop = row['TIMERANG']
            band = row['BANDS']
            try:
                len(band)
            except TypeError:
                band = [band,]
            cStart, cStop = row['CHANS']
            if cStop == 0:
                cStop = -1
            pol = row['PFLAGS'].astype(bool)

            if ant1 == 0 and ant2 == 0:
                btmask = numpy.where(((maxtimes >= tStart) & (mintimes <= tStop)))[0]
            elif ant1 == 0 or ant2 == 0:
                ant1 = max([ant1, ant2])
                btmask = numpy.where(((bls_ant1 == ant1) | (bls_ant2 == ant1)) \
                                     & ((maxtimes >= tStart) & (mintimes <= tStop)))[0]
            else:
                btmask = numpy.where(((bls_ant1 == ant1) & (bls_ant2 == ant2)) \
                                     & ((maxtimes >= tStart) & (mintimes <= tStop)))[0]
            for b, v in enumerate(band):
                if not v:
                    continue
                mask[btmask, b, cStart - 1:cStop, :] |= pol

    plot_bls = []
    cross = []
    for i in xrange(len(ubls)):
        bl = ubls[i]
        ant1, ant2 = (bl >> 8) & 0xFF, bl & 0xFF
        if args.include_auto or ant1 != ant2:
            if args.baseline is not None:
                if (ant1, ant2) in args.baseline:
                    plot_bls.append(bl)
                    cross.append(i)
            elif args.ref_ant is not None:
                if ant1 == args.ref_ant or ant2 == args.ref_ant:
                    plot_bls.append(bl)
                    cross.append(i)
            else:
                plot_bls.append(bl)
                cross.append(i)
    nBL = len(cross)

    # Decimation, if needed
    if args.decimate > 1:
        if nFreq % args.decimate != 0:
            raise RuntimeError("Invalid frequency decimation factor: %i %% %i = %i" \
                               % (nFreq, args.decimate, nFreq % args.decimate))

        nFreq //= args.decimate
        freq.shape = (freq.size // args.decimate, args.decimate)
        freq = freq.mean(axis=1)

        flux.shape = (flux.shape[0], flux.shape[1],
                      flux.shape[2] // args.decimate, args.decimate, flux.shape[3])
        flux = flux.mean(axis=3)

        mask.shape = (mask.shape[0], mask.shape[1],
                      mask.shape[2] // args.decimate, args.decimate, mask.shape[3])
        mask = mask.mean(axis=3)

    good = numpy.arange(freq.size // 8, freq.size * 7 // 8)  # Inner 75% of the band

    # NOTE: Assumes that the Stokes parameters increment by -1
    namMapper = {}
    for i in xrange(nStk):
        stk = stk0 - i
        namMapper[i] = NUMERIC_STOKES[stk]
    polMapper = {'XX': 0, 'YY': 1, 'XY': 2, 'YX': 3}

    fig1 = plt.figure()
    fig2 = plt.figure()
    fig3 = plt.figure()
    fig4 = plt.figure()
    fig5 = plt.figure()

    k = 0
    nRow = int(numpy.sqrt(len(plot_bls)))
    nCol = int(numpy.ceil(len(plot_bls) * 1.0 / nRow))
    for b in xrange(len(plot_bls)):
        bl = plot_bls[b]
        valid = numpy.where(bls == bl)[0]
        i, j = (bl >> 8) & 0xFF, bl & 0xFF
        dTimes = obsdates[valid] + obstimes[valid]
        dTimes -= dTimes[0]
        dTimes *= 86400.0

        ax1, ax2, ax3, ax4, ax5 = None, None, None, None, None
        for band, offset in enumerate(fqoffsets):
            frq = freq + offset
            vis = numpy.ma.array(flux[valid, band, :, polMapper[args.polToPlot]],
                                 mask=mask[valid, band, :, polMapper[args.polToPlot]])

            ## Waterfall of visibility phase
            ax1 = fig1.add_subplot(nRow, nCol * nBand, nBand * k + 1 + band, sharey=ax1)
            ax1.imshow(numpy.ma.angle(vis),
                       extent=(frq[0] / 1e6, frq[-1] / 1e6, dTimes[0], dTimes[-1]),
                       origin='lower', vmin=-numpy.pi, vmax=numpy.pi,
                       interpolation='nearest')
            ax1.axis('auto')
            ax1.set_xlabel('Frequency [MHz]')
            if band == 0:
                ax1.set_ylabel('Elapsed Time [s]')
            ax1.set_title("%i,%i - %s" % (i, j, namMapper[polMapper[args.polToPlot]]))
            ax1.set_xlim((frq[0] / 1e6, frq[-1] / 1e6))
            ax1.set_ylim((dTimes[0], dTimes[-1]))

            ## Waterfall of visibility amplitude, clipped to the 1st-99th
            ## percentile range to keep RFI from saturating the color scale
            ax2 = fig2.add_subplot(nRow, nCol * nBand, nBand * k + 1 + band, sharey=ax2)
            amp = numpy.ma.abs(vis)
            vmin, vmax = percentile(amp, 1), percentile(amp, 99)
            ax2.imshow(amp,
                       extent=(frq[0] / 1e6, frq[-1] / 1e6, dTimes[0], dTimes[-1]),
                       origin='lower', interpolation='nearest', vmin=vmin, vmax=vmax)
            ax2.axis('auto')
            ax2.set_xlabel('Frequency [MHz]')
            if band == 0:
                ax2.set_ylabel('Elapsed Time [s]')
            ax2.set_title("%i,%i - %s" % (i, j, namMapper[polMapper[args.polToPlot]]))
            ax2.set_xlim((frq[0] / 1e6, frq[-1] / 1e6))
            ax2.set_ylim((dTimes[0], dTimes[-1]))

            ## Time-averaged amplitude spectrum
            ax3 = fig3.add_subplot(nRow, nCol * nBand, nBand * k + 1 + band, sharey=ax3)
            ax3.plot(frq / 1e6, numpy.ma.abs(vis.mean(axis=0)))
            ax3.set_xlabel('Frequency [MHz]')
            if band == 0:
                ax3.set_ylabel('Mean Vis. Amp. [lin.]')
            ax3.set_title("%i,%i - %s" % (i, j, namMapper[polMapper[args.polToPlot]]))
            ax3.set_xlim((frq[0] / 1e6, frq[-1] / 1e6))

            ## Band-averaged phase vs. time
            ax4 = fig4.add_subplot(nRow, nCol * nBand, nBand * k + 1 + band, sharey=ax4)
            ax4.plot(numpy.ma.angle(vis[:, good].mean(axis=1)) * 180 / numpy.pi,
                     dTimes, linestyle='', marker='+')
            ax4.set_xlim((-180, 180))
            ax4.set_xlabel('Mean Vis. Phase [deg]')
            if band == 0:
                ax4.set_ylabel('Elapsed Time [s]')
            ax4.set_title("%i,%i - %s" % (i, j, namMapper[polMapper[args.polToPlot]]))
            ax4.set_ylim((dTimes[0], dTimes[-1]))

            ## Band-averaged amplitude vs. time.  Amplitude is linear, so the
            ## radians-to-degrees factor from the phase panel does not apply.
            ax5 = fig5.add_subplot(nRow, nCol * nBand, nBand * k + 1 + band, sharey=ax5)
            ax5.plot(numpy.ma.abs(vis[:, good].mean(axis=1)),
                     dTimes, linestyle='', marker='+')
            ax5.set_xlabel('Mean Vis. Amp. [lin.]')
            if band == 0:
                ax5.set_ylabel('Elapsed Time [s]')
            ax5.set_title("%i,%i - %s" % (i, j, namMapper[polMapper[args.polToPlot]]))
            ax5.set_ylim((dTimes[0], dTimes[-1]))

            if band > 0:
                for ax in (ax1, ax2, ax3, ax4, ax5):
                    plt.setp(ax.get_yticklabels(), visible=False)
            if band < nBand - 1:
                for ax in (ax1, ax2, ax3, ax4, ax5):
                    xticks = ax.xaxis.get_major_ticks()
                    xticks[-1].label1.set_visible(False)

        k += 1

    for f in (fig1, fig2, fig3, fig4, fig5):
        f.suptitle("%s to %s UTC" \
                   % (datetime.utcfromtimestamp(times[0]).strftime("%Y/%m/%d %H:%M"),
                      datetime.utcfromtimestamp(times[-1]).strftime("%Y/%m/%d %H:%M")))
        if nBand > 1:
            f.subplots_adjust(wspace=0.0)

    plt.show()
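# A minimal, self-contained sketch (editor's addition) of the FITS-IDI
# baseline convention the plotting routine above relies on: the BASELINE
# column packs an antenna pair as ant1*256 + ant2, which is why the code
# unpacks pairs with (bl >> 8) & 0xFF and bl & 0xFF (equivalently // 256
# and % 256).  decode_baseline() is a hypothetical helper, used only for
# illustration and not part of the original script.
def decode_baseline(bl):
    """Unpack a FITS-IDI baseline code into an (ant1, ant2) tuple."""
    return (bl >> 8) & 0xFF, bl & 0xFF

assert decode_baseline(1 * 256 + 2) == (1, 2)   # baseline code 258 -> antennas 1 and 2
assert decode_baseline(2 * 256 + 1) == (2, 1)   # the conjugate baseline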
def main(args):
    # Set the station
    if args.metadata is not None:
        station = stations.parse_ssmif(args.metadata)
        ssmifContents = open(args.metadata).readlines()
    else:
        station = stations.lwa1
        ssmifContents = open(os.path.join(dataPath, 'lwa1-ssmif.txt')).readlines()
    antennas = station.antennas

    # Build the list of antenna indices to keep (X pol. of the selected stands)
    toKeep = []
    for g in (1, 10, 54, 248, 251, 258):
        for i, ant in enumerate(antennas):
            if ant.stand.id == g and ant.pol == 0:
                toKeep.append(i)
    for i, j in enumerate(toKeep):
        print(i, j, antennas[j].stand.id)

    # Length of the FFT
    LFFT = args.fft_length

    # Make sure that the file chunk size is an integer multiple of the FFT
    # length so that no data gets dropped
    maxFrames = int((30000 * 260) / float(LFFT)) * LFFT
    # It seems like that would be a good idea, however...  TBW data comes one
    # capture at a time so doing something like this actually truncates data
    # from the last set of stands for the first integration.  So, we really
    # should stick with
    maxFrames = (30000 * 260)

    fh = open(args.filename, "rb")
    nFrames = os.path.getsize(args.filename) // tbw.FRAME_SIZE
    dataBits = tbw.get_data_bits(fh)
    # The number of ant/pols in the file is hard-coded because I cannot figure
    # out a way to get this number in a systematic fashion
    antpols = len(antennas)
    nChunks = int(math.ceil(1.0 * nFrames / maxFrames))
    if dataBits == 12:
        nSamples = 400
    else:
        nSamples = 1200

    # Read in the first frame and get the date/time of the first sample
    # of the frame.  This is needed to get the list of stands.
    junkFrame = tbw.read_frame(fh)
    fh.seek(0)
    beginTime = junkFrame.time
    beginDate = junkFrame.time.datetime

    # File summary
    print("Filename: %s" % args.filename)
    print("Date of First Frame: %s" % str(beginDate))
    print("Ant/Pols: %i" % antpols)
    print("Sample Length: %i-bit" % dataBits)
    print("Frames: %i" % nFrames)
    print("Chunks: %i" % nChunks)
    print("===")

    nChunks = 1

    # Skip over any non-TBW frames at the beginning of the file
    i = 0
    junkFrame = tbw.read_frame(fh)
    while not junkFrame.header.is_tbw:
        try:
            junkFrame = tbw.read_frame(fh)
        except errors.SyncError:
            fh.seek(0)
            while True:
                try:
                    junkFrame = tbn.read_frame(fh)
                    i += 1
                except errors.SyncError:
                    break
            fh.seek(-2 * tbn.FRAME_SIZE, 1)
            junkFrame = tbw.read_frame(fh)
        i += 1
    fh.seek(-tbw.FRAME_SIZE, 1)
    print("Skipped %i non-TBW frames at the beginning of the file" % i)

    # Master loop over all of the file chunks
    masterSpectra = numpy.zeros((nChunks, antpols, LFFT))
    for i in range(nChunks):
        # Find out how many frames remain in the file.  If this number is
        # larger than the maximum of frames we can work with at a time
        # (maxFrames), only deal with that chunk
        framesRemaining = nFrames - i * maxFrames
        if framesRemaining > maxFrames:
            framesWork = maxFrames
        else:
            framesWork = framesRemaining
        print("Working on chunk %i, %i frames remaining" % ((i + 1), framesRemaining))

        data = numpy.zeros((12, 12000000), dtype=numpy.int16)
        # If there are fewer frames than we need to fill an FFT, skip this chunk
        if data.shape[1] < 2 * LFFT:
            break

        # Inner loop that actually reads the frames into the data array
        for j in range(framesWork):
            # Read in the next frame and anticipate any problems that could occur
            try:
                cFrame = tbw.read_frame(fh)
            except errors.EOFError:
                break
            except errors.SyncError:
                #print("WARNING: Mark 5C sync error on frame #%i" % (int(fh.tell())/tbw.FRAME_SIZE-1))
                continue
            if not cFrame.header.is_tbw:
                continue

            stand = cFrame.header.id
            # In the current configuration, stands start at 1 and go up to 260.
            # So, we can use this little trick to populate the data array
            aStand = 2 * (stand - 1)
            #if cFrame.header.frame_count % 10000 == 0 and config['verbose']:
            #    print("%3i -> %3i %6.3f %5i %i" % (stand, aStand, cFrame.time, cFrame.header.frame_count, cFrame.payload.timetag))

            # Actually load the data.  x pol. goes into the even numbers,
            # y pol. into the odd numbers
            count = cFrame.header.frame_count - 1
            if aStand not in toKeep:
                continue
            # Convert to reduced index
            aStand = 2 * toKeep.index(aStand)
            data[aStand, count * nSamples:(count + 1) * nSamples] = cFrame.payload.data[0, :]
            data[aStand + 1, count * nSamples:(count + 1) * nSamples] = cFrame.payload.data[1, :]

    # Time series analysis - mean, std. dev., saturation count
    tsMean = data.mean(axis=1)
    tsStd = data.std(axis=1)
    tsSat = numpy.where((data == 2047) | (data == -2047), 1, 0).sum(axis=1)

    # Time series analysis - percentiles
    p = [50, 75, 90, 95, 99]
    tsPct = numpy.zeros((data.shape[0], len(p)))
    for i in xrange(len(p)):
        for j in xrange(data.shape[0]):
            tsPct[j, i] = percentile(numpy.abs(data[j, :]), p[i])

    # Frequency domain analysis - spectra
    freq = numpy.fft.fftfreq(2 * args.fft_length, d=1.0 / 196e6)
    freq = freq[:args.fft_length]

    delays = numpy.zeros((data.shape[0], freq.size))
    signalsF, validF = FEngine(data, freq, delays, LFFT=args.fft_length,
                               Overlap=1, sample_rate=196e6, clip_level=0)

    # Cleanup to save memory
    del validF, data
    print(signalsF.shape)

    # SK control values
    skM = signalsF.shape[2]
    skN = 1

    # Frequency domain analysis - spectral kurtosis
    k = numpy.zeros((signalsF.shape[0], signalsF.shape[1]))
    for l in xrange(signalsF.shape[0]):
        for m in xrange(freq.size):
            k[l, m] = kurtosis.spectral_fft(signalsF[l, m, :])
    kl, kh = kurtosis.get_limits(4, skM, skN)
    print(kl, kh)

    # Integrate the spectra for as long as we can
    masterSpectra = (numpy.abs(signalsF)**2).mean(axis=2)
    del signalsF

    # Mask out bad values (high spectral kurtosis) for the plot
    mask = numpy.where((k < kl) | (k > kh), 1, 0)
    mask = expandMask(mask, radius=4, merge=True)

    masterSpectra = numpy.ma.array(masterSpectra, mask=mask)

    # Save the data to an HDF5 file
    outname = os.path.splitext(args.filename)[0]
    outname = "%s-RFI.hdf5" % outname

    f = h5py.File(outname, 'w')
    f.attrs['filename'] = args.filename
    f.attrs['mode'] = 'TBW'
    f.attrs['station'] = 'LWA-1'
    f.attrs['dataBits'] = dataBits
    f.attrs['startTime'] = beginTime
    f.attrs['startTime_units'] = 's'
    f.attrs['startTime_sys'] = 'unix'
    f.attrs['sample_rate'] = 196e6
    f.attrs['sample_rate_units'] = 'Hz'
    f.attrs['RBW'] = freq[1] - freq[0]
    f.attrs['RBW_Units'] = 'Hz'
    f.attrs['SK-M'] = skM
    f.attrs['SK-N'] = skN

    for l in xrange(len(toKeep)):
        antX = antennas[toKeep[l]]
        antY = antennas[toKeep[l] + 1]

        stand = f.create_group('Stand%03i' % antX.stand.id)
        stand['freq'] = freq
        stand['freq'].attrs['Units'] = 'Hz'

        polX = stand.create_group('X')
        polY = stand.create_group('Y')
        polX.attrs['tsMean'] = tsMean[2 * l]
        polY.attrs['tsMean'] = tsMean[2 * l + 1]
        polX.attrs['tsStd'] = tsStd[2 * l]
        polY.attrs['tsStd'] = tsStd[2 * l + 1]
        polX.attrs['tsSat'] = tsSat[2 * l]
        polY.attrs['tsSat'] = tsSat[2 * l + 1]
        for i, v in enumerate(p):
            polX.attrs['ts%02i' % v] = tsPct[2 * l][i]
            polY.attrs['ts%02i' % v] = tsPct[2 * l + 1][i]

        polX['spectrum'] = masterSpectra[2 * l, :]
        polX['spectrum'].attrs['axis0'] = 'frequency'
        polY['spectrum'] = masterSpectra[2 * l + 1, :]
        polY['spectrum'].attrs['axis0'] = 'frequency'

        polX['kurtosis'] = k[2 * l, :]
        polX['kurtosis'].attrs['axis0'] = 'frequency'
        polY['kurtosis'] = k[2 * l + 1, :]
        polY['kurtosis'].attrs['axis0'] = 'frequency'

    # The plot
    fig = plt.figure()
    ax1 = fig.add_subplot(2, 1, 1)
    ax2 = fig.add_subplot(2, 1, 2)
    for l in xrange(k.shape[0]):
        ant = antennas[toKeep[l // 2]]
        ax1.plot(freq / 1e6, numpy.log10(masterSpectra[l, :]) * 10,
                 label='Stand %i, Pol %i' % (ant.stand.id, ant.pol + l % 2))
        ax2.plot(freq / 1e6, k[l, :],
                 label='Stand %i, Pol %i' % (ant.stand.id, ant.pol + l % 2))
    ax2.hlines(kl, freq[0] / 1e6, freq[-1] / 1e6, linestyle=':',
               label=r'Kurtosis Limit 4$\sigma$')
    ax2.hlines(kh, freq[0] / 1e6, freq[-1] / 1e6, linestyle=':',
               label=r'Kurtosis Limit 4$\sigma$')
    ax1.set_xlabel('Frequency [MHz]')
    ax1.set_ylabel('PSD [arb. dB/RBW]')
    ax1.legend(loc=0)
    ax2.set_ylim((kl / 2, kh * 2))
    ax2.set_xlabel('Frequency [MHz]')
    ax2.set_ylabel('Spectral Kurtosis')
    ax2.legend(loc=0)
    plt.show()
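# A numpy-only sketch (editor's addition) of the spectral kurtosis statistic
# computed above via kurtosis.spectral_fft.  It assumes the standard
# Nita & Gary (2010) estimator with N = 1 accumulations per sample: for M
# power measurements of pure Gaussian noise the estimator scatters around
# 1.0, while RFI pushes it outside the (kl, kh) limits used for masking.
# spectral_kurtosis_estimate() is an illustrative stand-in, not the LSL API.
import numpy

def spectral_kurtosis_estimate(power, N=1):
    """SK estimator for M power measurements in one frequency channel."""
    M = power.size
    S1 = power.sum()
    S2 = (power**2).sum()
    return ((M * N + 1.0) / (M - 1.0)) * (M * S2 / S1**2 - 1.0)

# Demo: 512 samples of complex Gaussian noise -> SK close to 1.0
rng = numpy.random.RandomState(42)
noise = rng.randn(512) + 1j * rng.randn(512)
print(spectral_kurtosis_estimate(numpy.abs(noise)**2))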
def calculateCI(Vr, years, nodata, minRecords, yrsPerSim=1,
                sample_size=50, prange=90):
    """
    Fit a GEV to the wind speed records for a 2-D extent of wind speed
    values, providing a confidence range by resampling at random from the
    input values.

    :param Vr: `numpy.ndarray` of wind speeds (3-D - event, lat, lon)
    :param years: `numpy.ndarray` of years for which to evaluate return
                  period values.
    :param float nodata: missing data value.
    :param int minRecords: minimum number of valid wind speed values
                           required to fit distribution.
    :param int yrsPerSim: Values represent block maxima - this value
                          indicates the time span of the block (default 1).
    :param int sample_size: number of records to randomly sample for
                            calculating confidence interval of the fit.
    :param float prange: percentile range.

    :returns: `RpUpper` and `RpLower` - upper and lower confidence-interval
              return period wind speed values for each lat/lon.
    """
    lower = (100 - prange) / 2.   # 5th percentile by default
    upper = 100. - lower          # 95th percentile by default
    nrecords = Vr.shape[0]        # number of years (data are aggregated to 1/yr)
    nsamples = nrecords // sample_size  # number of random subsamples to fit

    # RpUpper/RpLower hold years x lat x lon
    RpUpper = nodata * np.ones((len(years), Vr.shape[1], Vr.shape[2]), dtype='f')
    RpLower = nodata * np.ones((len(years), Vr.shape[1], Vr.shape[2]), dtype='f')

    # w: years x number of subsamples
    w = np.zeros((len(years), nsamples), dtype='f')
    wUpper = np.zeros((len(years)), dtype='f')
    wLower = np.zeros((len(years)), dtype='f')

    for i in xrange(Vr.shape[1]):          # lat
        for j in xrange(Vr.shape[2]):      # lon
            if Vr[:, i, j].max() > 0.0:    # check for valid data
                random.shuffle(Vr[:, i, j])  # shuffle the years in place
                for n in xrange(nsamples):   # fit each random subsample
                    nstart = n * sample_size
                    nend = (n + 1) * sample_size
                    # Select sample_size (default 50) events; the slice end
                    # is exclusive, so no '- 1' here
                    vsub = Vr[nstart:nend, i, j]

                    vsub.sort()
                    if vsub.max() > 0.:
                        # Perform the fitting on this random subset
                        w[:, n], loc, scale, shp = evd.gevfit(
                            vsub, years, nodata, minRecords // 10, yrsPerSim)

                # Pull out the upper and lower percentiles of the subsample fits
                for n in range(len(years)):
                    wUpper[n] = percentile(w[n, :], upper)
                    wLower[n] = percentile(w[n, :], lower)

                # Store the CI bounds for each return period at this grid cell
                RpUpper[:, i, j] = wUpper
                RpLower[:, i, j] = wLower
    return RpUpper, RpLower
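# A self-contained sketch (editor's addition) of the bootstrap idea behind
# calculateCI, using numpy only: shuffle the records, split them into
# fixed-size subsamples, "fit" each one, and take the lower/upper
# percentiles of the fitted values as the confidence band.  The subsample
# mean stands in for evd.gevfit purely for illustration; the data are
# synthetic.
import numpy as np

rng = np.random.RandomState(0)
records = rng.gumbel(loc=30.0, scale=5.0, size=500)  # synthetic annual maxima
sample_size, prange = 50, 90
lower = (100 - prange) / 2.0
upper = 100.0 - lower

rng.shuffle(records)
nsamples = records.size // sample_size
fits = np.array([records[n * sample_size:(n + 1) * sample_size].mean()
                 for n in range(nsamples)])
print(np.percentile(fits, lower), np.percentile(fits, upper))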