def plot_wise(cat_path):
    for catfile in find_files(cat_path, "*merged+wise.csv"):
        print("\nreading catalog: {}".format(catfile))
        df = pd.read_csv(catfile)
        # convert to magnitudes
        nbadflux = (df.flux <= 0).sum()
        if nbadflux > 0:
            print("warning: {} negative flux source(s)".format(nbadflux))
        ch = catfile.split('/')[-1].split('_')[1]
        mags = spz_jy_to_mags(df.flux * 1e-3, float(ch))
        if ch == '1':
            plt.scatter(df.W1mag, mags)
            plt.xlabel('W1 [mag]')
            plt.ylabel('I1 [mag]')
        elif ch == '2':
            plt.scatter(df.W2mag, mags)
            plt.xlabel('W2 [mag]')
            plt.ylabel('I2 [mag]')
        # overplot the one-to-one line without disturbing the axis limits
        ax = plt.gca()
        xlim, ylim = ax.get_xlim(), ax.get_ylim()
        plt.plot([-5, ylim[1] * 2], [-5, ylim[1] * 2], 'r-')
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        reg = catfile.split('/')[-1].split('_')[0]
        name = '{}_{}_IRAC_vs_WISE.png'.format(reg, ch)
        outpath = '/'.join(catfile.split('/')[:-1] + [name])
        plt.savefig(outpath, dpi=120)
        plt.close()
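# A minimal sketch of the spz_jy_to_mags helper called above (not the original
# implementation). It assumes IRAC Vega zero-magnitude flux densities of
# roughly 280.9 Jy (channel 1) and 179.7 Jy (channel 2); swap in the
# pipeline's own zero points if they differ.
import numpy as np

IRAC_ZERO_POINTS_JY = {1.0: 280.9, 2.0: 179.7}

def spz_jy_to_mags(flux_jy, channel):
    """Convert Spitzer/IRAC flux densities [Jy] to Vega magnitudes."""
    return -2.5 * np.log10(flux_jy / IRAC_ZERO_POINTS_JY[channel])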
def plot_learning_curves(num_points, X_train, Y_train, X_test, Y_test,
                         positive_class=1, negative_class=0):
    train_set_sizes = [len(X_train) / k for k in range(num_points + 1, 0, -1)]
    test_errors = []
    training_errors = []
    for training_set_size in train_set_sizes:
        model = train(X_train, Y_train, training_set_size)
        test_error = evaluate(model, X_test, Y_test, positive_class, negative_class)
        training_error = evaluate(model, X_train, Y_train, positive_class, negative_class)
        test_errors.append(test_error)
        training_errors.append(training_error)
    plt.plot(train_set_sizes, training_errors, 'bs-', label='Training accuracy')
    plt.plot(train_set_sizes, test_errors, 'g^-', label='Test accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Number of training samples')
    plt.title('Augmented Logistic Regression Learning Curve')
    plt.legend(loc='lower right')
    plt.savefig('../Figures/accuracyPlotAugmented.png', dpi=100)
    pylab.show()
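# Hypothetical sketches of the train() and evaluate() helpers assumed by
# plot_learning_curves above (not the original implementations). Here train()
# fits a scikit-learn LogisticRegression on the first training_set_size
# samples and evaluate() reports plain accuracy; the positive/negative class
# arguments are accepted only to mirror the call signature above.
from sklearn.linear_model import LogisticRegression

def train(X_train, Y_train, training_set_size):
    n = int(training_set_size)
    model = LogisticRegression()
    model.fit(X_train[:n], Y_train[:n])
    return model

def evaluate(model, X, Y, positive_class=1, negative_class=0):
    return model.score(X, Y)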
def submit_time_histogram(arr):
    """ Use Matplotlib to plot a normalized histogram of submit times """
    from math import ceil, log
    try:
        import matplotlib.mlab as mlab
        from prettyplotlib import plt
    except ImportError:
        print('You must have Matplotlib and Prettyplotlib installed to plot a histogram.')
        return
    # Use Sturges' formula for number of bins: k = ceiling(log2 n + 1)
    k = int(ceil(log(len(arr), 2) + 1))
    n, bins, patches = plt.hist(arr, k, normed=1, facecolor='green', alpha=0.75)
    # throw a PDF plot on top of it
    #y = mlab.normpdf(bins, np.mean(arr), np.std(arr))
    #l = plt.plot(bins, y, 'r--', linewidth=1)
    # Get a Bayesian confidence interval for mean, variance, standard deviation
    dmean, dvar, dsd = bayes_mvs(arr)
    # drop a line in at the mean for fun
    plt.axvline(dmean[0], color='blue', alpha=0.5)
    plt.axvspan(dmean[1][0], dmean[1][1], color='blue', alpha=0.5)
    plt.axvline(np.median(arr), color='y', alpha=0.5)
    # Calculate a Kernel Density Estimate
    density = gaussian_kde(arr)
    xs = np.arange(0., np.max(arr), 0.1)
    density.covariance_factor = lambda: .25
    density._compute_covariance()
    plt.plot(xs, density(xs), color='m')
    #FIXME: come up with better legend names
    #plt.legend(('Normal Curve', 'Mean', 'Median', 'KDE'))
    plt.legend(('Mean', 'Median', 'KDE'))
    plt.xlabel('Submit Times (in Seconds)')
    plt.ylabel('Probability')
    plt.title('Histogram of Worker submit times')
    plt.grid(True)
    plt.show()
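# Minimal usage sketch with simulated data (hypothetical; real submit times
# would come from the job log). Assumes numpy and scipy.stats are available at
# module level, as submit_time_histogram expects.
import numpy as np
from scipy.stats import bayes_mvs, gaussian_kde

if __name__ == '__main__':
    simulated_deltas = np.random.exponential(scale=30.0, size=500)
    submit_time_histogram(simulated_deltas)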
limits = [[12, 18.5], [11.4, 16.5], [10, 16], [9.5, 16], [9.5, 16]]
for i, band in enumerate(bands):
    lim_lo = limits[i][0]
    lim_hi = limits[i][1]
    x, m, b = makeDiagonalLine([lim_lo, lim_hi])
    fig = plt.figure(figsize=(8, 8))
    ax = plt.subplot(111)
    c = ppl.scatter(ax, petroMags[:, i], mags[:, i], s=8, c='k', edgecolor='k')
    ax.axis([lim_lo, lim_hi, lim_lo, lim_hi])
    ax.errorbar(petroMags[:, i], mags[:, i], yerr=mag_err[:, i], mew=0,
                linestyle="none", color="black")
    plt.plot(x, m * x + b, c='k')
    ax.xaxis.set_major_locator(majorLocator)
    ax.xaxis.set_minor_locator(minorLocator)
    ax.yaxis.set_major_locator(majorLocator)
    ax.yaxis.set_minor_locator(minorLocator)
    ax.minorticks_on()
    # Change the labels back to black
    ax.xaxis.label.set_color('black')
    ax.yaxis.label.set_color('black')
    # Change the axis title also back to black
    ax.title.set_color('black')
    # Get back the top and right axes lines ("spines")
    spines_to_remove = ['top', 'right']
    for spine in spines_to_remove:
        ax.spines[spine].set_visible(True)
    # For all the spines, make their line thicker and return them to be black
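# Hypothetical sketch of the makeDiagonalLine helper used above (not the
# original): it returns x values spanning the axis limits together with the
# slope and intercept of the one-to-one line, so m * x + b traces y = x.
import numpy as np

def makeDiagonalLine(limits, npoints=100):
    lo, hi = limits
    x = np.linspace(lo, hi, npoints)
    m, b = 1.0, 0.0  # unity line: y = x
    return x, m, b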
def run_xsc_phot(bcdphot_out_path, mosaic_path):
    replaced = {}
    for cat in find_files(bcdphot_out_path, "*_combined_hdr_catalog.txt"):

        print("\n======================================================")
        print("\nadjusting photometry in: {}".format(cat.split('/')[-1]))
        print("------------------------------------------------------")

        outpath = cat.replace('combined_hdr_catalog.txt', '2mass_xsc.tbl')
        # retrieve 2mass data if file doesn't already exist (from previous run)
        if not os.path.isfile(outpath):
            # get url and retrieve data
            url = query_2mass_xsc_polygon(*get_region_corners(cat))
            print("\ndownloading 2MASS photometry from: {}".format(url))
            text = urllib2.urlopen(url).read()
            # write to disk
            with open(outpath, 'w') as f:
                f.write(text)
            print("\ncreated file: {}".format(outpath))

        # read back in as recarray
        print("\nreading: {}".format(outpath))
        names = open(outpath).read().split('\n')[76].split('|')[1:-1]
        da = np.recfromtxt(outpath, skip_header=80, names=names)

        # write input file for xsc_phot.pro
        infile_outpath = '/'.join(cat.split('/')[:-1]) + '/xsc.txt'
        with open(infile_outpath, 'w') as w:
            for i in range(da.shape[0]):
                w.write("{} {} {} {}\n".format(da.designation[i], da.ra[i],
                                               da.dec[i], da.r_ext[i]))
        print("\ncreated input file for xsc_phot.pro: {}".format(infile_outpath))

        # locate the FITS mosaic file for xsc_phot.pro to do photometry on
        reg, ch = cat.split('/')[-1].split('_')[:2]
        mosaicfile = filter(lambda x: 'dirbe{}/ch{}/long/full/Combine'
                            .format(reg, ch) in x,
                            find_files(mosaic_path, '*mosaic.fits'))[0]
        print("\nfound mosaic file: {}".format(mosaicfile))

        # spawn IDL subprocess running xsc_phot.pro and catch stdout in file
        outpath = infile_outpath.replace('xsc.txt', 'xsc_phot_out.txt')
        if not os.path.isfile(outpath):
            outfile = open(outpath, 'w')
            print("\nspawning xsc_phot.pro IDL subprocess")
            cmd = "xsc_phot,'" + mosaicfile + "','" + infile_outpath + "','long'"
            rc = subprocess.call(['/usr/local/itt/idl71/bin/idl', '-quiet', '-e', cmd],
                                 stderr=subprocess.PIPE, stdout=outfile)
            outfile.close()

        # read in output to recarray
        print("\nreading: {}".format(outpath))
        phot = np.recfromtxt(outpath, names=['id', 'flux', 'unc', 'sky', 'skyunc'])

        # make sure rows are aligned
        assert (da.designation == phot.id).all()

        # ignore xsc sources we got a NaN or negative flux for
        bad = np.isnan(phot.flux) | (phot.flux < 0)
        print("\naper.pro returned NaN or negative flux for {} sources".format(bad.sum()))
        if bad.sum() > 0:
            for i in phot[bad].id:
                print(i)
            outpath = cat.replace('combined_hdr_catalog.txt', 'xsc_nan_phot.csv')
            with open(outpath, 'w') as f:
                w = csv.writer(f)
                w.writerow(da.dtype.names)
                w.writerows(da[bad].tolist())
            print('\ncreated file: {}'.format(outpath))
        phot = phot[~bad]
        da = da[~bad]

        # read in pipeline catalog
        print("\nreading: {}".format(cat))
        names = open(cat).readline().split()[1:]
        c = np.recfromtxt(cat, names=names)

        # loop through xsc sources and find matches in pipeline catalog
        print("\nfinding records associated with XSC sources in pipeline catalog")
        c_flux_total = []
        n_in_aper = []
        c_idx = []
        coords = radec_to_coords(c.ra, c.dec)
        kdt = KDT(coords)
        for i in range(phot.size):
            radius = da.r_ext[i] / 3600.
            # idx1, idx2, ds = spherematch(da.ra[i], da.dec[i],
            #     c.ra, c.dec, tolerance=radius)
            idx, ds = spherematch2(da.ra[i], da.dec[i], c.ra, c.dec,
                                   kdt, tolerance=radius, k=500)
            # c_flux_total.append(c.flux[idx2].sum())
            # n_in_aper.append(c.flux[idx2].size)
            # c_idx.append(idx2.tolist())
            c_flux_total.append(c.flux[idx].sum())
            n_in_aper.append(ds.size)
            c_idx.append(idx.tolist())
        print("\nhistogram of source counts in r_ext aperture")
        for i in [(i, n_in_aper.count(i)) for i in set(n_in_aper)]:
            print i

        # create new version of catalog file with xsc-associated entries replaced
        c_idx = np.array(flatten(c_idx))
        print("\nremoving {}, adding {}".format(c_idx.size, phot.size))
        replaced[cat] = {'old': c_idx.size, 'new': phot.size}
        replaced[cat]['hist'] = [(i, n_in_aper.count(i)) for i in set(n_in_aper)]
        c = np.delete(c, c_idx)
        newrows = np.rec.array([(-i, da.ra[i], da.dec[i], phot.flux[i],
                                 phot.unc[i], 1) for i in range(phot.size)],
                               dtype=c.dtype)
        newcat = np.hstack((c, newrows))

        # write new version of catalog to disk
        fmt = ['%i'] + ['%0.8f'] * 2 + ['%.4e'] * 2 + ['%i']
        outpath = cat.replace('catalog.txt', 'catalog_xsc_cor.txt')
        np.savetxt(outpath, newcat, fmt=fmt, header=' '.join(names))
        print('\ncreated file: {}'.format(outpath))

        # make plot of total old vs. new flux
        plt.scatter(c_flux_total, phot.flux)
        ylim = plt.gca().get_ylim()
        plt.xlim(*ylim)
        max_y = ylim[1]
        plt.plot(ylim, ylim, 'r-')
        plt.xlabel('old flux [mJy]')
        plt.ylabel('new flux [mJy]')
        name = ' '.join(cat.split('/')[-1].split('_')[:2])
        plt.title(name)
        outpath = cat.replace('combined_hdr_catalog.txt', 'xsc_new_vs_old_phot.png')
        plt.savefig(outpath, dpi=200)
        plt.close()
        print('\ncreated file: {}'.format(outpath))

    outfile = 'xsc_replaced.json'
    json.dump(replaced, open(outfile, 'w'))
    print("\ncreated file: {}".format(outfile))
    print("\nremoved / added")
    for k, v in replaced.iteritems():
        print k.split('/')[-1], v['old'], v['new']
    m = np.mean([i['old'] / float(i['new']) for i in replaced.values()])
    print("average ratio: {}".format(m))
    print("\nK mag and r_ext of sources with NaN photometry:")
    for i in find_files(bcdphot_out_path, "*xsc_nan_phot.csv"):
        reg = i.split('/')[-1]
        rec = np.recfromcsv(i)
        bad_id = rec.designation.tolist()
        bad_k = rec.k_m_k20fe.tolist()
        bad_r_ext = rec.r_ext.tolist()
        print reg
        print("\tid\t\t\tKmag\tr_ext")
        if type(bad_id) is list:
            seq = sorted(zip(bad_id, bad_k, bad_r_ext), key=lambda x: x[0])
            for j, k, l in seq:
                print("\t{}\t{}\t{}".format(j, k, l))
        else:
            print("\t{}\t{}\t{}".format(bad_id, bad_k, bad_r_ext))
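# Hypothetical sketches of two small helpers assumed by run_xsc_phot (not the
# original implementations): radec_to_coords converts RA/Dec in degrees to
# unit-sphere Cartesian coordinates for the KD-tree, and flatten collapses the
# per-source lists of matched indices into a single flat list.
import numpy as np

def radec_to_coords(ra, dec):
    ra_r, dec_r = np.radians(ra), np.radians(dec)
    return np.column_stack((np.cos(dec_r) * np.cos(ra_r),
                            np.cos(dec_r) * np.sin(ra_r),
                            np.sin(dec_r)))

def flatten(list_of_lists):
    return [item for sublist in list_of_lists for item in sublist]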
    #copy_graph_attrs(G, approxG, ['enter'])
    generate_pageviews(approxG)

    prev_rerr = rerr
    rerr = sum(abs(approxG.node[n]['pageviews'] - G.node[n]['pageviews']) /
               (G.node[n]['pageviews'] + 1) for n in G.nodes()) / G.number_of_nodes()

    np.random.shuffle(nodes)

print 'Pageviews from "real" edge weights'
print '-=-=-=-=-'
display_graph(G)
print
print 'Pageviews from evenly distributed edge weights'
print '-=-=-=-=-'
display_graph(approxG)

plt.plot(np.arange(0, len(rerrs)), rerrs, label='Relative error over time')
plt.xlabel('Iteration')
plt.ylabel('Average pageview relative error per node')
plt.legend()
plt.savefig('error_over_time.pdf')
plt.show(block=True)

plt.plot(np.arange(0, len(werrs)), werrs, label='Weight error over time')
plt.xlabel('Iteration')
plt.ylabel('Average weight error per edge')
plt.legend()
plt.savefig('weight_over_time.pdf')
plt.show(block=True)

fig, ax = plt.subplots(1)
ppl.bar(ax, *orig_weight_data, alpha=0.5, color='black', label='Weight error before')
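# Hypothetical sketch of the display_graph helper used above (not the
# original): print each node with its pageview estimate, assuming a networkx
# graph whose nodes carry a 'pageviews' attribute (old-style G.node accessor,
# to match the surrounding code).
def display_graph(G):
    for n in sorted(G.nodes()):
        print('{}: {:.1f}'.format(n, G.node[n]['pageviews']))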
svdY = []
orthoX = []
orthoY = []
u, s, vT = scipy.linalg.svd(train)
assert np.allclose(
    train, u.dot(scipy.linalg.diagsvd(s, u.shape[0], vT.shape[1]).dot(vT)))

# See the loss in performance as we perform low-rank approximations
for k in xrange(1, 101):
    low_s = [s[i] for i in xrange(k)] + (min(u.shape[0], vT.shape[1]) - k) * [0]
    reconstruct = u.dot(scipy.linalg.diagsvd(low_s, u.shape[0], vT.shape[1]).dot(vT))
    err = np.linalg.norm(train - reconstruct, 'fro')
    print 'Exact SVD with low-rank approximation {}'.format(k)
    svdX.append(k)
    svdY.append(err)
    orthoX.append(k)
    orthoY.append(check_orthogonality(u))
plt.plot(svdX, svdY, label="SVD", color='black', linewidth=2, linestyle='--')

print
print 'Testing incremental SVD'
incr_ortho = []
for num in xrange(100, 1001, 300):
    print '... with block size of {}'.format(num)
    X, Y = [], []
    incr_orthoY = []
    uL, sL, vTL = incremental_SVD(train, range(1, 101), num)
    for i in xrange(len(uL)):
        reconstruct = uL[i].dot(sL[i].dot(vTL[i]))
        err = np.linalg.norm(train - reconstruct, 'fro')
        X.append(i + 1)
        Y.append(err)
        incr_orthoY.append(check_orthogonality(uL[i]))
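# Hypothetical sketch of the check_orthogonality helper used above (not the
# original): measure how far the columns of U drift from orthonormality as the
# Frobenius norm of U^T U - I; exactly orthonormal columns give 0.
import numpy as np

def check_orthogonality(u):
    return np.linalg.norm(u.T.dot(u) - np.eye(u.shape[1]), 'fro')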
        epi.append(z[dz[i]:dz[i+1]])
else:
    # Otherwise, slice at local minima using smoothed zero-crossings in the derivative
    z = range(len(C))
    z2 = np.diff(np.convolve(C, np.hanning(19), "same"))
    dz = np.append(np.insert((np.where((z2[:-1] < 0) * (z2[1:] > 0) == True)[0]), 0, 0),
                   len(C))
    for i in range(len(dz) - 1):
        epi.append(range(dz[i], dz[i + 1]))
epi = np.array(epi)

# Plots
subplot(211)
plt.plot(t, C, linewidth=3)
for e in epi:
    axvline(t[e[0]], color="red", linewidth=2)
    axvline(t[e[-1]], color="red", linewidth=2)
    axhline(1.04 * np.max(C),
            xmin=(t[e[0]] - t[0]) / (t[-1] - t[0]),
            xmax=(t[e[-1]] - t[0]) / (t[-1] - t[0]),
            linewidth=3, color="red")
title("Observed Cases, $C_t$. Red lines delimit epidemic.")
xlim([t[0], t[-1]])
ylim([-5, 1.05 * np.max(C)])
xlabel("Time (index)")
ylabel("Cases")

subplot(212)
title("Birth Rates, $B_i$")
xlabel("Time (Years)")
ylabel("Births")
plt.plot(t, B, linewidth=3)
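# Standalone illustration (with synthetic data) of the local-minimum detection
# used above: smooth the case counts with a Hanning window, difference the
# result, and mark indices where the derivative crosses from negative to
# positive.
import numpy as np

C_demo = np.abs(np.sin(np.linspace(0, 6 * np.pi, 300))) + 0.1 * np.random.rand(300)
z2_demo = np.diff(np.convolve(C_demo, np.hanning(19), "same"))
minima = np.where((z2_demo[:-1] < 0) & (z2_demo[1:] > 0))[0]
print(minima)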
import numpy
from prettyplotlib import plt

if __name__ == '__main__':
    xs = numpy.arange(-5.0, 5.0, 0.001)

    def f(x):
        return 1.0 * numpy.exp(-x**2 / 2)

    def fdash(x):
        return -x * numpy.exp(-x**2 / 2)

    tuning = f(xs)
    # Fisher information for a Poisson neuron with tuning curve f: I(x) = f'(x)^2 / f(x)
    info = fdash(xs)**2 / f(xs)

    plt.plot(xs, tuning, label='Tuning function')
    plt.plot(xs, info, label='Fisher Information')
    plt.gca().set_xlabel('x')
    plt.legend()
    plt.savefig('../figures/figure_3_5.eps')
        if times:
            G = MakeGram(times, K)
            C = G + alpha**2 * numpy.eye(G.shape[0])
            eps[i] = K(0) - numpy.dot(K(times), numpy.linalg.solve(C, K(times)))
        if numpy.random.rand() < la * dt:
            times.append(0)
        if len(times) > maxtimes:
            times = times[1:]
    return numpy.mean(eps)


if __name__ == "__main__":
    k = 2.0
    K_rbf = lambda x: numpy.exp(-k * numpy.array(x)**2)
    plt.plot(GetStochasticEps((0.3, 0.4, 0.01, 10000)))
    plt.show()
    K_matern = lambda x: (1.0 + k * numpy.abs(x)) * numpy.exp(-k * numpy.abs(x))
    K_ou = lambda x: numpy.exp(-k * numpy.abs(x))
    dx = 0.0005
    xs = numpy.arange(0.0, 6000 * dx, dx)
    rbf0 = K_rbf(xs)
    rbf1 = IterateOnce(rbf0, K_rbf, dx, alpha=0.1)
    dist = SquaredDistance(rbf0, rbf1)
    while dist > 1e-10:
        rbf0 = rbf1
        rbf1 = IterateOnce(rbf0, K_rbf, dx, alpha=0.1)
        dist = SquaredDistance(rbf0, rbf1)
        print dist
    matern0 = K_matern(xs)
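# Hypothetical sketches of two helpers assumed above (not the originals):
# MakeGram builds the Gram matrix K(t_i - t_j) for a stationary kernel, and
# SquaredDistance approximates the integrated squared difference between two
# sampled kernel functions on a grid of spacing dx.
import numpy

def MakeGram(times, K):
    t = numpy.asarray(times)
    return K(t[:, None] - t[None, :])

def SquaredDistance(f0, f1, dx=0.0005):
    return numpy.sum((numpy.asarray(f0) - numpy.asarray(f1))**2) * dx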
##

import sys

if __name__ == '__main__':
    # Accumulate the counts fed into standard in (stdin)
    counts = []
    for line in sys.stdin:
        topic, count = line.split()
        # Shift the ones up by a tiny amount to allow them to be visible on the graph
        counts.append(int(count) + 0.05)
    print 'Total page view counts: {}'.format(len(counts))

    # Display the hourly page view distribution on a log-log plot
    # This matches our friendly and well understood Zipf distribution
    fig = plt.figure(figsize=(9, 5))
    ax = fig.add_subplot(1, 1, 1)
    plt.plot(xrange(len(counts)), counts, linewidth=3, label='Pageviews per page')
    #
    ax.set_xscale('log')
    ax.set_yscale('log')
    #
    plt.title('Log-log plot of hourly Wikipedia page view distribution')
    plt.xlabel('Rank order')
    plt.ylabel('Frequency')
    plt.grid(color='black')
    plt.legend()
    #
    plt.savefig('hourly_wikipedia_zipf.pdf')
    plt.show(block=True)
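# Example of the expected stdin format (hypothetical values): whitespace-separated
# "topic count" pairs, one per line, sorted by descending count so the plot reads
# as rank order vs. frequency, e.g.
#
#   Main_Page 48219
#   Python_(programming_language) 1302
#   Zipf's_law 87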
svdX = []
svdY = []
u, s, vT = scipy.linalg.svd(train)
for k in xrange(1, 100):
    low_s = [s[i] for i in xrange(k)]  # + (min(u.shape[0], vT.shape[1]) - k) * [0]
    print 'Exact SVD with low-rank approximation {}'.format(k)
    svdX.append(k)
    svdY.append(get_error(u, np.diag(low_s), vT, train, test))
plt.plot(svdX, svdY, label="SVD", color='black', linewidth=2, linestyle='--')
"""

print
print 'Testing incremental SVD'
for num in xrange(400, 1001, 300):
    print '... with block size of {}'.format(num)
    X, Y = [], []
    for k in xrange(1, 91, 10):
        print k
        u, s, vT = incremental_SVD(train, k, num)
        X.append(k)
        Y.append(get_error(u, s, vT, train, test, prod_avg))
    plt.plot(X, Y, label='iSVD u={}'.format(num))

##

plt.title('Recommendation system RMSE on {}x{} matrix'.format(*train.shape))
plt.xlabel('Low rank approximation (k)')
plt.ylabel('Root Mean Squared Error')
#plt.ylim(0, max(svdY))
plt.legend(loc='best')
plt.savefig('recommend_rmse_{}x{}.pdf'.format(*train.shape))
plt.show(block=True)
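# Hypothetical sketch of the get_error helper used above (not the original):
# rebuild the ratings matrix from the truncated factors and report RMSE over
# the nonzero (observed) entries of the held-out test matrix. prod_avg
# (presumably a per-product average used for fallback predictions) is accepted
# but ignored here.
import numpy as np

def get_error(u, s, vT, train, test, prod_avg=None):
    k = s.shape[0]
    reconstruct = u[:, :k].dot(s).dot(vT[:k, :])
    mask = test != 0
    return np.sqrt(np.mean((test[mask] - reconstruct[mask])**2))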
plt.xlabel("valor de $tau$") fig.set_size_inches(7,6) fig.tight_layout() fig.savefig("plot1.png") fig, ax = plt.subplots(nrows=1, ncols=1) N = tau_samples.shape[0] expected_texts_per_day = np.zeros(n_datos) for day in range(0, n_datos): ix = day < tau_samples expected_texts_per_day[day] = (lambda_1_samples[ix].sum() + lambda_2_samples[~ix].sum()) / N anhos = ["2005","2006","2007","2008","2009","2010","2011","2012"] plt.plot(range(n_datos), expected_texts_per_day, lw=4, color="#E24A33", label="expected number of text-messages received") plt.xlim(0, n_datos) plt.xticks(np.arange(n_datos) + 0.4, anhos) plt.xlabel(u'Años') plt.ylabel(u'Número esperado de delitos') plt.title(u'''Cambio en el número esperado de delitos por año''') plt.ylim(0, 300000) plt.bar(np.arange(len(datos)), datos, color="#348ABD", alpha=0.65) #plt.legend(loc="upper left") fig.savefig("plot2.png")