# Imports assumed by this excerpt (the original file's header is not shown).
# `sp` is used as a numpy-style alias throughout; older scipy versions re-exported
# the numpy namespace, so `import scipy as sp` matches calls like sp.isnan below.
import scipy as sp
import scipy.sparse  # for sp.sparse.csr_matrix in incidence_matrices
import pandas as pd
import torch
from numpy.polynomial import Chebyshev
from tqdm import tqdm

# ERROR_LIM, CHIPS, BUMP_SCALE, Dataset and tools are assumed to be defined
# elsewhere in the repo and are not reproduced here.


def _normalize(spectra):
    stars = spectra.index
    wavelengths = spectra.flux.columns.values.copy()
    flux = spectra.flux.values.copy()
    error = spectra.error.reindex(columns=wavelengths).values.copy()

    # Replace non-finite fluxes and non-finite or negative errors with dummy values.
    #TODO: Should negative fluxes be zero'd too?
    bad_flux = sp.isnan(flux) | sp.isinf(flux)
    bad_error = sp.isnan(error) | sp.isinf(error) | (error < 0)
    bad = bad_flux | bad_error
    flux[bad] = 1
    error[bad] = ERROR_LIM

    #TODO: Where does pixlist come from?
    pixlist = sp.loadtxt('pixlist.txt', dtype=int)
    var = sp.full_like(error, ERROR_LIM**2)
    var[:, pixlist] = 0
    #TODO: var is already ERROR_LIM**2, so squaring it again here looks
    #      inconsistent with error**2; is var + error**2 what was intended?
    inv_var = 1 / (var**2 + error**2)

    # Fit a low-order continuum per star and per chip, then divide it out.
    norm_flux = sp.full_like(flux, 1)
    norm_error = sp.full_like(error, ERROR_LIM)
    for star in range(len(stars)):
        for _, (left, right) in CHIPS.items():
            mask = (left < wavelengths) & (wavelengths < right)
            #TODO: Why are we using Chebyshev polynomials rather than smoothing splines?
            #TODO: Why are we using three polynomials rather than one? Are spectra
            #      discontinuous between chips?
            #TODO: Is the denominator being zero/negative ever an issue?
            fit = Chebyshev.fit(x=wavelengths[mask], y=flux[star][mask],
                                w=inv_var[star][mask], deg=2)
            norm_flux[star][mask] = flux[star][mask] / fit(wavelengths[mask])
            norm_error[star][mask] = error[star][mask] / fit(wavelengths[mask])

    #TODO: Why is the unreliability threshold different from the limit value?
    unreliable = (norm_error > .3)
    norm_flux[unreliable] = 1
    norm_error[unreliable] = ERROR_LIM

    # In the original, the masking is done in the parallax fitting code.
    # Gonna do it earlier here to save a bit of memory.
    mask = sp.any(
        sp.vstack([(l < wavelengths) & (wavelengths < u) for l, u in CHIPS.values()]),
        0)
    norm_flux = pd.DataFrame(norm_flux[:, mask], stars, wavelengths[mask])
    norm_error = pd.DataFrame(norm_error[:, mask], stars, wavelengths[mask])

    return pd.concat({'flux': norm_flux, 'error': norm_error}, axis=1)
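# Usage sketch (assumptions, not from the repo): `spectra` is a DataFrame with a
# two-level column index of ('flux'|'error', wavelength), which is what the
# `spectra.flux` / `spectra.error` access above implies. `pixlist.txt`, ERROR_LIM
# and CHIPS must already be in place.
#
#   spectra = pd.concat({
#       'flux': pd.DataFrame(raw_flux, index=star_ids, columns=wavelengths),
#       'error': pd.DataFrame(raw_error, index=star_ids, columns=wavelengths)},
#       axis=1)
#   normed = _normalize(spectra)
#   normed.flux    # continuum-normalized flux, restricted to the CHIPS windows
#   normed.error   # matching normalized errors (unreliable pixels set to ERROR_LIM)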
def incidence_matrices(relation):
    """Builds a sparse author-by-link incidence matrix for each subreddit."""
    results = {}
    with tqdm(total=len(relation)) as pbar:
        for subreddit, group in relation.groupby('subreddit'):
            # Sorted label arrays, so searchsorted below recovers row/column indices.
            links = sp.array(sorted(sp.concatenate(group.link_ids.tolist())))
            authors = sp.array(sorted(group.author))

            rs, cs = [], []
            for _, row in group.iterrows():
                r = sp.searchsorted(authors, row.author)
                c = sp.searchsorted(links, row.link_ids)
                rs.append(sp.full_like(c, r))
                cs.append(c)
            rs, cs = sp.concatenate(rs), sp.concatenate(cs)

            # One nonzero per (author, link) pair the author is recorded against.
            vals = sp.ones_like(rs)
            incidence = sp.sparse.csr_matrix(
                (vals, (rs, cs)), (len(authors), len(links)))

            results[subreddit] = {
                'incidence': incidence,
                'authors': authors,
                'links': links}

            pbar.update(len(group))

    return results
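# Usage sketch (hedged): `relation` appears to be a DataFrame with one row per
# (subreddit, author) record, carrying an `author` column and a `link_ids` list,
# as the groupby/iterrows logic above implies. The subreddit key below is a
# hypothetical example.
#
#   mats = incidence_matrices(relation)
#   sub = mats['askscience']
#   sub['incidence']                        # len(authors) x len(links) CSR matrix;
#                                           # entry (i, j) counts author i on link j
#   sub['authors'], sub['links']            # row and column labels
#   sub['incidence'] @ sub['incidence'].T   # author-author shared-link counts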
def evaluate(category, number, model):
    """Uses a NN to generate a perfect image from a previously-unseen source image"""
    loader = torch.utils.data.DataLoader(
        Dataset(category, numbers=[number]),
        batch_size=64, shuffle=False, num_workers=2)

    # Collect the predicted bump for each (row, col) pixel the loader yields.
    bs = []
    rows, cols = [], []
    for batch in tqdm(loader, desc='eval'):
        rows.extend(batch['row'].detach().numpy())
        cols.extend(batch['col'].detach().numpy())
        bs.extend(BUMP_SCALE * model(batch['region']).detach().numpy())
    bs = sp.array(bs)
    rows, cols = sp.array(rows), sp.array(cols)

    # Scatter the predictions back onto the source image's pixel grid; pixels
    # with no prediction stay NaN.
    im = tools.image(category, number)
    bumps = sp.full_like(im, sp.nan)
    bumps[rows, cols] = bs
    return im, bumps
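# Usage sketch (hedged): `category`/`number` address a source image via the repo's
# `Dataset` and `tools.image`, and `model` maps a batch of regions to per-pixel
# bump predictions. The returned `bumps` array has the image's shape, with NaN
# wherever the loader produced no (row, col) prediction.
#
#   im, bumps = evaluate(category, number, model)
#   filled = ~sp.isnan(bumps)   # pixels that actually received a prediction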
def bottom_boundary_coords(x):
    # Pair every x[0] value with the last x[1] value.
    bt = sc.full_like(x[0], x[1][-1])
    return sc.vstack((x[0], bt)).transpose()


def top_boundary_coords(x):
    # Pair every x[0] value with the first x[1] value.
    ut = sc.full_like(x[0], x[1][0])
    return sc.vstack((x[0], ut)).transpose()


def right_boundary_coords(x):
    # Pair every x[1] value with the last x[0] value.
    rx = sc.full_like(x[1], x[0][-1])
    return sc.vstack((rx, x[1])).transpose()


def left_boundary_coords(x):
    # Pair every x[1] value with the first x[0] value.
    lx = sc.full_like(x[1], x[0][0])
    return sc.vstack((lx, x[1])).transpose()
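# Usage sketch (hedged): `x` is assumed to be a pair of 1-D coordinate arrays,
# x[0] along one axis and x[1] along the other, as the indexing above implies.
# Each helper returns an (n, 2) array of (x, y) points on one edge; together they
# trace the whole rectangle boundary (corner points appear twice).
#
#   x = (sc.linspace(0., 1., 11), sc.linspace(0., 2., 21))
#   boundary = sc.concatenate([
#       left_boundary_coords(x), right_boundary_coords(x),
#       top_boundary_coords(x), bottom_boundary_coords(x)])
#   boundary.shape   # (2*len(x[0]) + 2*len(x[1]), 2)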