def knn_test(Xtrain, Ytrain, Xtest, pp, opts):
    for metric in opts.metric:
        # Compute the full test/train distance matrix once per metric...
        tic = time()
        D = pairwise_dists(Xtest, Xtrain, metric, num_procs=opts.parallel)
        d_time = time() - tic
        # ...then sweep every (k, weighting) combination against it.
        for k, w in cart_product(opts.k, opts.weights):
            yield _test_knn(D, Ytrain, k, w, metric, d_time, pp, opts)

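# In all of these snippets ``cart_product`` is used the way
# ``itertools.product`` behaves; a minimal sketch, assuming that alias:
from itertools import product as cart_product

# Every (k, weighting) combination, in row-major order.
pairs = list(cart_product([1, 5, 9], ["uniform", "distance"]))
# pairs[:3] -> [(1, 'uniform'), (1, 'distance'), (5, 'uniform')]
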
def hyper_parameter_grid_search(
        data: dataUtils.Data,
        split_index: int,
        hyper_parameter_space: dict,
        show_status: bool = False,
):
    """Perform hyperparameter grid search optimization

    :param data: Data object
    :param split_index: index at which to split the data into training
        and test sets
    :param hyper_parameter_space: a dictionary containing the limits of the
        hyperparameter search, of the form
        {"hyperparameter": (min, max, no. of steps)}
    :param show_status: whether to print the result of each run to the
        console
    :return: the minimum error and the set of optimal hyperparameters
    """
    print("Run Grid Search")
    data.split_data_by_index(split_index)

    # Expand each (min, max, steps) triple into a list of search points.
    search_points = dict()
    for param in hyper_parameter_space.keys():
        p_start, p_stop, p_num = hyper_parameter_space[param]
        search_points[param] = np.linspace(start=p_start, stop=p_stop, num=p_num)
        if param in ["internal_nodes", "seed"]:
            # Integer-valued hyperparameters must stay integers.
            search_points[param] = [int(x) for x in search_points[param]]

    # Sort the keys so each combination lines up with its parameter names.
    parameter_combinations = cart_product(
        *[search_points[param] for param in sorted(search_points.keys())]
    )

    min_error: float = float("inf")
    optimal_hyper_parameter: dict = dict()
    run_id: int = 0
    for combination in parameter_combinations:
        run_id += 1
        hyper_parameter_sample = dict(zip(sorted(search_points.keys()), combination))
        test_error = __evaluate_esn_model(data, hyper_parameter_sample)

        if np.abs(test_error) < min_error:
            min_error = np.abs(test_error)
            optimal_hyper_parameter = hyper_parameter_sample

        if show_status:
            status = " %.d " % run_id
            for value in combination:
                status += " %.2f" % value
            print(status + " Error: %.4f Global Optimum: %.4f"
                  % (np.abs(test_error), min_error))

    print("Grid Search Completed. Min Error %f" % min_error)
    pprint(optimal_hyper_parameter)
    return min_error, optimal_hyper_parameter

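# A sketch of how the grid expansion above behaves, using a hypothetical
# search space in the {"param": (min, max, steps)} form the docstring
# describes; the parameter names here are illustrative only.
from itertools import product as cart_product
from pprint import pprint
import numpy as np

space = {"spectral_radius": (0.1, 1.5, 3), "internal_nodes": (50, 200, 4)}

points = {}
for name, (lo, hi, num) in space.items():
    points[name] = np.linspace(lo, hi, num)
    if name in ("internal_nodes", "seed"):      # integer-valued parameters
        points[name] = [int(x) for x in points[name]]

# Sorting keeps each combination aligned with its parameter names.
names = sorted(points)
for combo in cart_product(*(points[n] for n in names)):
    pprint(dict(zip(names, combo)))             # one candidate per grid point
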
def generate_boxes(shape, kernel):
    if len(shape) != len(kernel):
        raise AssertionError(
            "Incompatible shape ({}) and kernel ({})".format(shape, kernel))
    ticks = [[(x, x + step) for x in range(0, axis, step)]
             for axis, step in zip(shape, kernel)]
    return list(cart_product(*ticks))

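# For example, a 4x6 array cut by a 2x3 kernel: each axis is split into
# [start, stop) intervals, and the Cartesian product enumerates every box.
boxes = generate_boxes(shape=(4, 6), kernel=(2, 3))
# -> [((0, 2), (0, 3)), ((0, 2), (3, 6)), ((2, 4), (0, 3)), ((2, 4), (3, 6))]
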
def test_series_group_min_max(self):
    for op, level, skipna in cart_product(self.AGG_FUNCTIONS,
                                          range(2),
                                          [False, True]):
        grouped = self.series.groupby(level=level)
        # Bind the current op/skipna; aggf is applied immediately below.
        aggf = lambda x: getattr(x, op)(skipna=skipna)
        leftside = grouped.agg(aggf)
        rightside = getattr(self.series, op)(level=level, skipna=skipna)
        assert_series_equal(leftside, rightside)

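# The loop above folds all eight cases into one test body. The same sweep
# can be expanded into independently reported cases with pytest's
# parametrize; a sketch, not taken from the source:
import pytest
from itertools import product as cart_product

@pytest.mark.parametrize(
    "op,level,skipna",
    list(cart_product(["min", "max"], range(2), [False, True])),
)
def test_each_combination_reported(op, level, skipna):
    # 2 ops x 2 levels x 2 skipna flags -> 8 separate test cases
    assert op in ("min", "max") and level in (0, 1) and isinstance(skipna, bool)
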
def factor_at_nonzero(P):
    n = len(P)
    Z = point_factors(P, 0)                     # candidate values at point 0
    for i in range(2, n // 2 + 1):
        M = point_factors(P, i)                 # candidate values at point i
        # Extend every partial candidate with every new factor value.
        Z = [flatten(z) for z in cart_product(Z, M)]
        for u in Z:
            Q = Polynomial(u)
            S = P / Q
            # Accept Q when the quotient has the expected length and a
            # vanishing trailing coefficient.
            if len(S) == i and S.coef[-1] == 0:
                return Q
    return P                                    # no proper factor found

def test_frame_group_ops(self):
    self.frame.ix[1, [1, 2]] = np.nan
    self.frame.ix[7, [0, 1]] = np.nan

    for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS,
                                                range(2), range(2),
                                                [False, True]):
        if axis == 0:
            frame = self.frame
        else:
            frame = self.frame.T

        grouped = frame.groupby(level=level, axis=axis)
        aggf = lambda x: getattr(x, op)(skipna=skipna, axis=axis)
        leftside = grouped.agg(aggf)
        rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna)
        assert_frame_equal(leftside, rightside)

def removePatternNoise(self, adinputs=None, **params):
    """
    This attempts to remove the pattern noise in NIRI/GNIRS data. In each
    quadrant, boxes of a specified size are extracted and, for each pixel
    location in the box, the median across all the boxes is determined.
    The resultant median is then tiled to the size of the quadrant and
    subtracted. Optionally, the median of each box can be subtracted
    before performing the operation.

    Based on Andy Stephens's "cleanir"

    Parameters
    ----------
    suffix: str
        suffix to be added to output files
    force: bool
        perform operation even if standard deviation in quadrant increases?
    hsigma/lsigma: float
        sigma-clipping limits
    pattern_x_size: int
        size of pattern "box" in x direction
    pattern_y_size: int
        size of pattern "box" in y direction
    subtract_background: bool
        remove median of each "box" before calculating pattern noise?
    """
    log = self.log
    log.debug(gt.log_message("primitive", self.myself(), "starting"))
    timestamp_key = self.timestamp_keys[self.myself()]

    hsigma, lsigma = params["hsigma"], params["lsigma"]
    pxsize, pysize = params["pattern_x_size"], params["pattern_y_size"]
    bgsub = params["subtract_background"]
    force = params["force"]
    stack_function = NDStacker(combine='median', reject='sigclip',
                               hsigma=hsigma, lsigma=lsigma)
    sigclip = partial(sigma_clip, sigma_lower=lsigma, sigma_upper=hsigma)
    zeros = None  # will remain unchanged if not subtract_background

    for ad in adinputs:
        if ad.phu.get(timestamp_key):
            log.warning("No changes will be made to {}, since it has "
                        "already been processed by removePatternNoise".
                        format(ad.filename))
            continue

        for ext in ad:
            qysize, qxsize = [size // 2 for size in ext.data.shape]
            yticks = [(y, y + pysize) for y in range(0, qysize, pysize)]
            xticks = [(x, x + pxsize) for x in range(0, qxsize, pxsize)]
            for ystart in (0, qysize):
                for xstart in (0, qxsize):
                    quad = ext.nddata[ystart:ystart + qysize,
                                      xstart:xstart + qxsize]
                    sigma_in = sigclip(np.ma.masked_array(quad.data,
                                                          quad.mask)).std()
                    blocks = [quad[tuple(slice(start, end)
                                         for (start, end) in coords)]
                              for coords in cart_product(yticks, xticks)]
                    if bgsub:
                        # If all pixels are masked in a box, we'll get no
                        # result from the mean. Suppress warning.
                        with warnings.catch_warnings():
                            warnings.simplefilter("ignore",
                                                  category=UserWarning)
                            zeros = np.nan_to_num(
                                [-np.ma.masked_array(block.data,
                                                     block.mask).mean()
                                 for block in blocks])
                    out = stack_function(blocks, zero=zeros).data
                    out_quad = (quad.data + np.mean(out) -
                                np.tile(out, (len(yticks), len(xticks))))
                    sigma_out = sigclip(np.ma.masked_array(out_quad,
                                                           quad.mask)).std()
                    if sigma_out > sigma_in:
                        qstr = (f"{ad.filename} extension {ext.id} "
                                f"quadrant ({xstart},{ystart})")
                        if force:
                            log.stdinfo("Forcing cleaning on " + qstr)
                        else:
                            log.stdinfo("No improvement for " + qstr)
                            continue
                    ext.data[ystart:ystart + qysize,
                             xstart:xstart + qxsize] = out_quad

        # Timestamp and update filename
        gt.mark_history(ad, primname=self.myself(), keyword=timestamp_key)
        ad.update_filename(suffix=params["suffix"], strip=True)
    return adinputs

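# The core operation the docstring describes (a per-pixel median across the
# grid of boxes, tiled back over the quadrant and subtracted) can be shown
# with plain numpy. A simplified sketch: no masking or sigma-clipping, and
# the quadrant is assumed to divide evenly into boxes.
import numpy as np

def subtract_pattern(quad, pysize, pxsize):
    qy, qx = quad.shape
    # Stack every (pysize x pxsize) box along a new leading axis.
    boxes = (quad.reshape(qy // pysize, pysize, qx // pxsize, pxsize)
                 .transpose(0, 2, 1, 3)
                 .reshape(-1, pysize, pxsize))
    pattern = np.median(boxes, axis=0)          # per-pixel median across boxes
    tiled = np.tile(pattern, (qy // pysize, qx // pxsize))
    # Preserve the quadrant's mean level, as the primitive above does.
    return quad + pattern.mean() - tiled

# Synthetic check: plant a repeating 4x8 pattern and remove it.
rng = np.random.default_rng(0)
quad = rng.normal(size=(512, 512)) + np.tile(rng.normal(size=(4, 8)), (128, 64))
cleaned = subtract_pattern(quad, pysize=4, pxsize=8)
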
def init_default_extensions(self, num_ext=12, binning=1, overscan=True,
                            read_speed="slow", gain_setting="low"):
    if num_ext != 12:
        raise NotImplementedError("Only tested for full array ROI")
    if binning not in (1, 2, 4):
        raise ValueError("Binning must be 1, 2, or 4")

    del self[:]
    shape = ((4224 if self.phu['DETID'].startswith('BI') else 4608) // binning,
             512 // binning + (BIAS_WIDTH if overscan else 0))
    # If the overscan is present, assume it's raw data
    dtype = np.uint16 if overscan else np.float32
    pixel_scale = lookup.gmosPixelScales[self.instrument(),
                                         self.phu['DETTYPE']] * binning
    north = self.instrument() == 'GMOS-N'
    crpix1 = 3132.69 if north else 3133.5
    crpix1 = (crpix1 - 0.5) / binning + 0.5
    crpix2_list = CRPIX2N if north else CRPIX2S
    chip_gap = 67. if north else 61.
    extra_keywords = {'CRVAL1': self.phu['RA'],
                      'CRVAL2': self.phu['DEC'],
                      'CTYPE1': 'RA---TAN',
                      'CTYPE2': 'DEC--TAN',
                      'CCDSUM': '{} {}'.format(binning, binning)}
    self.phu['NAMPS'] = num_ext

    for i in range(num_ext):
        ccd = i // 4
        crpix2 = (crpix2_list[ccd] - 0.5) / binning + 0.5
        detx1 = i * 512
        detx2 = detx1 + 512
        detsec = '[{}:{},1:4224]'.format(detx1 + 1, detx2)
        datx1 = BIAS_WIDTH if (overscan and i % 2 == 1) else 0
        datx2 = datx1 + 512 // binning
        datasec = '[{}:{},1:{}]'.format(datx1 + 1, datx2, shape[0])
        if overscan:
            biasx1 = BIAS_WIDTH - datx1
            biassec = '[{}:{},1:{}]'.format(biasx1 + 1, biasx1 + BIAS_WIDTH,
                                            shape[0])
            extra_keywords[self._keyword_for('overscan_section')] = biassec
        arrx1 = detx1 % 2048
        arrx2 = arrx1 + 512
        arraysec = '[{}:{},1:{}]'.format(arrx1 + 1, arrx2, shape[0])
        extra_keywords.update({self._keyword_for('detector_section'): detsec,
                               self._keyword_for('data_section'): datasec,
                               self._keyword_for('array_section'): arraysec,
                               'CRPIX1': crpix1 + datx1,
                               'CRPIX2': crpix2})
        crpix1 -= (datx2 - datx1)  # This isn't entirely right but it'll do
        if i % 4 == 3:
            crpix1 -= chip_gap / binning
        self.add_extension(shape=shape, pixel_scale=pixel_scale, dtype=dtype,
                           extra_keywords=extra_keywords)

    # GAIN and READNOISE
    # not the correct values, but makes the descriptors work
    self.phu['AMPINTEG'] = 10000 if read_speed == "slow" else 1000
    self.hdr['GAIN'] = 1 if gain_setting == "low" else 5
    ccdnames = self.phu['DETID'].split(",")
    if len(ccdnames) > 1:
        if ccdnames[0].startswith("e2v"):
            ccdnames[1] = "e2v " + ccdnames[1]
            ccdnames[2] = "e2v " + ccdnames[2]
    else:
        ccdnames = ["EEV" + x for x in self.phu['DETID'].split("EEV")[1:]]
    if num_ext == 12:
        amps = ("1", "2", "3", "4")
    else:
        amps = ("left", "right")
    for i, (ccd, amp) in enumerate(cart_product(ccdnames, amps)):
        self[i].hdr['AMPNAME'] = f"{ccd}, {amp}"

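# The repeated ``(crpix - 0.5) / binning + 0.5`` step above maps a WCS
# reference pixel into binned coordinates under the FITS convention that
# pixel centers sit at integer coordinates (so the array edge is at 0.5).
# A quick check with the GMOS-N CRPIX1 value used in this function:
def bin_crpix(crpix, binning):
    # Shift to the array edge, scale by the binning, shift back to centers.
    return (crpix - 0.5) / binning + 0.5

print(bin_crpix(3132.69, 1))   # unbinned: unchanged
print(bin_crpix(3132.69, 2))   # 2x2 binning -> 1566.595
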