Пример #1
0
def knn_test(Xtrain, Ytrain, Xtest, pp, opts):
    """Lazily run ``_test_knn`` for every metric / k / weights combination.

    For each entry of ``opts.metric``, ``pairwise_dists(Xtest, Xtrain, ...)``
    is called once (and timed); its result is then reused for every
    ``(k, weights)`` pair drawn from ``opts.k`` x ``opts.weights``.

    Yields whatever ``_test_knn`` returns, one item per combination.
    """
    for metric_name in opts.metric:
        started = time()
        dist_matrix = pairwise_dists(
            Xtest, Xtrain, metric_name, num_procs=opts.parallel
        )
        elapsed = time() - started
        # The pairwise result depends only on the metric, so it is shared
        # by all (k, weights) settings of this metric.
        for neighbors, weighting in cart_product(opts.k, opts.weights):
            yield _test_knn(
                dist_matrix, Ytrain, neighbors, weighting,
                metric_name, elapsed, pp, opts
            )
def hyper_parameter_grid_search(
    data: dataUtils.Data,
    split_index: int,
    hyper_parameter_space: dict,
    show_status: bool = False,
):
    """Exhaustively evaluate a hyperparameter grid and keep the best point.

    Every hyperparameter is sampled with ``np.linspace`` between its limits;
    the samples of ``internal_nodes`` and ``seed`` are truncated with ``int``.
    Each element of the Cartesian product of all samples is evaluated with
    ``__evaluate_esn_model`` and the combination with the smallest absolute
    test error is kept.

    :param data: Data object; ``data.split_data_by_index(split_index)`` is
            called on it before the search starts
    :param split_index: index used to split the data into training and test set
    :param hyper_parameter_space: a dictionary containing the limits of the
            hyperparameter search of the form
            {"hyperparameter": (min, max, no. steps)}
    :param show_status: determine whether the result of each run is printed
            to the console or not.
    :return: tuple ``(min_error, optimal_hyper_parameter)``: the smallest
            absolute test error found and the hyperparameter dictionary that
            produced it (empty if no run had an error below infinity)
    """

    print("Run Grid Search")
    data.split_data_by_index(split_index)

    integer_valued = ("internal_nodes", "seed")
    search_points = {}
    for name, limits in hyper_parameter_space.items():
        grid = np.linspace(start=limits[0], stop=limits[1], num=limits[2])
        if name in integer_valued:
            grid = [int(point) for point in grid]
        search_points[name] = grid

    # Fix one (alphabetical) parameter order so that every combination can
    # be zipped back onto its parameter names.
    ordered_names = sorted(search_points)
    grid_points = cart_product(*(search_points[name] for name in ordered_names))

    min_error = float("inf")
    optimal_hyper_parameter = {}

    for run_id, combination in enumerate(grid_points, start=1):
        hyper_parameter_sample = dict(zip(ordered_names, combination))
        abs_error = np.abs(__evaluate_esn_model(data, hyper_parameter_sample))

        if abs_error < min_error:
            min_error = abs_error
            optimal_hyper_parameter = hyper_parameter_sample

        if show_status:
            values = "".join(" %.2f" % value for value in combination)
            summary = "  Error: %.4f Global Optimum: %.4f" % (abs_error, min_error)
            print(" %.d " % run_id + values + summary)

    print("Grid Search Completed. Min Error %f" % min_error)
    pprint(optimal_hyper_parameter)

    return min_error, optimal_hyper_parameter
Пример #3
0
 def generate_boxes(shape, kernel):
     """Return every ``(start, stop)`` box obtained by stepping over ``shape``.

     Each axis of length ``axis`` is cut into intervals ``(x, x + step)`` for
     ``x in range(0, axis, step)``; the last interval is not clipped, so it
     extends past ``axis`` when ``step`` does not divide it. The result is the
     list of all combinations of one interval per axis, with the first axis
     varying slowest.

     Raises ``AssertionError`` when ``shape`` and ``kernel`` differ in length.
     """
     if len(shape) == len(kernel):
         per_axis = []
         for extent, stride in zip(shape, kernel):
             per_axis.append(
                 [(lo, lo + stride) for lo in range(0, extent, stride)])
         return list(cart_product(*per_axis))
     message = "Incompatible shape ({}) and kernel ({})".format(shape, kernel)
     raise AssertionError(message)
Пример #4
0
 def test_series_group_min_max(self):
     """Grouped aggregation must match the level-wise reduction.

     For every combination of reduction name in ``self.AGG_FUNCTIONS``,
     index level (0 or 1) and ``skipna`` flag, aggregating
     ``self.series.groupby(level=level)`` with the reduction is compared
     against calling the reduction on the series with ``level=level``.
     """
     combos = cart_product(self.AGG_FUNCTIONS, range(2), [False, True])
     for op, level, skipna in combos:
         by_level = self.series.groupby(level=level)
         # The lambda is consumed immediately, so it sees the current
         # op / skipna of this iteration.
         leftside = by_level.agg(
             lambda part: getattr(part, op)(skipna=skipna))
         reduce_series = getattr(self.series, op)
         rightside = reduce_series(level=level, skipna=skipna)
         assert_series_equal(leftside, rightside)
def factor_at_nonzero(P):
    """Search for a factor of ``P`` among combinations of point factors.

    Starts from ``point_factors(P, 0)``. For ``i = 2 .. len(P) // 2`` the
    running candidate list is combined (Cartesian product, flattened) with
    ``point_factors(P, i)``; each combination ``u`` is turned into
    ``Q = Polynomial(u)`` and ``S = P / Q`` is computed. The first ``Q`` for
    which ``len(S) == i`` and ``S.coef[-1] == 0`` is returned.

    Returns ``P`` itself when no candidate satisfies the condition.
    """
    half = len(P) // 2
    candidates = point_factors(P, 0)
    for i in range(2, half + 1):
        extra = point_factors(P, i)
        # The grown list is carried over to the next i as well.
        candidates = [flatten(pair) for pair in cart_product(candidates, extra)]
        for coeffs in candidates:
            divisor = Polynomial(coeffs)
            quotient = P / divisor
            if len(quotient) != i:
                continue
            if quotient.coef[-1] == 0:
                return divisor
    return P
Пример #6
0
    def test_frame_group_ops(self):
        """Grouped frame aggregation must match the level-wise reduction.

        Two rows of ``self.frame`` get NaN entries first. Then, for every
        combination of reduction name in ``self.AGG_FUNCTIONS``, level
        (0 or 1), axis (0 or 1) and ``skipna`` flag, the result of
        ``groupby(level, axis).agg(reduction)`` is compared with calling the
        reduction directly with ``level=level``. For ``axis == 1`` the
        transposed frame is used.
        """
        # Presumably the NaNs are there so that skipna changes the outcome.
        self.frame.ix[1, [1, 2]] = np.nan
        self.frame.ix[7, [0, 1]] = np.nan

        combos = cart_product(self.AGG_FUNCTIONS, range(2), range(2),
                              [False, True])
        for op, level, axis, skipna in combos:
            frame = self.frame if axis == 0 else self.frame.T

            by_level = frame.groupby(level=level, axis=axis)

            # The lambda is consumed immediately, so it sees the current
            # op / skipna / axis of this iteration.
            leftside = by_level.agg(
                lambda part: getattr(part, op)(skipna=skipna, axis=axis))
            reduce_frame = getattr(frame, op)
            rightside = reduce_frame(level=level, axis=axis, skipna=skipna)
            assert_frame_equal(leftside, rightside)
Пример #7
0
    def removePatternNoise(self, adinputs=None, **params):
        """
        Attempt to remove the pattern noise in NIRI/GNIRS data, one quadrant
        at a time.

        Each extension is divided into four equal quadrants. A quadrant is cut
        into boxes of pattern_y_size x pattern_x_size pixels, the boxes are
        combined with a sigma-clipped median stack, and the stacked box is
        tiled over the quadrant and subtracted (the mean of the stacked box
        is added back). If the sigma-clipped standard deviation of the
        quadrant increases, the quadrant is left unchanged unless "force" is
        set. Inputs that already carry this primitive's timestamp keyword are
        skipped with a warning.

        Based on Andy Stephens's "cleanir"

        Parameters
        ----------
        suffix: str
            suffix to be added to output files
        force: bool
            perform operation even if standard deviation in quadrant increases?
        hsigma/lsigma: float
            sigma-clipping limits
        pattern_x_size: int
            size of pattern "box" in x direction
        pattern_y_size: int
            size of pattern "box" in y direction
        subtract_background: bool
            if set, the negated masked mean of each "box" is passed to the
            stacker as its "zero" argument (presumably a per-box offset
            applied before combining)

        Returns
        -------
        the input list "adinputs", whose members are modified in place
        """
        log = self.log
        log.debug(gt.log_message("primitive", self.myself(), "starting"))
        timestamp_key = self.timestamp_keys[self.myself()]

        hsigma = params["hsigma"]
        lsigma = params["lsigma"]
        pxsize = params["pattern_x_size"]
        pysize = params["pattern_y_size"]
        bgsub = params["subtract_background"]
        force = params["force"]
        stack_function = NDStacker(combine='median', reject='sigclip',
                                   hsigma=hsigma, lsigma=lsigma)

        def clipped_std(values, mask):
            # Standard deviation of the masked array after sigma clipping
            masked = np.ma.masked_array(values, mask)
            return sigma_clip(masked, sigma_lower=lsigma,
                              sigma_upper=hsigma).std()

        zeros = None  # stays None unless subtract_background is set

        for ad in adinputs:
            if ad.phu.get(timestamp_key):
                log.warning("No changes will be made to {}, since it has "
                            "already been processed by removePatternNoise".
                            format(ad.filename))
                continue

            for ext in ad:
                qysize, qxsize = (extent // 2 for extent in ext.data.shape)
                yticks = [(y0, y0 + pysize) for y0 in range(0, qysize, pysize)]
                xticks = [(x0, x0 + pxsize) for x0 in range(0, qxsize, pxsize)]
                tiling = (len(yticks), len(xticks))

                # Visit the four quadrants, y outermost
                for ystart, xstart in cart_product((0, qysize), (0, qxsize)):
                    window = (slice(ystart, ystart + qysize),
                              slice(xstart, xstart + qxsize))
                    quad = ext.nddata[window]
                    sigma_in = clipped_std(quad.data, quad.mask)
                    boxes = [quad[y0:y1, x0:x1]
                             for (y0, y1), (x0, x1)
                             in cart_product(yticks, xticks)]
                    if bgsub:
                        # A fully masked box has no mean; nan_to_num maps
                        # that to 0, and the warning is suppressed.
                        with warnings.catch_warnings():
                            warnings.simplefilter("ignore", category=UserWarning)
                            zeros = np.nan_to_num(
                                [-np.ma.masked_array(box.data, box.mask).mean()
                                 for box in boxes])
                    out = stack_function(boxes, zero=zeros).data
                    out_quad = quad.data + np.mean(out) - np.tile(out, tiling)
                    sigma_out = clipped_std(out_quad, quad.mask)
                    if sigma_out > sigma_in:
                        qstr = (f"{ad.filename} extension {ext.id} "
                                f"quadrant ({xstart},{ystart})")
                        if not force:
                            log.stdinfo("No improvement for "+qstr)
                            continue
                        log.stdinfo("Forcing cleaning on " + qstr)
                    ext.data[window] = out_quad

            # Timestamp and update filename
            gt.mark_history(ad, primname=self.myself(), keyword=timestamp_key)
            ad.update_filename(suffix=params["suffix"], strip=True)
        return adinputs
Пример #8
0
    def init_default_extensions(self, num_ext=12, binning=1, overscan=True,
                                read_speed="slow", gain_setting="low"):
        """
        Replace the current extensions with a default set of 12 amplifier
        extensions and fill in their section/WCS/amplifier keywords.

        Parameters
        ----------
        num_ext: int
            number of extensions; only 12 is accepted
        binning: int
            binning factor (1, 2, or 4), used for both axes ("CCDSUM")
        overscan: bool
            if True, each extension gets BIAS_WIDTH extra columns, an
            overscan-section keyword, and uint16 data; otherwise float32
        read_speed: str
            "slow" sets PHU AMPINTEG to 10000, any other value to 1000
        gain_setting: str
            "low" sets GAIN to 1, any other value to 5

        Raises
        ------
        NotImplementedError
            if num_ext is not 12
        ValueError
            if binning is not 1, 2, or 4
        """
        if num_ext != 12:
            raise NotImplementedError("Only tested for full array ROI")
        if binning not in (1, 2, 4):
            raise ValueError("Binning must be 1, 2, or 4")

        # Drop everything currently in self[:] before building the new set
        del self[:]
        # (rows, columns) of one extension: 4224 rows when DETID starts with
        # 'BI', otherwise 4608; 512 columns plus the overscan strip if present
        shape = ((4224 if self.phu['DETID'].startswith('BI') else 4608) // binning,
                 512 // binning + (BIAS_WIDTH if overscan else 0))
        # If the overscan is present, assume it's raw data
        dtype = np.uint16 if overscan else np.float32
        pixel_scale = lookup.gmosPixelScales[self.instrument(),
                                             self.phu['DETTYPE']] * binning

        north = self.instrument() == 'GMOS-N'
        crpix1 = 3132.69 if north else 3133.5
        # Rescale the reference pixel by the binning factor
        # (presumably the usual unbinned -> binned pixel-centre conversion)
        crpix1 = (crpix1 - 0.5) / binning + 0.5
        crpix2_list = CRPIX2N if north else CRPIX2S
        chip_gap = 67. if north else 61.

        # NOTE(review): this single dict is updated in place and handed to
        # add_extension on every iteration; whether add_extension copies it
        # is not visible here.
        extra_keywords = {'CRVAL1': self.phu['RA'], 'CRVAL2': self.phu['DEC'],
                          'CTYPE1': 'RA---TAN', 'CTYPE2': 'DEC--TAN',
                          'CCDSUM': '{} {}'.format(binning, binning)}
        self.phu['NAMPS'] = num_ext
        for i in range(num_ext):
            # Four consecutive extensions share one entry of crpix2_list
            ccd = i // 4
            crpix2 = (crpix2_list[ccd] - 0.5) / binning + 0.5

            # Detector section: 512 unbinned columns per extension
            # NOTE(review): the row range is hard-coded to 1:4224 even when
            # the non-'BI' height of 4608 is used for shape - confirm.
            detx1 = i * 512
            detx2 = detx1 + 512
            detsec = '[{}:{},1:4224]'.format(detx1 + 1, detx2)

            # Data section: on odd-numbered extensions the data starts after
            # the overscan strip, on even-numbered ones at column 0
            datx1 = BIAS_WIDTH if (overscan and i % 2 == 1) else 0
            datx2 = datx1 + 512 // binning
            datasec = '[{}:{},1:{}]'.format(datx1 + 1, datx2, shape[0])

            if overscan:
                # The overscan strip starts at column 0 when the data is
                # offset by BIAS_WIDTH, and at BIAS_WIDTH otherwise
                biasx1 = BIAS_WIDTH - datx1
                biassec = '[{}:{},1:{}]'.format(biasx1 + 1, biasx1 + BIAS_WIDTH, shape[0])
                extra_keywords[self._keyword_for('overscan_section')] = biassec

            # Array section: column offset wraps every 2048 columns
            # (i.e. every four extensions)
            arrx1 = detx1 % 2048
            arrx2 = arrx1 + 512
            arraysec = '[{}:{},1:{}]'.format(arrx1 + 1, arrx2, shape[0])

            extra_keywords.update({self._keyword_for('detector_section'): detsec,
                                   self._keyword_for('data_section'): datasec,
                                   self._keyword_for('array_section'): arraysec,
                                   'CRPIX1': crpix1 + datx1, 'CRPIX2': crpix2})

            # Move the running reference pixel left by one data width for
            # the next extension; the value stored above is not affected.
            crpix1 -= (datx2 - datx1)
            # This isn't entirely right but it'll do
            if i % 4 == 3:
                # Last extension of a group of four: also skip the chip gap
                crpix1 -= chip_gap / binning

            self.add_extension(shape=shape, pixel_scale=pixel_scale,
                               dtype=dtype, extra_keywords=extra_keywords)

        # GAIN and READNOISE
        # not the correct values, but makes the descriptors work
        self.phu['AMPINTEG'] = 10000 if read_speed == "slow" else 1000
        self.hdr['GAIN'] = 1 if gain_setting == "low" else 5
        # Derive one name per CCD from DETID: either a comma-separated list
        # (with the "e2v " prefix repeated on the 2nd and 3rd names when the
        # first starts with it) or a single string split on "EEV"
        # NOTE(review): ccdnames[2] assumes three comma-separated names
        # whenever the first one starts with "e2v" - confirm.
        ccdnames = self.phu['DETID'].split(",")
        if len(ccdnames) > 1:
            if ccdnames[0].startswith("e2v"):
                ccdnames[1] = "e2v " + ccdnames[1]
                ccdnames[2] = "e2v " + ccdnames[2]
        else:
            ccdnames = ["EEV"+x for x in self.phu['DETID'].split("EEV")[1:]]
        # NOTE(review): num_ext is always 12 here because of the check at the
        # top, so the "left"/"right" branch is currently unreachable.
        if num_ext == 12:
            amps = ("1", "2", "3", "4")
        else:
            amps = ("left", "right")
        # Name each extension "<ccd>, <amp>", CCDs outermost
        for i, (ccd, amp) in enumerate(cart_product(ccdnames, amps)):
            self[i].hdr['AMPNAME'] = f"{ccd}, {amp}"
Пример #9
0
    def removePatternNoise(self, adinputs=None, **params):
        """
        Attempt to remove the pattern noise in NIRI/GNIRS data, one quadrant
        at a time.

        Each extension is divided into four equal quadrants. A quadrant is cut
        into boxes of pattern_y_size x pattern_x_size pixels, the boxes are
        combined with a sigma-clipped median stack, and the stacked box is
        tiled over the quadrant and subtracted (the mean of the stacked box
        is added back). If the sigma-clipped standard deviation of the
        quadrant increases, the quadrant is left unchanged unless "force" is
        set. Inputs that already carry this primitive's timestamp keyword are
        skipped with a warning.

        Based on Andy Stephens's "cleanir"

        Parameters
        ----------
        suffix: str
            suffix to be added to output files
        force: bool
            perform operation even if standard deviation in quadrant increases?
        hsigma/lsigma: float
            sigma-clipping limits
        pattern_x_size: int
            size of pattern "box" in x direction
        pattern_y_size: int
            size of pattern "box" in y direction
        subtract_background: bool
            if set, the negated masked mean of each "box" is passed to the
            stacker as its "zero" argument (presumably a per-box offset
            applied before combining)

        Returns
        -------
        the input list "adinputs", whose members are modified in place
        """
        log = self.log
        log.debug(gt.log_message("primitive", self.myself(), "starting"))
        timestamp_key = self.timestamp_keys[self.myself()]

        hsigma = params["hsigma"]
        lsigma = params["lsigma"]
        pxsize = params["pattern_x_size"]
        pysize = params["pattern_y_size"]
        bgsub = params["subtract_background"]
        force = params["force"]
        stack_function = NDStacker(combine='median', reject='sigclip',
                                   hsigma=hsigma, lsigma=lsigma)

        def clipped_std(values, mask):
            # Standard deviation of the masked array after sigma clipping
            masked = np.ma.masked_array(values, mask)
            return sigma_clip(masked, sigma_lower=lsigma,
                              sigma_upper=hsigma).std()

        zeros = None  # stays None unless subtract_background is set

        for ad in adinputs:
            if ad.phu.get(timestamp_key):
                log.warning("No changes will be made to {}, since it has "
                            "already been processed by removePatternNoise".
                            format(ad.filename))
                continue

            for ext in ad:
                qysize, qxsize = (extent // 2 for extent in ext.data.shape)
                yticks = [(y0, y0 + pysize) for y0 in range(0, qysize, pysize)]
                xticks = [(x0, x0 + pxsize) for x0 in range(0, qxsize, pxsize)]
                tiling = (len(yticks), len(xticks))

                # Visit the four quadrants, y outermost
                for ystart, xstart in cart_product((0, qysize), (0, qxsize)):
                    window = (slice(ystart, ystart + qysize),
                              slice(xstart, xstart + qxsize))
                    quad = ext.nddata[window]
                    sigma_in = clipped_std(quad.data, quad.mask)
                    boxes = [quad[y0:y1, x0:x1]
                             for (y0, y1), (x0, x1)
                             in cart_product(yticks, xticks)]
                    if bgsub:
                        # A fully masked box has no mean; nan_to_num maps
                        # that to 0, and the warning is suppressed.
                        with warnings.catch_warnings():
                            warnings.simplefilter("ignore", category=UserWarning)
                            zeros = np.nan_to_num(
                                [-np.ma.masked_array(box.data, box.mask).mean()
                                 for box in boxes])
                    out = stack_function(boxes, zero=zeros).data
                    out_quad = quad.data + np.mean(out) - np.tile(out, tiling)
                    sigma_out = clipped_std(out_quad, quad.mask)
                    if sigma_out > sigma_in:
                        qstr = "{}:{} quadrant ({},{})".format(
                            ad.filename, ext.hdr['EXTVER'], xstart, ystart)
                        if not force:
                            log.stdinfo("No improvement for "+qstr)
                            continue
                        log.stdinfo("Forcing cleaning on " + qstr)
                    ext.data[window] = out_quad

            # Timestamp and update filename
            gt.mark_history(ad, primname=self.myself(), keyword=timestamp_key)
            ad.update_filename(suffix=params["suffix"], strip=True)
        return adinputs