예제 #1
0
def test_commission_real_change(sim_real_change):
    """ Commission test must not merge segments split by real changes

    A relatively large p value (0.10) is used, so H0 is very likely to be
    rejected and every change should be retained.
    """
    merged = commission_test(sim_real_change, 0.10)
    n_original = len(sim_real_change.record)
    assert len(merged) == n_original
예제 #2
0
def test_commission_real_change(sim_real_change):
    """ Commission test must not merge segments split by real changes

    A relatively large p value (0.10) is used, so H0 is very likely to be
    rejected and every change should be retained.
    """
    merged = commission_test(sim_real_change, 0.10)
    n_original = len(sim_real_change.record)
    assert len(merged) == n_original
예제 #3
0
def test_commission_nochange(sim_nochange):
    """ With no change present, the record should come back unaltered
    """
    result = commission_test(sim_nochange, 0.10)
    n_segments = len(result)
    assert n_segments == 1
    np.testing.assert_array_equal(result, sim_nochange.record)
예제 #4
0
def test_commission_no_real_change_2(sim_no_real_change_2):
    """ Two spurious changes should collapse into one unbroken segment
    """
    result = commission_test(sim_no_real_change_2, 0.01)
    n_segments = len(result)
    assert n_segments == 1
    only_segment = result[0]
    assert only_segment['break'] == 0
예제 #5
0
파일: line.py 프로젝트: johanez/yatsm
def line(ctx, config, job_number, total_jobs,
         resume, check_cache, do_not_run, verbose_yatsm):
    """ Run YATSM on every image line assigned to this job

    The configuration file is parsed, the output directory is created and
    checked for write access, and each line returned by ``distribute_jobs``
    is read, masked, fit pixel by pixel, optionally post-processed, and
    saved with ``np.savez``.

    Args:
        ctx: not used in the body of this function
        config: configuration file, passed to ``parse_config_file``
        job_number: ID of this job, passed to ``distribute_jobs``
        total_jobs: total number of jobs, passed to ``distribute_jobs``
        resume: if truthy, skip lines whose output file can already be
            loaded with ``np.load``
        check_cache: not used in the body of this function
        do_not_run: if truthy, each line is still read with ``read_line``
            but no model is fit and nothing is saved
        verbose_yatsm: if truthy, set ``logger_algo`` to DEBUG

    Raises:
        click.Abort: if phenology is requested but ``pheno`` is unavailable,
            or if the output directory cannot be created or written to
    """
    import errno

    if verbose_yatsm:
        logger_algo.setLevel(logging.DEBUG)

    # Parse config
    cfg = parse_config_file(config)

    # The phenology section is optional, so resolve the flag once here
    # instead of indexing cfg['phenology'] unguarded later on
    want_pheno = bool('phenology' in cfg and cfg['phenology'].get('enable'))
    if want_pheno and not pheno:
        click.secho('Could not import yatsm.phenology but phenology metrics '
                    'are requested', fg='red')
        click.secho('Error: %s' % pheno_exception, fg='red')
        raise click.Abort()

    # Make sure output directory exists and is writable
    output_dir = cfg['dataset']['output']
    try:
        os.makedirs(output_dir)
    except OSError as e:
        if e.errno == errno.EACCES:
            click.secho('Cannot create output directory %s' % output_dir,
                        fg='red')
            raise click.Abort()
        # errno.EEXIST just means the directory is already there; any other
        # failure is left to the write-access check below

    if not os.access(output_dir, os.W_OK):
        click.secho('Cannot write to output directory %s' % output_dir,
                    fg='red')
        raise click.Abort()

    # Test existence of cache directory
    read_cache, write_cache = test_cache(cfg['dataset'])

    logger.info('Job {i} of {n} - using config file {f}'.format(i=job_number,
                                                                n=total_jobs,
                                                                f=config))
    df = csvfile_to_dataframe(cfg['dataset']['input_file'],
                              cfg['dataset']['date_format'])
    df['image_ID'] = get_image_IDs(df['filename'])

    # Get attributes of one of the images
    nrow, ncol, nband, dtype = get_image_attribute(df['filename'][0])

    # Calculate the lines this job ID works on
    job_lines = distribute_jobs(job_number, total_jobs, nrow)
    logger.debug('Responsible for lines: {l}'.format(l=job_lines))

    # Calculate X feature input
    dates = np.asarray(df['date'])
    kws = {'x': dates}
    kws.update(df.to_dict())
    X = patsy.dmatrix(cfg['YATSM']['design_matrix'], kws)
    cfg['YATSM']['design'] = X.design_info.column_name_indexes

    reverse = cfg['YATSM']['reverse']
    if reverse:
        X = np.flipud(X)
        # Y is flipped along the observation axis below, so the dates must
        # be reversed too or they no longer line up with X and Y
        dates = dates[::-1]

    # The mask band is optional; convert the 1-based band number to a
    # 0-based row index once, outside of the loops
    mask_band = cfg['dataset']['mask_band']
    idx_mask = None if mask_band is None else mask_band - 1

    # Create output metadata to save
    md = {
        'YATSM': cfg['YATSM'],
        cfg['YATSM']['algorithm']: cfg[cfg['YATSM']['algorithm']]
    }
    if want_pheno:
        md.update({'phenology': cfg['phenology']})

    # Begin process
    start_time_all = time.time()
    for job_line in job_lines:
        out = get_output_name(cfg['dataset'], job_line)

        if resume:
            try:
                previous = np.load(out)
            except Exception:
                # Missing or unreadable output: process this line (again)
                pass
            else:
                # Release the file handle if np.load returned an archive
                close = getattr(previous, 'close', None)
                if close is not None:
                    close()
                logger.debug('Already processed line %s', job_line)
                continue

        logger.debug('Running line %s', job_line)
        start_time = time.time()

        Y = read_line(job_line, df['filename'], df['image_ID'],
                      cfg['dataset'], ncol, nband, dtype,
                      read_cache=read_cache, write_cache=write_cache,
                      validate_cache=False)
        if do_not_run:
            continue
        if reverse:
            Y = np.fliplr(Y)

        output = []
        for col in np.arange(Y.shape[-1]):
            _Y = Y.take(col, axis=2)
            # Mask
            valid = cyprep.get_valid_mask(
                _Y,
                cfg['dataset']['min_values'],
                cfg['dataset']['max_values']).astype(bool)

            if idx_mask is not None:
                valid &= np.in1d(_Y.take(idx_mask, axis=0),
                                 cfg['dataset']['mask_values'],
                                 invert=True).astype(bool)
                _Y = np.delete(_Y, idx_mask, axis=0)

            _Y = _Y[:, valid]
            _X = X[valid, :]
            _dates = dates[valid]

            # Run model
            cls = cfg['YATSM']['algorithm_cls']
            algo_cfg = cfg[cfg['YATSM']['algorithm']]

            yatsm = cls(lm=cfg['YATSM']['prediction_object'],
                        **algo_cfg.get('init', {}))
            yatsm.px = col
            yatsm.py = job_line

            try:
                yatsm.fit(_X, _Y, _dates, **algo_cfg.get('fit', {}))
            except TSLengthException:
                continue

            if yatsm.record is None or len(yatsm.record) == 0:
                continue

            # Postprocess
            if cfg['YATSM'].get('commission_alpha'):
                yatsm.record = postprocess.commission_test(
                    yatsm, cfg['YATSM']['commission_alpha'])

            for prefix, lm in zip(cfg['YATSM']['refit']['prefix'],
                                  cfg['YATSM']['refit']['prediction_object']):
                yatsm.record = postprocess.refit_record(yatsm, prefix, lm,
                                                        keep_regularized=True)

            if want_pheno:
                pcfg = cfg['phenology']
                ltm = pheno.LongTermMeanPhenology(**pcfg.get('init', {}))
                yatsm.record = ltm.fit(yatsm, **pcfg.get('fit', {}))

            output.extend(yatsm.record)

        logger.debug('    Saving YATSM output to %s', out)
        np.savez(out,
                 record=np.array(output),
                 version=__version__,
                 metadata=md)

        run_time = time.time() - start_time
        logger.debug('Line %s took %ss to run', job_line, run_time)

    logger.info('Completed {n} lines in {m} minutes'.format(
                n=len(job_lines),
                m=round((time.time() - start_time_all) / 60.0, 2)))
예제 #6
0
def test_commission_nochange(sim_nochange):
    """ With no change present, the record should come back unaltered
    """
    result = commission_test(sim_nochange, 0.10)
    n_segments = len(result)
    assert n_segments == 1
    np.testing.assert_array_equal(result, sim_nochange.record)
예제 #7
0
def test_commission_no_real_change_2(sim_no_real_change_2):
    """ Two spurious changes should collapse into one unbroken segment
    """
    result = commission_test(sim_no_real_change_2, 0.01)
    n_segments = len(result)
    assert n_segments == 1
    only_segment = result[0]
    assert only_segment['break'] == 0
예제 #8
0
    def _fetch_results_live(self):
        """ Run YATSM and get results

        Builds the design matrix, data array and dates from the first
        series, masks out unusable observations, fits a ``CCDCesque`` model
        using the current control values, and applies the optional
        commission test and phenology post-processing. The fitted model is
        stored in ``self.yatsm_model``.
        """
        logger.debug('Calculating YATSM results on the fly')
        # Setup design matrix, Y, and dates
        series = self.series[0]
        self.X = patsy.dmatrix(
            self.controls['design'].value, {
                'x': series.images['ordinal'],
                'sensor': series.sensor,
                'pr': series.pathrow
            })
        self._design_info = self.X.design_info.column_name_indexes
        self.Y = series.data.astype(np.int16)
        self.dates = np.asarray(series.images['ordinal'])

        # Mask band is configured 1-based; convert to a 0-based row index
        idx_mask = self.config['mask_band'].value[0] - 1
        mask = self.Y[idx_mask, :]
        Y_data = np.delete(self.Y, idx_mask, axis=0)

        # Mask out masked values
        clear = np.in1d(mask, self.mask_values, invert=True)
        # ``np.bool`` was removed from NumPy; the builtin is equivalent
        valid = get_valid_mask(Y_data, self.config['min_values'].value,
                               self.config['max_values'].value).astype(bool)
        clear &= valid

        # Setup parameters
        # Default estimator, kept whenever a packaged one cannot be loaded
        estimator = sklearn.linear_model.Lasso(alpha=20)
        reg = self.controls['regression_type'].value
        if hasattr(yatsm.regression, 'packaged'):
            if reg in yatsm.regression.packaged.packaged_regressions:
                reg_fn = yatsm.regression.packaged.find_packaged_regressor(reg)
                try:
                    estimator = jl.load(reg_fn)
                except Exception:
                    logger.error('Cannot load regressor: %s', reg)
                else:
                    logger.debug('Loaded regressor %s from %s', reg, reg_fn)
            else:
                logger.error('Cannot use unknown regression %s', reg)
        else:
            logger.warning(
                'Using failsafe Lasso(lambda=20) from scikit-learn. '
                'Upgrade to yatsm>=0.5.1 to access more regressors.')

        kwargs = dict(
            estimator=estimator,
            test_indices=self.controls['test_indices'].value,
            consecutive=self.controls['consecutive'].value,
            threshold=self.controls['threshold'].value,
            min_obs=self.controls['min_obs'].value,
            # NOTE(review): this passes None when 'enable_min_rmse' is
            # truthy, which looks inverted -- confirm against the control's
            # intended meaning before changing
            min_rmse=(None if self.controls['enable_min_rmse'].value else
                      self.controls['min_rmse'].value),
            screening_crit=self.controls['screen_crit'].value,
            remove_noise=self.controls['remove_noise'].value,
            dynamic_rmse=self.controls['dynamic_rmse'].value,
        )

        self.yatsm_model = CCDCesque(**version_kwargs(kwargs))
        # Don't want to have DEBUG logging when we run YATSM
        log_level = logger.level
        logger.setLevel(logging.INFO)
        try:
            if self.controls['reverse'].value:
                self.yatsm_model.fit(np.flipud(self.X[clear, :]),
                                     np.fliplr(Y_data[:, clear]),
                                     self.dates[clear][::-1])
            else:
                self.yatsm_model.fit(self.X[clear, :], Y_data[:, clear],
                                     self.dates[clear])

            if self.controls['commit_test'].value:
                self.yatsm_model.record = postprocess.commission_test(
                    self.yatsm_model, self.controls['commit_alpha'].value)

            if self.config['calc_pheno'].value:
                # TODO: parameterize band indices & scale factor
                ltm = pheno.LongTermMeanPhenology()
                self.yatsm_model.record = ltm.fit(self.yatsm_model)
        finally:
            # Restore log level even if fitting or post-processing failed
            logger.setLevel(log_level)
예제 #9
0
    def _fetch_results_live(self):
        """ Run YATSM and get results

        Builds the design matrix, data array and dates from the first
        series, masks out unusable observations, fits a ``CCDCesque`` model
        using the current control values, and applies the optional
        commission test and phenology post-processing. The fitted model is
        stored in ``self.yatsm_model``.
        """
        logger.debug('Calculating YATSM results on the fly')
        # Setup design matrix, Y, and dates
        series = self.series[0]
        self.X = patsy.dmatrix(self.controls['design'].value,
                               {'x': series.images['ordinal'],
                                'sensor': series.sensor,
                                'pr': series.pathrow})
        self._design_info = self.X.design_info.column_name_indexes
        self.Y = series.data.astype(np.int16)
        self.dates = np.asarray(series.images['ordinal'])

        # Mask band is configured 1-based; convert to a 0-based row index
        idx_mask = self.config['mask_band'].value[0] - 1
        mask = self.Y[idx_mask, :]
        Y_data = np.delete(self.Y, idx_mask, axis=0)

        # Mask out masked values
        clear = np.in1d(mask, self.mask_values, invert=True)
        # ``np.bool`` was removed from NumPy; the builtin is equivalent
        valid = get_valid_mask(Y_data,
                               self.config['min_values'].value,
                               self.config['max_values'].value).astype(bool)
        clear &= valid

        # Setup parameters
        # Default estimator, kept whenever a packaged one cannot be loaded
        estimator = sklearn.linear_model.Lasso(alpha=20)
        reg = self.controls['regression_type'].value
        if hasattr(yatsm.regression, 'packaged'):
            if reg in yatsm.regression.packaged.packaged_regressions:
                reg_fn = yatsm.regression.packaged.find_packaged_regressor(reg)
                try:
                    estimator = jl.load(reg_fn)
                except Exception:
                    logger.error('Cannot load regressor: %s', reg)
                else:
                    logger.debug('Loaded regressor %s from %s', reg, reg_fn)
            else:
                logger.error('Cannot use unknown regression %s', reg)
        else:
            logger.warning(
                'Using failsafe Lasso(lambda=20) from scikit-learn. '
                'Upgrade to yatsm>=0.5.1 to access more regressors.')

        kwargs = dict(
            estimator=estimator,
            test_indices=self.controls['test_indices'].value,
            consecutive=self.controls['consecutive'].value,
            threshold=self.controls['threshold'].value,
            min_obs=self.controls['min_obs'].value,
            # NOTE(review): this passes None when 'enable_min_rmse' is
            # truthy, which looks inverted -- confirm against the control's
            # intended meaning before changing
            min_rmse=(None if self.controls['enable_min_rmse'].value else
                      self.controls['min_rmse'].value),
            screening_crit=self.controls['screen_crit'].value,
            remove_noise=self.controls['remove_noise'].value,
            dynamic_rmse=self.controls['dynamic_rmse'].value,
        )

        self.yatsm_model = CCDCesque(**version_kwargs(kwargs))
        # Don't want to have DEBUG logging when we run YATSM
        log_level = logger.level
        logger.setLevel(logging.INFO)
        try:
            if self.controls['reverse'].value:
                self.yatsm_model.fit(
                    np.flipud(self.X[clear, :]),
                    np.fliplr(Y_data[:, clear]),
                    self.dates[clear][::-1])
            else:
                self.yatsm_model.fit(
                    self.X[clear, :],
                    Y_data[:, clear],
                    self.dates[clear])

            if self.controls['commit_test'].value:
                self.yatsm_model.record = postprocess.commission_test(
                    self.yatsm_model, self.controls['commit_alpha'].value)

            if self.config['calc_pheno'].value:
                # TODO: parameterize band indices & scale factor
                ltm = pheno.LongTermMeanPhenology()
                self.yatsm_model.record = ltm.fit(self.yatsm_model)
        finally:
            # Restore log level even if fitting or post-processing failed
            logger.setLevel(log_level)