def test_cache_permissions_readT_writeT(self):  # True / True
    """Cache dir that is readable and writable -> test_cache reports (True, True)."""
    test_dir = 'test/test_t_t'
    create_dir(test_dir, read=True, write=True)
    self.assertEqual((True, True),
                     cache.test_cache({'cache_line_dir': test_dir}))
    remove_dir(test_dir)
def test_cache_permissions_readF_writeF(self):  # False / False
    """Cache dir with neither read nor write access reports (False, False)."""
    path = 'test/test_f_f'
    create_dir(path, read=False, write=False)
    result = cache.test_cache({'cache_line_dir': path})
    self.assertEqual((False, False), result)
    remove_dir(path)
def test_test_cache(mkdir_permissions):
    """Exercise cache.test_cache() against every read/write permission combo.

    Also checks that a cache directory that does not exist yet is created on
    demand and reported as readable and writable.

    Args:
        mkdir_permissions: pytest fixture returning a temp dir created with
            the requested read/write permission bits
    """
    # Test when cache dir exists already
    path = mkdir_permissions(read=False, write=False)
    assert (False, False) == cache.test_cache(dict(cache_line_dir=path))

    path = mkdir_permissions(read=False, write=True)
    assert (False, True) == cache.test_cache(dict(cache_line_dir=path))

    path = mkdir_permissions(read=True, write=False)
    assert (True, False) == cache.test_cache(dict(cache_line_dir=path))

    path = mkdir_permissions(read=True, write=True)
    assert (True, True) == cache.test_cache(dict(cache_line_dir=path))

    # Test when cache dir doesn't exist:
    # mkdtemp() yields a unique path under the real temp directory without
    # relying on the private tempfile._get_candidate_names(); the previous
    # use of tempfile.tempdir could also be None (it is only populated after
    # tempfile.gettempdir() has been called), crashing os.path.join.
    tmp = tempfile.mkdtemp(suffix='_yatsm')
    os.rmdir(tmp)  # remove it so test_cache() must (re)create it
    read_write = cache.test_cache(dict(cache_line_dir=tmp))
    os.removedirs(tmp)
    assert (True, True) == read_write
def line(ctx, config, job_number, total_jobs, resume, check_cache, do_not_run,
         verbose_yatsm):
    """Run YATSM timeseries models over this job's share of image lines.

    Parses the configuration file, divides the image rows among `total_jobs`
    workers, fits the configured YATSM algorithm pixel-by-pixel for every
    line assigned to `job_number`, optionally postprocesses (commission test,
    refits, phenology), and saves each line's results to a NumPy ``.npz``.

    Args:
        ctx: click context (part of the CLI signature)
        config (str): path to the YATSM configuration file
        job_number (int): index of this job
        total_jobs (int): total number of parallel jobs
        resume (bool): skip lines whose existing output loads cleanly
        check_cache (bool): unused here; cache validation is disabled below
        do_not_run (bool): read inputs but skip model fitting and saving
        verbose_yatsm (bool): enable DEBUG logging on the algorithm logger

    Raises:
        click.Abort: on missing phenology module or unusable output directory
    """
    if verbose_yatsm:
        logger_algo.setLevel(logging.DEBUG)

    # Parse config
    cfg = parse_config_file(config)

    # Phenology metrics requested but the optional module failed to import
    if ('phenology' in cfg and cfg['phenology'].get('enable')) and not pheno:
        click.secho('Could not import yatsm.phenology but phenology metrics '
                    'are requested', fg='red')
        click.secho('Error: %s' % pheno_exception, fg='red')
        raise click.Abort()

    # Make sure output directory exists and is writable
    output_dir = cfg['dataset']['output']
    try:
        os.makedirs(output_dir)
    except OSError as e:
        if e.errno == 17:  # EEXIST: directory already exists -- fine
            pass
        elif e.errno == 13:  # EACCES: permission denied
            click.secho('Cannot create output directory %s' % output_dir,
                        fg='red')
            raise click.Abort()
        else:
            # Other errnos (e.g. ENOSPC) were previously swallowed silently;
            # surface them instead of failing mysteriously later
            raise
    if not os.access(output_dir, os.W_OK):
        click.secho('Cannot write to output directory %s' % output_dir,
                    fg='red')
        raise click.Abort()

    # Test existence of cache directory
    read_cache, write_cache = test_cache(cfg['dataset'])

    logger.info('Job {i} of {n} - using config file {f}'.format(i=job_number,
                                                                n=total_jobs,
                                                                f=config))
    df = csvfile_to_dataframe(cfg['dataset']['input_file'],
                              cfg['dataset']['date_format'])
    df['image_ID'] = get_image_IDs(df['filename'])

    # Get attributes of one of the images
    nrow, ncol, nband, dtype = get_image_attribute(df['filename'][0])

    # Calculate the lines this job ID works on
    job_lines = distribute_jobs(job_number, total_jobs, nrow)
    logger.debug('Responsible for lines: {l}'.format(l=job_lines))

    # Calculate X feature input
    dates = np.asarray(df['date'])
    kws = {'x': dates}
    kws.update(df.to_dict())
    X = patsy.dmatrix(cfg['YATSM']['design_matrix'], kws)
    cfg['YATSM']['design'] = X.design_info.column_name_indexes

    # Form YATSM class arguments
    fit_indices = np.arange(cfg['dataset']['n_bands'])
    if cfg['dataset']['mask_band'] is not None:
        # Drop the last band (the mask band) from the fit indices
        fit_indices = fit_indices[:-1]
    # NOTE(review): fit_indices is never referenced below -- presumably meant
    # to be handed to the algorithm; confirm before removing

    if cfg['YATSM']['reverse']:
        X = np.flipud(X)

    # Create output metadata to save
    md = {
        'YATSM': cfg['YATSM'],
        cfg['YATSM']['algorithm']: cfg[cfg['YATSM']['algorithm']]
    }
    if cfg['phenology']['enable']:
        md.update({'phenology': cfg['phenology']})

    # Begin process
    start_time_all = time.time()
    for line in job_lines:
        out = get_output_name(cfg['dataset'], line)

        if resume:
            try:
                np.load(out)
            except Exception:
                # Output missing or unreadable -- (re)process this line.
                # (Previously a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit.)
                pass
            else:
                logger.debug('Already processed line %s' % line)
                continue

        logger.debug('Running line %s' % line)
        start_time = time.time()

        Y = read_line(line, df['filename'], df['image_ID'], cfg['dataset'],
                      ncol, nband, dtype,
                      read_cache=read_cache, write_cache=write_cache,
                      validate_cache=False)
        if do_not_run:
            continue
        if cfg['YATSM']['reverse']:
            Y = np.fliplr(Y)

        output = []
        for col in np.arange(Y.shape[-1]):
            _Y = Y.take(col, axis=2)
            # Mask: drop observations outside min/max or in the mask values
            idx_mask = cfg['dataset']['mask_band'] - 1
            valid = cyprep.get_valid_mask(
                _Y,
                cfg['dataset']['min_values'],
                cfg['dataset']['max_values']).astype(bool)
            # Use builtin `bool`: the `np.bool` alias was deprecated in
            # NumPy 1.20 and removed in 1.24
            valid *= np.in1d(_Y.take(idx_mask, axis=0),
                             cfg['dataset']['mask_values'],
                             invert=True).astype(bool)

            _Y = np.delete(_Y, idx_mask, axis=0)[:, valid]
            _X = X[valid, :]
            _dates = dates[valid]

            # Run model
            cls = cfg['YATSM']['algorithm_cls']
            algo_cfg = cfg[cfg['YATSM']['algorithm']]

            yatsm = cls(lm=cfg['YATSM']['prediction_object'],
                        **algo_cfg.get('init', {}))
            yatsm.px = col
            yatsm.py = line

            try:
                yatsm.fit(_X, _Y, _dates, **algo_cfg.get('fit', {}))
            except TSLengthException:
                # Not enough observations to fit this pixel -- skip it
                continue

            if yatsm.record is None or len(yatsm.record) == 0:
                continue

            # Postprocess
            if cfg['YATSM'].get('commission_alpha'):
                yatsm.record = postprocess.commission_test(
                    yatsm, cfg['YATSM']['commission_alpha'])

            for prefix, lm in zip(cfg['YATSM']['refit']['prefix'],
                                  cfg['YATSM']['refit']['prediction_object']):
                yatsm.record = postprocess.refit_record(yatsm, prefix, lm,
                                                        keep_regularized=True)

            if cfg['phenology']['enable']:
                pcfg = cfg['phenology']
                ltm = pheno.LongTermMeanPhenology(**pcfg.get('init', {}))
                yatsm.record = ltm.fit(yatsm, **pcfg.get('fit', {}))

            output.extend(yatsm.record)

        logger.debug(' Saving YATSM output to %s' % out)
        np.savez(out, record=np.array(output), version=__version__,
                 metadata=md)

        run_time = time.time() - start_time
        logger.debug('Line %s took %ss to run' % (line, run_time))

    logger.info('Completed {n} lines in {m} minutes'.format(
        n=len(job_lines),
        m=round((time.time() - start_time_all) / 60.0, 2)))
def test_cache_permissions_create(self):
    """A cache dir that has to be created fresh comes back fully accessible."""
    path = 'test/'
    result = cache.test_cache({'cache_line_dir': path})
    self.assertEqual((True, True), result)
    remove_dir(path)
# Make output directory try: os.makedirs(dataset_config['output']) except OSError as e: # File exists if e.errno == 17: pass elif e.errno == 13: print('Error - cannot create output directory {d}'.format( d=dataset_config['output'])) print(e.strerror) sys.exit(1) # Test write capability if not os.access(dataset_config['output'], os.W_OK): print('Error - cannot write to output directory {d}'.format( d=dataset_config['output'])) sys.exit(1) # Test existence of cache directory read_cache, write_cache = test_cache(dataset_config) # Run YATSM logger.info('Job {i} / {n} - using config file {f}'.format( i=job_number, n=total_jobs, f=config_file)) main(dataset_config, yatsm_config, check=check, resume=resume, do_not_run=do_not_run, read_cache=read_cache, write_cache=write_cache, validate_cache=check_cache)
# File exists if e.errno == 17: pass elif e.errno == 13: print('Error - cannot create output directory {d}'.format( d=dataset_config['output'])) print(e.strerror) sys.exit(1) # Test write capability if not os.access(dataset_config['output'], os.W_OK): print('Error - cannot write to output directory {d}'.format( d=dataset_config['output'])) sys.exit(1) # Test existence of cache directory read_cache, write_cache = test_cache(dataset_config) # Run YATSM logger.info('Job {i} / {n} - using config file {f}'.format(i=job_number, n=total_jobs, f=config_file)) main(dataset_config, yatsm_config, check=check, resume=resume, do_not_run=do_not_run, read_cache=read_cache, write_cache=write_cache, validate_cache=check_cache)