def main():
    """Pick the samples that fit on the local disk and run the pipeline.

    Steps, in order:

    1. Parse the command line and load the config file it points to.
    2. Build the sample list from the SOFT files and ``isamp``, initialise
       the per-sample output directories and fetch the SRA info.
    3. Work out how much local space may be used and keep only the samples
       selected by ``PPR.select_gsms_to_process`` for that budget.
    4. Hand over to ``R.pipeline_run`` with the requested target tasks.

    Returns ``None`` early (after logging) when no sample is selected.

    Raises:
        IOError: if ``gen_qsub_script`` is among the target tasks but no
            qsub template was supplied.
    """
    # because of ruffus, have to use some global variables
    # global variables: options, config, samples, env, logger, logger_mutex
    # minimize the number of global variables as much as possible
    global options, config, samples

    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    samples = PPR.gen_all_samples_from_soft_and_isamp(
        options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, config['LOCAL_TOP_OUTDIR'])
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    top_outdir = config['LOCAL_TOP_OUTDIR']
    cmd_df = config['LOCAL_CMD_DF']
    min_free = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    max_usage = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    free_to_use = calc_local_free_space_to_use(
        top_outdir, cmd_df, min_free, max_usage)

    logger.info('Selecting samples to process based their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)

    if not samples:  # when samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    for idx, gsm in enumerate(samples, 1):
        # ``!s`` converts the sample to str before the width spec is applied;
        # without it Python 3 raises TypeError for objects that do not
        # define their own __format__.
        logger.info('\t{0:3d} {1!s:30s} {2}'.format(idx, gsm, gsm.outdir))

    if 'gen_qsub_script' in options.target_tasks and not options.qsub_template:
        raise IOError(
            '-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # history_file is intentionally left disabled:
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )
def main():
    """Entry point: choose the GSMs that fit on local disk, then run the pipeline.

    Parses the command-line options, loads the config, generates the samples
    from the SOFT files and ``isamp``, prepares their output directories and
    SRA info, narrows the list with ``PPR.select_gsms_to_process`` using the
    computed free-space budget, and finally calls ``R.pipeline_run``.

    Logs a message and returns ``None`` without running anything when the
    selection comes back empty.

    Raises:
        IOError: when ``gen_qsub_script`` is a target task and
            ``options.qsub_template`` is not set.
    """
    # because of ruffus, have to use some global variables
    # global variables: options, config, samples, env, logger, logger_mutex
    # minimize the number of global variables as much as possible
    global options, config, samples

    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    samples = PPR.gen_all_samples_from_soft_and_isamp(
        options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, config['LOCAL_TOP_OUTDIR'])
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    # Disk budget available for this run.
    top_outdir = config['LOCAL_TOP_OUTDIR']
    cmd_df = config['LOCAL_CMD_DF']
    min_free = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    max_usage = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    free_to_use = calc_local_free_space_to_use(
        top_outdir, cmd_df, min_free, max_usage)

    logger.info('Selecting samples to process based their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)
    if not samples:  # when samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    for k, gsm in enumerate(samples):
        # Convert the sample with ``!s`` first: a bare ``:30s`` spec on an
        # object without its own __format__ is a TypeError on Python 3.
        logger.info('\t{0:3d} {1!s:30s} {2}'.format(k + 1, gsm, gsm.outdir))

    if 'gen_qsub_script' in options.target_tasks:
        if not options.qsub_template:
            raise IOError(
                '-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # history_file is currently disabled:
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )
def test_select_gsms_to_process_ignore_disk_usage(self, mock_is_processed):
    # No sample is reported as processed, and the third positional argument
    # is True (presumably the ignore-disk-usage flag, going by the test
    # name), so the whole input list is expected back.
    mock_is_processed.return_value = False
    gsms = [mock.Mock(), mock.Mock()]
    free_to_use = 1024 ** 3

    selected = ppr.select_gsms_to_process(gsms, free_to_use, True)

    self.assertEqual(selected, gsms)
def test_select_gsms_to_process_fit_disk_usage(self, mock_is_processed, mock_estimate_sra2fastq_usage):
    # Nothing is processed yet and every usage estimate is 513. With 1024
    # available, only the first sample is expected to be selected
    # (presumably because two samples would need 1026 -- verify against
    # select_gsms_to_process).
    mock_is_processed.return_value = False
    mock_estimate_sra2fastq_usage.return_value = 513
    first, second = mock.Mock(), mock.Mock()

    selected = ppr.select_gsms_to_process([first, second], 1024, False)

    self.assertEqual(selected, [first])
def test_select_gsms_to_process_all_processed(self, mock_is_processed):
    # Every sample is reported as already processed, so the selection is
    # expected to be empty.
    mock_is_processed.return_value = True
    gsms = [mock.Mock(), mock.Mock()]
    free_to_use = 1024 ** 3

    selected = ppr.select_gsms_to_process(gsms, free_to_use, False)

    self.assertEqual(selected, [])