def post(self, analysis_id):
    command_args = self.get_arguments("commands")
    split = [x.split("#") for x in command_args]
    analysis = Analysis(analysis_id)
    commands = []
    # HARD CODED HACKY THING FOR DEMO, FIX Issue #164
    fp, mapping_file = mkstemp(suffix="_map_file.txt")
    close(fp)
    SampleTemplate(1).to_file(mapping_file)
    study_fps = {}
    for pd in Study(1).processed_data:
        processed = ProcessedData(pd)
        study_fps[processed.data_type] = processed.get_filepaths()[0][0]
    for data_type, command in split:
        opts = {
            "--otu_table_fp": study_fps[data_type],
            "--mapping_fp": mapping_file
        }
        if command == "Beta Diversity" and data_type in {'16S', '18S'}:
            opts["--tree_fp"] = join(get_db_files_base_dir(), "reference",
                                     "gg_97_otus_4feb2011.tre")
        elif command == "Beta Diversity":
            opts["--parameter_fp"] = join(get_db_files_base_dir(),
                                          "reference", "params_qiime.txt")
        Job.create(data_type, command, opts, analysis)
        commands.append("%s: %s" % (data_type, command))
    user = self.get_current_user()
    self.render("analysis_waiting.html", user=user, aid=analysis_id,
                aname=analysis.name, commands=commands)
    # fire off analysis run here
    # currently sync run so redirect done here. Will remove after demo
    run_analysis(user, analysis)
def test_create(self):
    """Makes sure creation works as expected"""
    # make first job
    new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
    self.assertEqual(new.id, 4)
    # make sure job inserted correctly
    obs = self.conn_handler.execute_fetchall("SELECT * FROM qiita.job "
                                             "WHERE job_id = 4")
    exp = [[4, 2, 1, 3, '{"opt1":4}', None]]
    self.assertEqual(obs, exp)
    # make sure job added to analysis correctly
    obs = self.conn_handler.execute_fetchall("SELECT * FROM "
                                             "qiita.analysis_job WHERE "
                                             "job_id = 4")
    exp = [[1, 4]]
    self.assertEqual(obs, exp)

    # make second job with diff datatype and command to test column insert
    new = Job.create("16S", "Beta Diversity", {"opt1": 4}, Analysis(1))
    self.assertEqual(new.id, 5)
    # make sure job inserted correctly
    obs = self.conn_handler.execute_fetchall("SELECT * FROM qiita.job "
                                             "WHERE job_id = 5")
    exp = [[5, 1, 1, 2, '{"opt1":4}', None]]
    self.assertEqual(obs, exp)
    # make sure job added to analysis correctly
    obs = self.conn_handler.execute_fetchall("SELECT * FROM "
                                             "qiita.analysis_job WHERE "
                                             "job_id = 5")
    exp = [[1, 5]]
    self.assertEqual(obs, exp)
def test_delete_files(self):
    try:
        Job.delete(1)
        with self.assertRaises(QiitaDBUnknownIDError):
            Job(1)
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id = 12 OR "
            "filepath_id = 19")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.analysis_job WHERE job_id = 1")
        self.assertEqual(obs, [])
        self.assertFalse(exists(join(self._job_folder, "1_job_result.txt")))
    finally:
        # put the test data back
        fp = join(self._job_folder, "1_job_result.txt")
        if not exists(fp):
            with open(fp, 'w') as f:
                f.write("job1result.txt")
def _build_analysis_files(analysis, r_depth=None):
    """Creates the biom tables and mapping file, then adds to jobs

    Parameters
    ----------
    analysis : Analysis object
        The analysis to build files for
    r_depth : int, optional
        Rarefaction depth for biom table creation. Default None
    """
    # create the biom tables and add jobs to the analysis
    analysis.status = "running"
    analysis.build_files(r_depth)
    mapping_file = analysis.mapping_file
    biom_tables = analysis.biom_tables

    # add files to existing jobs
    for job_id in analysis.jobs:
        job = Job(job_id)
        if job.status == 'queued':
            opts = {
                "--otu_table_fp": biom_tables[job.datatype],
                "--mapping_fp": mapping_file
            }
            job_opts = job.options
            job_opts.update(opts)
            job.options = job_opts
def _build_analysis_files(analysis, r_depth=None, **kwargs):
    """Creates the biom tables and mapping file, then adds to jobs

    Parameters
    ----------
    analysis : Analysis object
        The analysis to build files for
    r_depth : int, optional
        Rarefaction depth for biom table creation. Default None
    kwargs : ignored
        Necessary to have in parameters to support execution via moi.
    """
    # create the biom tables and add jobs to the analysis
    analysis.status = "running"
    analysis.build_files(r_depth)
    mapping_file = analysis.mapping_file
    biom_tables = analysis.biom_tables

    # add files to existing jobs
    for job_id in analysis.jobs:
        job = Job(job_id)
        if job.status == 'queued':
            opts = {
                "--otu_table_fp": biom_tables[job.datatype],
                "--mapping_fp": mapping_file
            }
            job_opts = job.options
            job_opts.update(opts)
            job.options = job_opts
def test_delete_folders(self):
    try:
        Job.delete(2)
        with self.assertRaises(QiitaDBUnknownIDError):
            Job(2)
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id = 13")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 2")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.analysis_job WHERE job_id = 2")
        self.assertEqual(obs, [])
        self.assertFalse(exists(join(self._job_folder, "2_test_folder")))
    finally:
        # put the test data back
        basedir = self._job_folder
        if not exists(join(basedir, "2_test_folder")):
            mkdir(join(basedir, "2_test_folder"))
            mkdir(join(basedir, "2_test_folder", "subdir"))
            with open(join(basedir, "2_test_folder",
                           "testfile.txt"), 'w') as f:
                f.write("DATA")
            with open(join(basedir, "2_test_folder",
                           "testres.htm"), 'w') as f:
                f.write("DATA")
            with open(join(basedir, "2_test_folder", "subdir",
                           "subres.html"), 'w') as f:
                f.write("DATA")
def test_create_exists(self):
    """Makes sure creation doesn't duplicate a job"""
    with self.assertRaises(QiitaDBDuplicateError):
        Job.create("18S", "Beta Diversity",
                   {"--otu_table_fp": 1, "--mapping_fp": 1},
                   Analysis(1))
def _failure_callback(self, msg=None):
    """Executed if something fails"""
    # set the analysis to errored
    self.analysis.status = 'error'

    if self._update_status is not None:
        self._update_status("Failed")

    # set any jobs to errored if they didn't execute
    for job_id in self.analysis.jobs:
        job = Job(job_id)
        if job.status not in {'error', 'completed'}:
            job.status = 'error'

    LogEntry.create('Runtime', msg, info={'analysis': self.analysis.id})
def test_add_jobs_in_construct_job_graphs(self):
    analysis = Analysis(2)
    RunAnalysis()._construct_job_graph(
        analysis, [('18S', 'Summarize Taxa')],
        comm_opts={'Summarize Taxa': {'opt1': 5}})
    self.assertEqual(analysis.jobs, [Job(3), Job(4)])

    job = Job(4)
    self.assertEqual(job.datatype, '18S')
    self.assertEqual(job.command,
                     ['Summarize Taxa', 'summarize_taxa_through_plots.py'])
    expopts = {
        '--output_dir': join(
            get_db_files_base_dir(), 'job',
            '4_summarize_taxa_through_plots.py_output_dir'),
        'opt1': 5}
    self.assertEqual(job.options, expopts)
def test_set_options(self):
    new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
    new.options = self.options
    self.options['--output_dir'] = join(get_db_files_base_dir(),
                                        'job/4_alpha_rarefaction.'
                                        'py_output_dir')
    self.assertEqual(new.options, self.options)
def test_set_options(self):
    new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
    new.options = self.options
    self.options['--output_dir'] = join(self._job_folder,
                                        '4_alpha_rarefaction.'
                                        'py_output_dir')
    self.assertEqual(new.options, self.options)
def test_exists_return_jobid(self):
    """tests that existing job returns true"""
    # need to insert matching sample data into analysis 2
    self.conn_handler.execute(
        "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample "
        "(analysis_id, processed_data_id, sample_id) VALUES "
        "(2, 1,'1.SKB8.640193'), (2, 1,'1.SKD8.640184'), "
        "(2, 1,'1.SKB7.640196'), (2, 1,'1.SKM9.640192'), "
        "(2, 1,'1.SKM4.640180')")
    exists, jid = Job.exists("18S", "Beta Diversity",
                             {"--otu_table_fp": 1, "--mapping_fp": 1},
                             Analysis(1), return_existing=True)
    self.assertTrue(exists)
    self.assertEqual(jid, Job(2))
def get(self, analysis_id):
    analysis_id = int(analysis_id.split("/")[0])
    analysis = Analysis(analysis_id)
    check_analysis_access(self.current_user, analysis)

    jobres = defaultdict(list)
    for job in analysis.jobs:
        jobject = Job(job)
        jobres[jobject.datatype].append((jobject.command[0],
                                         jobject.results))

    dropped = {}
    dropped_samples = analysis.dropped_samples
    if dropped_samples:
        for proc_data_id, samples in viewitems(dropped_samples):
            proc_data = ProcessedData(proc_data_id)
            key = "Data type %s, Study: %s" % (proc_data.data_type(),
                                               proc_data.study)
            dropped[key] = samples

    self.render("analysis_results.html", jobres=jobres,
                aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
def get(self, analysis_id):
    user = self.current_user
    analysis_id = int(analysis_id)
    check_analysis_access(User(user), analysis_id)

    analysis = Analysis(analysis_id)
    jobres = defaultdict(list)
    for job in analysis.jobs:
        jobject = Job(job)
        jobres[jobject.datatype].append((jobject.command[0],
                                         jobject.results))

    dropped = {}
    for proc_data_id, samples in viewitems(analysis.dropped_samples):
        proc_data = ProcessedData(proc_data_id)
        key = "Data type %s, Study: %s" % (proc_data.data_type(),
                                           proc_data.study)
        dropped[key] = samples

    self.render("analysis_results.html", user=self.current_user,
                jobres=jobres, aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())

    # wipe out cached messages for this analysis
    r_server = Redis()
    key = '%s:messages' % self.current_user
    oldmessages = r_server.lrange(key, 0, -1)
    if oldmessages is not None:
        for message in oldmessages:
            if '"analysis": %d' % analysis_id in message:
                r_server.lrem(key, message, 1)
def _finish_analysis(user, analysis):
    """Checks job statuses, finalizes the analysis, and notifies via redis

    Parameters
    ----------
    user : str
        user running this analysis.
    analysis: Analysis object
        Analysis to finalize.
    """
    from qiita_ware import r_server
    # check job exit statuses for analysis result status
    all_good = True
    for job_id in analysis.jobs:
        if Job(job_id).status == "error":
            all_good = False
            break

    # set final analysis status
    if all_good:
        analysis.status = "completed"
    else:
        analysis.status = "error"

    # send websockets message that we are done running all jobs
    msg = {"msg": "allcomplete", "analysis": analysis.id}
    r_server.rpush(user + ":messages", dumps(msg))
    r_server.publish(user, dumps(msg))
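For context, a minimal consumer of the "allcomplete" message pushed above might look like the sketch below. It assumes a plain redis-py client and the "<user>:messages" key convention used in _finish_analysis; the user id is hypothetical and this is not part of Qiita itself.

import json

from redis import Redis

r_server = Redis()
user = "demo@example.com"  # hypothetical user id
# replay any messages queued for this user's wait page
for raw in r_server.lrange(user + ":messages", 0, -1):
    msg = json.loads(raw)
    if msg.get("msg") == "allcomplete":
        print("analysis %s finished running all jobs" % msg["analysis"])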
def test_exists_noexist_return_jobid(self):
    """tests that non-existent job with bad samples returns false"""
    exists, jid = Job.exists(
        "16S", "Beta Diversity",
        {"--otu_table_fp": 1, "--mapping_fp": 27},
        Analysis(1), return_existing=True)
    self.assertFalse(exists)
    self.assertEqual(jid, None)
def test_create_exists_return_existing(self):
    """Makes sure creation doesn't duplicate a job by returning existing"""
    Analysis.create(User("*****@*****.**"), "new", "desc")
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample (analysis_id, "
        "processed_data_id, sample_id) VALUES (3,1,'SKB8.640193'), "
        "(3,1,'SKD8.640184'), (3,1,'SKB7.640196'), (3,1,'SKM9.640192'),"
        "(3,1,'SKM4.640180')")
    new = Job.create("18S", "Beta Diversity",
                     {"--otu_table_fp": 1, "--mapping_fp": 1},
                     Analysis(3), return_existing=True)
    self.assertEqual(new.id, 2)
def test_exists(self):
    """tests that existing job returns true"""
    # need to insert matching sample data into analysis 2
    self.conn_handler.execute(
        "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample (analysis_id, "
        "processed_data_id, sample_id) VALUES (2,1,'SKB8.640193'), "
        "(2,1,'SKD8.640184'), (2,1,'SKB7.640196'), (2,1,'SKM9.640192'),"
        "(2,1,'SKM4.640180')")
    self.assertTrue(Job.exists("18S", "Beta Diversity",
                               {"--otu_table_fp": 1, "--mapping_fp": 1},
                               Analysis(1)))
def test_add_jobs_in_construct_job_graphs(self):
    analysis = Analysis(2)
    npt.assert_warns(QiitaDBWarning, analysis.build_files)
    RunAnalysis()._construct_job_graph(
        analysis, [('18S', 'Summarize Taxa')],
        comm_opts={'Summarize Taxa': {'opt1': 5}})
    self.assertEqual(analysis.jobs, [Job(3), Job(4)])

    job = Job(4)
    self.assertEqual(job.datatype, '18S')
    self.assertEqual(job.command,
                     ['Summarize Taxa', 'summarize_taxa_through_plots.py'])
    expopts = {
        '--mapping_fp': join(get_db_files_base_dir(),
                             'analysis/2_analysis_mapping.txt'),
        '--otu_table_fp': join(get_db_files_base_dir(),
                               'analysis/2_analysis_dt-18S_r-1_c-3.biom'),
        '--output_dir': join(
            get_db_files_base_dir(), 'job',
            '4_summarize_taxa_through_plots.py_output_dir'),
        'opt1': 5}
    self.assertEqual(job.options, expopts)
def test_create_exists_return_existing(self):
    """Makes sure creation doesn't duplicate a job by returning existing"""
    Analysis.create(User("*****@*****.**"), "new", "desc")
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample "
        "(analysis_id, processed_data_id, sample_id) VALUES "
        "(3, 1, '1.SKB8.640193'), (3, 1, '1.SKD8.640184'), "
        "(3, 1, '1.SKB7.640196'), (3, 1, '1.SKM9.640192'), "
        "(3, 1, '1.SKM4.640180')")
    new = Job.create("18S", "Beta Diversity",
                     {"--otu_table_fp": 1, "--mapping_fp": 1},
                     Analysis(3), return_existing=True)
    self.assertEqual(new.id, 2)
def run_analysis(user, analysis):
    """Runs the commands within an Analysis object and sends user messages"""
    analysis.status = "running"
    all_good = True
    pubsub = r_server.pubsub()
    pubsub.subscribe(user)
    for job_id in analysis.jobs:
        job = Job(job_id)
        if job.status == 'queued':
            name, command = job.command
            options = job.options
            # create json base for websocket messages
            msg = {
                "analysis": analysis.id,
                "msg": None,
                "command": "%s: %s" % (job.datatype, name)
            }
            o_fmt = ' '.join(['%s %s' % (k, v) for k, v in options.items()])
            c_fmt = str("%s %s" % (command, o_fmt))

            # send running message to user wait page
            job.status = 'running'
            msg["msg"] = "Running"
            r_server.rpush(user + ":messages", dumps(msg))
            r_server.publish(user, dumps(msg))

            # run the command
            try:
                qiita_compute.submit_sync(c_fmt)
            except Exception as e:
                all_good = False
                job.status = 'error'
                msg["msg"] = "ERROR"
                r_server.rpush(user + ":messages", dumps(msg))
                r_server.publish(user, dumps(msg))
                print("Failed compute on job id %d: %s\n%s"
                      % (job_id, e, c_fmt))
                continue

            msg["msg"] = "Completed"
            r_server.rpush(user + ":messages", dumps(msg))
            r_server.publish(user, dumps(msg))
            # FIX THIS Should not be hard coded
            job.add_results([(options["--output_dir"], "directory")])
            job.status = 'completed'

    # send websockets message that we are done
    msg["msg"] = "allcomplete"
    msg["command"] = ""
    r_server.rpush(user + ":messages", dumps(msg))
    r_server.publish(user, dumps(msg))
    pubsub.unsubscribe()

    # set final analysis status
    if all_good:
        analysis.status = "completed"
    else:
        analysis.status = "error"
def test_exists_noexist_options(self):
    """tests that non-existent job with bad options returns false"""
    # need to insert matching sample data into analysis 2
    # makes sure failure is because options and not samples
    self.conn_handler.execute(
        "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample (analysis_id, "
        "processed_data_id, sample_id) VALUES (2,1,'SKB8.640193'), "
        "(2,1,'SKD8.640184'), (2,1,'SKB7.640196'), (2,1,'SKM9.640192'),"
        "(2,1,'SKM4.640180')")
    self.assertFalse(Job.exists("18S", "Beta Diversity",
                                {"--otu_table_fp": 1, "--mapping_fp": 27},
                                Analysis(1)))
def test_exists_noexist_options(self):
    """tests that non-existent job with bad options returns false"""
    # need to insert matching sample data into analysis 2
    # makes sure failure is because options and not samples
    self.conn_handler.execute(
        "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample "
        "(analysis_id, processed_data_id, sample_id) VALUES "
        "(2, 1,'1.SKB8.640193'), (2, 1,'1.SKD8.640184'), "
        "(2, 1,'1.SKB7.640196'), (2, 1,'1.SKM9.640192'), "
        "(2, 1,'1.SKM4.640180')")
    self.assertFalse(Job.exists("18S", "Beta Diversity",
                                {"--otu_table_fp": 1, "--mapping_fp": 27},
                                Analysis(1)))
def system_call_from_job(job_id, **kwargs):
    """Executes a system call described by a Job

    Parameters
    ----------
    job_id : int
        The job object ID
    """
    job = Job(job_id)
    name, command = job.command
    options = job.options

    cmd = [command]
    cmd.extend(flatten(options.items()))
    cmd_fmt = ' '.join((str(i) for i in cmd))

    try:
        so, se, status = system_call(cmd_fmt)
    except Exception as e:
        job.set_error(str(e))
        raise

    # FIX THIS add_results should not be hard coded Issue #269
    job.add_results([(job.options["--output_dir"], "directory")])
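A self-contained sketch of the command-line assembly step inside system_call_from_job. The flatten helper below is an assumption standing in for the Qiita utility of the same name, and the option values are made up for illustration.

from itertools import chain


def flatten(pairs):
    # [('-a', 1), ('-b', 2)] -> ['-a', 1, '-b', 2]
    return list(chain.from_iterable(pairs))


options = {"--otu_table_fp": "otu_table.biom", "--output_dir": "out/"}
cmd = ["beta_diversity_through_plots.py"]
cmd.extend(flatten(options.items()))
cmd_fmt = ' '.join(str(i) for i in cmd)
print(cmd_fmt)
# beta_diversity_through_plots.py --otu_table_fp otu_table.biom --output_dir out/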
def get(self, analysis_id):
    user = self.current_user
    analysis_id = int(analysis_id)
    check_analysis_access(User(user), analysis_id)

    analysis = Analysis(analysis_id)
    commands = []
    for job in analysis.jobs:
        jobject = Job(job)
        commands.append("%s: %s" % (jobject.datatype, jobject.command[0]))

    self.render("analysis_waiting.html", user=user, aid=analysis_id,
                aname=analysis.name, commands=commands)
def test_get_commands(self):
    exp = [
        Command('Summarize Taxa', 'summarize_taxa_through_plots.py',
                '{"--otu_table_fp":null}', '{}',
                '{"--mapping_category":null, "--mapping_fp":null,'
                '"--sort":null}', '{"--output_dir":null}'),
        Command('Beta Diversity', 'beta_diversity_through_plots.py',
                '{"--otu_table_fp":null,"--mapping_fp":null}', '{}',
                '{"--tree_fp":null,"--color_by_all_fields":null,'
                '"--seqs_per_sample":null}', '{"--output_dir":null}'),
        Command('Alpha Rarefaction', 'alpha_rarefaction.py',
                '{"--otu_table_fp":null,"--mapping_fp":null}', '{}',
                '{"--tree_fp":null,"--num_steps":null,'
                '"--min_rare_depth":null,"--max_rare_depth":null,'
                '"--retain_intermediate_files":false}',
                '{"--output_dir":null}')
    ]
    self.assertEqual(Job.get_commands(), exp)
def test_build_files_job_comm_wrapper(self):
    # basic setup needed for test
    job = Job(3)

    # create the files needed for job, testing _build_analysis_files
    analysis = Analysis(2)
    _build_analysis_files(analysis, 100)
    self._del_files.append(join(get_db_files_base_dir(), "analysis",
                                "2_analysis_mapping.txt"))
    self._del_files.append(join(get_db_files_base_dir(), "analysis",
                                "2_analysis_18S.biom"))
    self.assertTrue(exists(join(get_db_files_base_dir(), "analysis",
                                "2_analysis_mapping.txt")))
    self.assertTrue(exists(join(get_db_files_base_dir(), "analysis",
                                "2_analysis_18S.biom")))
    self.assertEqual([3], analysis.jobs)

    _job_comm_wrapper("*****@*****.**", 2, job)

    self.assertEqual(job.status, "error")
def test_failure_callback(self):
    """Make sure failure at file creation step doesn't hang everything"""
    # rename a needed file for creating the biom table
    base = get_db_files_base_dir()
    rename(join(base, "processed_data",
                "1_study_1001_closed_reference_otu_table.biom"),
           join(base, "processed_data", "1_study_1001.bak"))

    analysis = Analysis(2)
    group = get_id_from_user("*****@*****.**")
    try:
        app = RunAnalysis(moi_context=ctx_default, moi_parent_id=group)
        app(analysis, [], rarefaction_depth=100)
        self.assertEqual(analysis.status, 'error')
        for job_id in analysis.jobs:
            self.assertEqual(Job(job_id).status, 'error')
    finally:
        rename(join(base, "processed_data", "1_study_1001.bak"),
               join(base, "processed_data",
                    "1_study_1001_closed_reference_otu_table.biom"))
def _finish_analysis(analysis, **kwargs):
    """Checks job statuses and finalizes the analysis status

    Parameters
    ----------
    analysis: Analysis
        Analysis to finalize.
    kwargs : ignored
        Necessary to have in parameters to support execution via moi.
    """
    # check job exit statuses for analysis result status
    all_good = True
    for job_id in analysis.jobs:
        if Job(job_id).status == "error":
            all_good = False
            break

    # set final analysis status
    if all_good:
        analysis.status = "completed"
    else:
        analysis.status = "error"
def _construct_job_graph(self, user, analysis, commands, comm_opts=None,
                         rarefaction_depth=None):
    """Builds the job graph for running an analysis

    Parameters
    ----------
    user : str
        user running this analysis.
    analysis: Analysis object
        Analysis to finalize.
    commands : list of tuples
        Commands to add as jobs in the analysis.
        Format [(data_type, command name), ...]
    comm_opts : dict of dicts, optional
        Options for commands. Format {command name: {opt1: value,...},...}
        Default None (use default options).
    rarefaction_depth : int, optional
        Rarefaction depth for analysis' biom tables. Default None.
    """
    self._logger = stderr

    # Add jobs to analysis
    if comm_opts is None:
        comm_opts = {}
    for data_type, command in commands:
        # get opts set by user, else make it empty dict
        opts = comm_opts.get(command, {})
        # Add commands to analysis as jobs
        # HARD CODED HACKY THING FOR DEMO, FIX Issue #164
        if (command == "Beta Diversity" or command == "Alpha Rarefaction"):
            if data_type in {'16S', '18S'}:
                opts["--tree_fp"] = join(get_db_files_base_dir(),
                                         "reference",
                                         "gg_97_otus_4feb2011.tre")
            else:
                opts["--parameter_fp"] = join(get_db_files_base_dir(),
                                              "reference",
                                              "params_qiime.txt")
        if command == "Alpha Rarefaction":
            opts["-n"] = 4
        Job.create(data_type, command, opts, analysis,
                   return_existing=True)

    # Create the files for the jobs
    files_node_name = "%d_ANALYSISFILES" % analysis.id
    self._job_graph.add_node(files_node_name,
                             job=(_build_analysis_files, analysis,
                                  rarefaction_depth),
                             requires_deps=False)
    # Add the jobs
    job_nodes = []
    for job_id in analysis.jobs:
        job = Job(job_id)
        node_name = "%d_JOB_%d" % (analysis.id, job.id)
        job_nodes.append(node_name)
        self._job_graph.add_node(node_name,
                                 job=(_job_comm_wrapper, user,
                                      analysis.id, job),
                                 requires_deps=False)
        # Adding the dependency edges to the graph
        self._job_graph.add_edge(files_node_name, node_name)

    # Finalize the analysis
    node_name = "FINISH_ANALYSIS_%d" % analysis.id
    self._job_graph.add_node(node_name,
                             job=(_finish_analysis, user, analysis),
                             requires_deps=False)
    # Adding the dependency edges to the graph
    for job_node_name in job_nodes:
        self._job_graph.add_edge(job_node_name, node_name)
def test_not_equal(self):
    commands = Command.create_list()
    self.assertFalse(commands[1] != commands[1])
    self.assertTrue(commands[1] != commands[2])
    self.assertTrue(commands[1] != Job(1))
def setUp(self):
    self.job = Job(1)
    self.options = {"option1": False, "option2": 25, "option3": "NEW"}
    self._delete_path = []
    self._delete_dir = []
    _, self._job_folder = get_mountpoint("job")[0]
def test_retrieve_results_empty(self):
    new = Job.create("18S", "Beta Diversity", {"opt1": 4}, Analysis(1))
    self.assertEqual(new.results, [])
def _construct_job_graph(self, analysis, commands, comm_opts=None,
                         rarefaction_depth=None,
                         merge_duplicated_sample_ids=False):
    """Builds the job graph for running an analysis

    Parameters
    ----------
    analysis: Analysis object
        Analysis to finalize.
    commands : list of tuples
        Commands to add as jobs in the analysis.
        Format [(data_type, command name), ...]
    comm_opts : dict of dicts, optional
        Options for commands. Format {command name: {opt1: value,...},...}
        Default None (use default options).
    rarefaction_depth : int, optional
        Rarefaction depth for analysis' biom tables. Default None.
    merge_duplicated_sample_ids : bool, optional
        If the duplicated sample ids in the selected studies should be
        merged or prepended with the artifact ids. False (default) prepends
        the artifact id
    """
    self._logger = stderr
    self.analysis = analysis
    analysis_id = analysis.id

    # Add jobs to analysis
    if comm_opts is None:
        comm_opts = {}

    analysis.status = "running"
    # creating bioms at this point because all this section runs on a
    # worker node, currently an ipython job
    analysis.build_files(rarefaction_depth, merge_duplicated_sample_ids)
    mapping_file = analysis.mapping_file
    tree_commands = ["Beta Diversity", "Alpha Rarefaction"]
    for data_type, biom_fp in viewitems(analysis.biom_tables):
        biom_table = load_table(biom_fp)
        # getting reference_id and software_command_id from the first
        # sample of the biom. This decision was discussed at the qiita
        # meeting on 02/24/16
        metadata = biom_table.metadata(biom_table.ids()[0])
        rid = metadata['reference_id']
        sci = metadata['command_id']

        if rid != 'na':
            reference = Reference(rid)
            tree = reference.tree_fp
        else:
            reference = None
            tree = ''
        cmd = Command(sci) if sci != 'na' else None

        for cmd_data_type, command in commands:
            if data_type != cmd_data_type:
                continue

            # get opts set by user, else make it empty dict
            opts = comm_opts.get(command, {})
            opts["--otu_table_fp"] = biom_fp
            opts["--mapping_fp"] = mapping_file

            if command in tree_commands:
                if tree != '':
                    opts["--tree_fp"] = tree
                else:
                    opts["--parameter_fp"] = join(
                        get_db_files_base_dir(), "reference",
                        "params_qiime.txt")

            if command == "Alpha Rarefaction":
                opts["-n"] = 4

            Job.create(data_type, command, opts, analysis, reference, cmd,
                       return_existing=True)

    # Add the jobs
    job_nodes = []
    for job in analysis.jobs:
        node_name = "%d_JOB_%d" % (analysis_id, job.id)
        job_nodes.append(node_name)
        job_name = "%s: %s" % (job.datatype, job.command[0])
        self._job_graph.add_node(node_name, func=system_call_from_job,
                                 args=(job.id,), job_name=job_name,
                                 requires_deps=False)

    # tgz-ing the analysis results
    tgz_node_name = "TGZ_ANALYSIS_%d" % analysis_id
    job_name = "tgz_analysis_%d" % analysis_id
    self._job_graph.add_node(tgz_node_name, func=_generate_analysis_tgz,
                             args=(analysis,), job_name=job_name,
                             requires_deps=False)
    # Adding the dependency edges to the graph
    for job_node_name in job_nodes:
        self._job_graph.add_edge(job_node_name, tgz_node_name)

    # Finalize the analysis.
    node_name = "FINISH_ANALYSIS_%d" % analysis.id
    self._job_graph.add_node(node_name, func=_finish_analysis,
                             args=(analysis,), job_name='Finalize analysis',
                             requires_deps=False)
    self._job_graph.add_edge(tgz_node_name, node_name)
class JobTest(TestCase):
    """Tests that the job object works as expected"""

    def setUp(self):
        self.job = Job(1)
        self.options = {"option1": False, "option2": 25, "option3": "NEW"}
        self._delete_path = []
        self._delete_dir = []
        _, self._job_folder = get_mountpoint("job")[0]

    def tearDown(self):
        # needs to be this way because map does not play well with remove
        # and rmtree for python3
        for item in self._delete_path:
            remove(item)
        for item in self._delete_dir:
            rmtree(item)

    def test_exists(self):
        """tests that existing job returns true"""
        # need to insert matching sample data into analysis 2
        self.conn_handler.execute(
            "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
        self.conn_handler.execute(
            "INSERT INTO qiita.analysis_sample "
            "(analysis_id, processed_data_id, sample_id) VALUES "
            "(2, 1,'1.SKB8.640193'), (2, 1,'1.SKD8.640184'), "
            "(2, 1,'1.SKB7.640196'), (2, 1,'1.SKM9.640192'), "
            "(2, 1,'1.SKM4.640180')")
        self.assertTrue(Job.exists("18S", "Beta Diversity",
                                   {"--otu_table_fp": 1,
                                    "--mapping_fp": 1}, Analysis(1)))

    def test_exists_return_jobid(self):
        """tests that existing job returns true"""
        # need to insert matching sample data into analysis 2
        self.conn_handler.execute(
            "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
        self.conn_handler.execute(
            "INSERT INTO qiita.analysis_sample "
            "(analysis_id, processed_data_id, sample_id) VALUES "
            "(2, 1,'1.SKB8.640193'), (2, 1,'1.SKD8.640184'), "
            "(2, 1,'1.SKB7.640196'), (2, 1,'1.SKM9.640192'), "
            "(2, 1,'1.SKM4.640180')")
        exists, jid = Job.exists("18S", "Beta Diversity",
                                 {"--otu_table_fp": 1, "--mapping_fp": 1},
                                 Analysis(1), return_existing=True)
        self.assertTrue(exists)
        self.assertEqual(jid, Job(2))

    def test_exists_noexist_options(self):
        """tests that non-existent job with bad options returns false"""
        # need to insert matching sample data into analysis 2
        # makes sure failure is because options and not samples
        self.conn_handler.execute(
            "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
        self.conn_handler.execute(
            "INSERT INTO qiita.analysis_sample "
            "(analysis_id, processed_data_id, sample_id) VALUES "
            "(2, 1,'1.SKB8.640193'), (2, 1,'1.SKD8.640184'), "
            "(2, 1,'1.SKB7.640196'), (2, 1,'1.SKM9.640192'), "
            "(2, 1,'1.SKM4.640180')")
        self.assertFalse(Job.exists("18S", "Beta Diversity",
                                    {"--otu_table_fp": 1,
                                     "--mapping_fp": 27}, Analysis(1)))

    def test_exists_noexist_return_jobid(self):
        """tests that non-existent job with bad samples returns false"""
        exists, jid = Job.exists(
            "16S", "Beta Diversity",
            {"--otu_table_fp": 1, "--mapping_fp": 27},
            Analysis(1), return_existing=True)
        self.assertFalse(exists)
        self.assertEqual(jid, None)

    def test_get_commands(self):
        exp = [
            Command('Summarize Taxa', 'summarize_taxa_through_plots.py',
                    '{"--otu_table_fp":null}', '{}',
                    '{"--mapping_category":null, "--mapping_fp":null,'
                    '"--sort":null}', '{"--output_dir":null}'),
            Command('Beta Diversity', 'beta_diversity_through_plots.py',
                    '{"--otu_table_fp":null,"--mapping_fp":null}', '{}',
                    '{"--tree_fp":null,"--color_by_all_fields":null,'
                    '"--seqs_per_sample":null}', '{"--output_dir":null}'),
            Command('Alpha Rarefaction', 'alpha_rarefaction.py',
                    '{"--otu_table_fp":null,"--mapping_fp":null}', '{}',
                    '{"--tree_fp":null,"--num_steps":null,'
                    '"--min_rare_depth":null,"--max_rare_depth":null,'
                    '"--retain_intermediate_files":false}',
                    '{"--output_dir":null}')
        ]
        self.assertEqual(Job.get_commands(), exp)

    def test_delete_files(self):
        try:
            Job.delete(1)
            with self.assertRaises(QiitaDBUnknownIDError):
                Job(1)
            obs = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.filepath WHERE filepath_id = 12 OR "
                "filepath_id = 19")
            self.assertEqual(obs, [])
            obs = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
            self.assertEqual(obs, [])
            obs = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.analysis_job WHERE job_id = 1")
            self.assertEqual(obs, [])
            self.assertFalse(exists(join(self._job_folder,
                                         "1_job_result.txt")))
        finally:
            # put the test data back
            fp = join(self._job_folder, "1_job_result.txt")
            if not exists(fp):
                with open(fp, 'w') as f:
                    f.write("job1result.txt")

    def test_delete_folders(self):
        try:
            Job.delete(2)
            with self.assertRaises(QiitaDBUnknownIDError):
                Job(2)
            obs = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.filepath WHERE filepath_id = 13")
            self.assertEqual(obs, [])
            obs = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.job_results_filepath WHERE job_id = 2")
            self.assertEqual(obs, [])
            obs = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.analysis_job WHERE job_id = 2")
            self.assertEqual(obs, [])
            self.assertFalse(exists(join(self._job_folder,
                                         "2_test_folder")))
        finally:
            # put the test data back
            basedir = self._job_folder
            if not exists(join(basedir, "2_test_folder")):
                mkdir(join(basedir, "2_test_folder"))
                mkdir(join(basedir, "2_test_folder", "subdir"))
                with open(join(basedir, "2_test_folder",
                               "testfile.txt"), 'w') as f:
                    f.write("DATA")
                with open(join(basedir, "2_test_folder",
                               "testres.htm"), 'w') as f:
                    f.write("DATA")
                with open(join(basedir, "2_test_folder", "subdir",
                               "subres.html"), 'w') as f:
                    f.write("DATA")

    def test_create(self):
        """Makes sure creation works as expected"""
        # make first job
        new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4},
                         Analysis(1))
        self.assertEqual(new.id, 4)
        # make sure job inserted correctly
        obs = self.conn_handler.execute_fetchall("SELECT * FROM qiita.job "
                                                 "WHERE job_id = 4")
        exp = [[4, 2, 1, 3, '{"opt1":4}', None]]
        self.assertEqual(obs, exp)
        # make sure job added to analysis correctly
        obs = self.conn_handler.execute_fetchall("SELECT * FROM "
                                                 "qiita.analysis_job WHERE "
                                                 "job_id = 4")
        exp = [[1, 4]]
        self.assertEqual(obs, exp)

        # make second job with diff datatype and command to test column
        # insert
        new = Job.create("16S", "Beta Diversity", {"opt1": 4}, Analysis(1))
        self.assertEqual(new.id, 5)
        # make sure job inserted correctly
        obs = self.conn_handler.execute_fetchall("SELECT * FROM qiita.job "
                                                 "WHERE job_id = 5")
        exp = [[5, 1, 1, 2, '{"opt1":4}', None]]
        self.assertEqual(obs, exp)
        # make sure job added to analysis correctly
        obs = self.conn_handler.execute_fetchall("SELECT * FROM "
                                                 "qiita.analysis_job WHERE "
                                                 "job_id = 5")
        exp = [[1, 5]]
        self.assertEqual(obs, exp)

    def test_create_exists(self):
        """Makes sure creation doesn't duplicate a job"""
        with self.assertRaises(QiitaDBDuplicateError):
            Job.create("18S", "Beta Diversity",
                       {"--otu_table_fp": 1, "--mapping_fp": 1},
                       Analysis(1))

    def test_create_exists_return_existing(self):
        """Makes sure creation doesn't duplicate a job by returning existing"""
        Analysis.create(User("*****@*****.**"), "new", "desc")
        self.conn_handler.execute(
            "INSERT INTO qiita.analysis_sample "
            "(analysis_id, processed_data_id, sample_id) VALUES "
            "(3, 1, '1.SKB8.640193'), (3, 1, '1.SKD8.640184'), "
            "(3, 1, '1.SKB7.640196'), (3, 1, '1.SKM9.640192'), "
            "(3, 1, '1.SKM4.640180')")
        new = Job.create("18S", "Beta Diversity",
                         {"--otu_table_fp": 1, "--mapping_fp": 1},
                         Analysis(3), return_existing=True)
        self.assertEqual(new.id, 2)

    def test_retrieve_datatype(self):
        """Makes sure datatype retrieval is correct"""
        self.assertEqual(self.job.datatype, '18S')

    def test_retrieve_command(self):
        """Makes sure command retrieval is correct"""
        self.assertEqual(self.job.command,
                         ['Summarize Taxa',
                          'summarize_taxa_through_plots.py'])

    def test_retrieve_options(self):
        self.assertEqual(self.job.options, {
            '--otu_table_fp': 1,
            '--output_dir': join(
                self._job_folder,
                '1_summarize_taxa_through_plots.py_output_dir')})

    def test_set_options(self):
        new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4},
                         Analysis(1))
        new.options = self.options
        self.options['--output_dir'] = join(self._job_folder,
                                            '4_alpha_rarefaction.'
                                            'py_output_dir')
        self.assertEqual(new.options, self.options)

    def test_retrieve_results(self):
        self.assertEqual(self.job.results, ["1_job_result.txt"])

    def test_retrieve_results_folder(self):
        job = Job(2)
        self.assertEqual(job.results,
                         ['2_test_folder/testres.htm',
                          '2_test_folder/subdir/subres.html'])

    def test_retrieve_results_empty(self):
        new = Job.create("18S", "Beta Diversity", {"opt1": 4}, Analysis(1))
        self.assertEqual(new.results, [])

    def test_set_error(self):
        before = datetime.now()
        self.job.set_error("TESTERROR")
        after = datetime.now()
        self.assertEqual(self.job.status, "error")

        error = self.job.error
        self.assertEqual(error.severity, 2)
        self.assertEqual(error.msg, 'TESTERROR')
        self.assertTrue(before < error.time < after)

    def test_retrieve_error_blank(self):
        self.assertEqual(self.job.error, None)

    def test_set_error_completed(self):
        self.job.status = "error"
        with self.assertRaises(QiitaDBStatusError):
            self.job.set_error("TESTERROR")

    def test_retrieve_error_exists(self):
        self.job.set_error("TESTERROR")
        self.assertEqual(self.job.error.msg, "TESTERROR")

    def test_add_results(self):
        self.job.add_results([(join(self._job_folder, "1_job_result.txt"),
                               "plain_text")])
        # make sure files attached to job properly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
        self.assertEqual(obs, [[1, 12], [1, 19]])

    def test_add_results_dir(self):
        # use the existing test directory
        test_dir = join(self._job_folder, "2_test_folder")

        # add folder to job
        self.job.add_results([(test_dir, "directory")])

        # make sure files attached to job properly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
        self.assertEqual(obs, [[1, 12], [1, 19]])

    def test_add_results_completed(self):
        self.job.status = "completed"
        with self.assertRaises(QiitaDBStatusError):
            self.job.add_results([("/fake/dir/", "directory")])
def setUp(self):
    self.job = Job(1)
    self.options = {"option1": False, "option2": 25, "option3": "NEW"}
    self._delete_path = []
    self._delete_dir = []
def _construct_job_graph(self, analysis, commands, comm_opts=None,
                         rarefaction_depth=None,
                         merge_duplicated_sample_ids=False):
    """Builds the job graph for running an analysis

    Parameters
    ----------
    analysis: Analysis object
        Analysis to finalize.
    commands : list of tuples
        Commands to add as jobs in the analysis.
        Format [(data_type, command name), ...]
    comm_opts : dict of dicts, optional
        Options for commands. Format {command name: {opt1: value,...},...}
        Default None (use default options).
    rarefaction_depth : int, optional
        Rarefaction depth for analysis' biom tables. Default None.
    merge_duplicated_sample_ids : bool, optional
        If the duplicated sample ids in the selected studies should be
        merged or prepended with the artifact ids. False (default) prepends
        the artifact id
    """
    self._logger = stderr
    self.analysis = analysis

    # Add jobs to analysis
    if comm_opts is None:
        comm_opts = {}
    for data_type, command in commands:
        # get opts set by user, else make it empty dict
        opts = comm_opts.get(command, {})
        # Add commands to analysis as jobs
        # HARD CODED HACKY THING FOR DEMO, FIX Issue #164
        if (command == "Beta Diversity" or command == "Alpha Rarefaction"):
            if data_type in {'16S', '18S'}:
                opts["--tree_fp"] = join(get_db_files_base_dir(),
                                         "reference",
                                         "gg_97_otus_4feb2011.tre")
            else:
                opts["--parameter_fp"] = join(get_db_files_base_dir(),
                                              "reference",
                                              "params_qiime.txt")
        if command == "Alpha Rarefaction":
            opts["-n"] = 4
        Job.create(data_type, command, opts, analysis,
                   return_existing=True)

    # Create the files for the jobs
    files_node_name = "%d_ANALYSISFILES" % analysis.id
    self._job_graph.add_node(files_node_name,
                             func=_build_analysis_files,
                             args=(analysis, rarefaction_depth,
                                   merge_duplicated_sample_ids),
                             job_name='Build analysis',
                             requires_deps=False)
    # Add the jobs
    job_nodes = []
    for job in analysis.jobs:
        node_name = "%d_JOB_%d" % (analysis.id, job.id)
        job_nodes.append(node_name)
        job_name = "%s: %s" % (job.datatype, job.command[0])
        self._job_graph.add_node(node_name,
                                 func=system_call_from_job,
                                 args=(job.id,),
                                 job_name=job_name,
                                 requires_deps=False)
        # Adding the dependency edges to the graph
        self._job_graph.add_edge(files_node_name, node_name)

    # Finalize the analysis.
    node_name = "FINISH_ANALYSIS_%d" % analysis.id
    self._job_graph.add_node(node_name,
                             func=_finish_analysis,
                             args=(analysis,),
                             job_name='Finalize analysis',
                             requires_deps=False)
    # Adding the dependency edges to the graph
    for job_node_name in job_nodes:
        self._job_graph.add_edge(job_node_name, node_name)
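The _job_graph built above forms a directed acyclic graph of dependencies (build files -> per-datatype jobs -> finalize). Assuming it behaves like a networkx DiGraph (an assumption; the node ids below are made up to mirror the naming scheme), a topological sort yields a valid execution order:

import networkx as nx

g = nx.DiGraph()
# files node feeds every job node; every job node feeds the finish node
for jid in (3, 4):
    node = "2_JOB_%d" % jid
    g.add_edge("2_ANALYSISFILES", node)
    g.add_edge(node, "FINISH_ANALYSIS_2")
print(list(nx.topological_sort(g)))
# one valid order: ['2_ANALYSISFILES', '2_JOB_3', '2_JOB_4',
#                   'FINISH_ANALYSIS_2']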
def test_retrieve_results_folder(self):
    job = Job(2)
    self.assertEqual(job.results,
                     ['2_test_folder/testres.htm',
                      '2_test_folder/subdir/subres.html'])