def test_start_worker_1seq_error(hf, capsys, monkeypatch):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    temp_dir.copy_to("%sheartbeat_db.sqlite" % hf.resource_path)
    worker = launch_worker.Worker(temp_dir.path, heartrate=1, max_wait=20)
    work_con = sqlite3.connect(os.path.join(temp_dir.path, "work_db.sqlite"))
    work_cursor = work_con.cursor()
    work_cursor.execute("INSERT INTO waiting (hash, master_id) VALUES ('foo', 2)")
    work_cursor.execute("INSERT INTO queue (hash, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend) "
                        "VALUES ('foo', ?, 'clustalo', '', 'gappyout 50 90 clean', 0, 0)",
                        (os.path.join(hf.resource_path, "psi_pred"),))
    work_con.commit()

    # Only a single sequence present
    seqbuddy = hf.get_data("cteno_panxs")
    seqbuddy = Sb.pull_recs(seqbuddy, "Oma-PanxαC")
    seqbuddy.write(os.path.join(worker.output, "foo.seqs"))

    monkeypatch.setattr(launch_worker.Worker, "check_masters", lambda *_, **__: True)
    with pytest.raises(SystemExit):
        worker.start()
    out, err = capsys.readouterr()
    assert "Queued job of size 1 encountered: foo" in out
    work_con.close()

def test_worker_fetch_queue_job(hf):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    worker = launch_worker.Worker(temp_dir.path)
    work_con = sqlite3.connect(os.path.join(temp_dir.path, "work_db.sqlite"))
    work_cursor = work_con.cursor()
    work_cursor.execute("INSERT INTO queue (hash, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend) "
                        "VALUES ('foo', './', '', '', 'gappyout 50 90 clean', 0, 0)")
    work_cursor.execute("INSERT INTO queue (hash, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend) "
                        "VALUES ('bar', './', '', '', 'gappyout 50 90 clean', 0, 0)")
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES (?, ?)", ("foo", 1,))
    work_con.commit()

    queued_job = worker.fetch_queue_job()
    assert queued_job == ['bar', './', '', '', ['gappyout', 50.0, 90.0, 'clean'], 0, 0]
    assert not work_cursor.execute("SELECT * FROM queue").fetchall()
    work_con.close()

def test_instantiate_worker():
    temp_dir = br.TempDir()
    worker = launch_worker.Worker(temp_dir.path)
    assert worker.working_dir == temp_dir.path
    assert worker.wrkdb_path == os.path.join(temp_dir.path, "work_db.sqlite")
    assert worker.hbdb_path == os.path.join(temp_dir.path, "heartbeat_db.sqlite")
    assert worker.output == os.path.join(temp_dir.path, ".worker_output")
    assert worker.heartrate == 60
    assert type(worker.heartbeat) == launch_worker.rdmcl.HeartBeat
    assert worker.heartbeat.hbdb_path == worker.hbdb_path
    assert worker.heartbeat.pulse_rate == worker.heartrate
    assert worker.heartbeat.thread_type == "worker"
    assert worker.max_wait == 600
    assert worker.dead_thread_wait == 120
    assert worker.cpus == br.cpu_count() - 1
    assert worker.worker_file == ""
    assert worker.data_file == ""
    assert worker.start_time
    assert worker.split_time == 0
    assert worker.idle == 1
    assert worker.running == 1
    assert worker.last_heartbeat_from_master == 0
    assert worker.subjob_num == 1
    assert worker.num_subjobs == 1
    assert worker.job_id_hash is None
    worker.heartbeat.end()

def test_worker_terminate(hf, monkeypatch, capsys):
    monkeypatch.setattr(launch_worker.rdmcl.HeartBeat, "end", lambda *_: True)
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    work_con = sqlite3.connect(os.path.join(temp_dir.path, "work_db.sqlite"))
    work_cursor = work_con.cursor()
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('2_3_foo', 1)")
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('1_3_foo', 2)")
    work_con.commit()

    worker = launch_worker.Worker(temp_dir.path)
    worker.heartbeat.id = 1
    worker.data_file = temp_dir.subfile(".Worker_1.dat")
    assert os.path.isfile(worker.data_file)
    worker.worker_file = temp_dir.subfile("Worker_1")
    assert os.path.isfile(worker.worker_file)

    with pytest.raises(SystemExit):
        worker.terminate("unit test signal")
    out, err = capsys.readouterr()
    assert "Terminating Worker_1 because of unit test signal" in out
    assert not os.path.isfile(worker.data_file)
    assert not os.path.isfile(worker.worker_file)
    assert not work_cursor.execute("SELECT * FROM processing WHERE worker_id=1").fetchall()
    assert work_cursor.execute("SELECT * FROM processing WHERE worker_id=2").fetchall()
    work_con.close()

def run_psi_pred(seq_rec):
    temp_dir = br.TempDir()
    pwd = os.getcwd()
    psipred_dir = join(hlp.SCRIPT_PATH, "psipred")
    os.chdir(temp_dir.path)
    with open("sequence.fa", "w") as ofile:
        ofile.write(seq_rec.format("fasta"))

    if shutil.which("psipred"):
        command = '''\
seq2mtx sequence.fa > {1}{3}{2}.mtx;
psipred {1}{3}{2}.mtx {0}{3}data{3}weights.dat {0}{3}data{3}weights.dat2 {0}{3}data{3}weights.dat3 > {1}{3}{2}.ss;
psipass2 {0}{3}data{3}weights_p2.dat 1 1.0 1.0 {1}{3}{2}.ss2 {1}{3}{2}.ss > {1}{3}{2}.horiz;
'''.format(psipred_dir, temp_dir.path, seq_rec.id, os.sep)
    else:
        data_weights = join(psipred_dir, "data", "weights")
        command = '''\
{0}{3}bin{3}seq2mtx sequence.fa > {1}{3}{2}.mtx;
{0}{3}bin{3}psipred {1}{3}{2}.mtx {4}.dat {4}.dat2 {4}.dat3 > {1}{3}{2}.ss;
{0}{3}bin{3}psipass2 {4}_p2.dat 1 1.0 1.0 {1}{3}{2}.ss2 {1}{3}{2}.ss > {1}{3}{2}.horiz;
'''.format(psipred_dir, temp_dir.path, seq_rec.id, os.sep, data_weights)

    Popen(command, shell=True).wait()
    os.chdir(pwd)
    with open(join(temp_dir.path, "%s.ss2" % seq_rec.id), "r") as ifile:
        result = ifile.read()
    return result

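# A minimal usage sketch for run_psi_pred(), assuming Biopython's SeqRecord is
# available and PSIPRED is installed (either on PATH or under SCRIPT_PATH/psipred).
# The function name, sequence, and id below are hypothetical placeholders.
def example_run_psi_pred():
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord

    # run_psi_pred() only needs an object with .id and .format("fasta")
    record = SeqRecord(Seq("MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ"), id="demo_seq", description="")
    ss2_text = run_psi_pred(record)  # returns the raw contents of the generated .ss2 file
    print(ss2_text)
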
def test_sqlitebroker_close():
    tmpdir = br.TempDir()
    broker = helpers.SQLiteBroker(os.path.join(tmpdir.path, "db.sqlite"))
    assert broker.broker is None
    broker.start_broker()
    assert broker.broker.is_alive()
    broker.close()
    assert not broker.broker.is_alive()

def test_sqlitebroker_init():
    tmpdir = br.TempDir()
    broker = helpers.SQLiteBroker(os.path.join(tmpdir.path, "db.sqlite"))
    assert broker.db_file == os.path.join(tmpdir.path, "db.sqlite")
    assert type(broker.connection) == sqlite3.Connection
    assert type(broker.broker_cursor) == sqlite3.Cursor
    assert type(broker.broker_queue) == SimpleQueue
    assert broker.broker is None

def test_argparse_init(monkeypatch):
    out_dir = br.TempDir()
    argv = ['launch_worker.py', '--workdb', out_dir.path]
    monkeypatch.setattr(launch_worker.sys, "argv", argv)
    temp_in_args = launch_worker.argparse_init()
    assert temp_in_args.workdb == out_dir.path
    assert temp_in_args.heart_rate == 60
    assert temp_in_args.max_wait == 600
    assert not temp_in_args.log
    assert not temp_in_args.quiet

def test_sqlitebroker_start_and_stop_broker():
    tmpdir = br.TempDir()
    broker = helpers.SQLiteBroker(os.path.join(tmpdir.path, "db.sqlite"))
    assert broker.broker is None
    broker.start_broker()
    assert type(broker.broker) == Process
    assert broker.broker.is_alive()
    broker.stop_broker()
    assert not broker.broker.is_alive()

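# A minimal lifecycle sketch for helpers.SQLiteBroker, assuming only the methods
# exercised by the tests in this module (start_broker, create_table, query,
# iterator, close). The table name and rows are hypothetical placeholders.
def example_sqlitebroker_usage():
    tmpdir = br.TempDir()
    broker = helpers.SQLiteBroker(os.path.join(tmpdir.path, "db.sqlite"))
    broker.start_broker()  # query() raises RuntimeError until the broker Process is running
    broker.create_table("foo", ['id INT PRIMARY KEY', 'some_data TEXT', 'numbers INT'])
    broker.query("INSERT INTO foo (id, some_data, numbers) VALUES (0, 'hello', 25)")
    for row in broker.iterator("SELECT * FROM foo"):
        print(row)  # -> (0, 'hello', 25)
    broker.close()  # stops the broker process; broker.broker.is_alive() becomes False
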
def test_start_worker_no_master(hf, capsys):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    temp_dir.copy_to("%sheartbeat_db.sqlite" % hf.resource_path)
    worker = launch_worker.Worker(temp_dir.path, heartrate=1, max_wait=2)
    with pytest.raises(SystemExit):
        worker.start()
    assert worker.split_time != 0
    assert worker.last_heartbeat_from_master != 0
    worker.data_file = ".Worker_1.dat"
    out, err = capsys.readouterr()
    assert "Starting Worker_2" in out

def test_sqlitebroker_create_table():
    tmpdir = br.TempDir()
    broker = helpers.SQLiteBroker(os.path.join(tmpdir.path, "db.sqlite"))
    broker.create_table("foo", ['id INT PRIMARY KEY', 'some_data TEXT', 'numbers INT'])
    connect = sqlite3.connect(os.path.join(tmpdir.path, "db.sqlite"))
    cursor = connect.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
    response = cursor.fetchone()
    assert response == ("foo",)

    # Try to create the table again so the method skips through the try block
    broker.create_table("foo", ['id INT PRIMARY KEY', 'some_data TEXT', 'numbers INT'])

def test_worker_check_master(hf, capsys):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    temp_dir.copy_to("%sheartbeat_db.sqlite" % hf.resource_path)
    worker = launch_worker.Worker(temp_dir.path)
    worker.last_heartbeat_from_master = time.time() + 100
    assert worker.check_masters(20) is None

    worker.last_heartbeat_from_master = time.time() - 700
    with pytest.raises(SystemExit):
        worker.check_masters(20)
    out, err = capsys.readouterr()
    assert "Terminating Worker_None because of 10 min, 0 sec of master inactivity (spent 20% time idle)" in out

def test_start_worker_clean_dead_master(hf, capsys, monkeypatch):
    monkeypatch.setattr(launch_worker, "random", lambda *_: 0.991)
    # No need to actually call the function, just confirm we can get there
    monkeypatch.setattr(launch_worker.Worker, "clean_dead_threads", lambda *_: print("PASSED"))
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    temp_dir.copy_to("%sheartbeat_db.sqlite" % hf.resource_path)
    worker = launch_worker.Worker(temp_dir.path, heartrate=1, max_wait=2)
    with pytest.raises(SystemExit):
        worker.start()
    out, err = capsys.readouterr()
    assert "PASSED" in out

def test_sqlitebroker_query(monkeypatch):
    tmpdir = br.TempDir()
    broker = helpers.SQLiteBroker(os.path.join(tmpdir.path, "db.sqlite"))
    broker.create_table("foo", ['id INT PRIMARY KEY', 'some_data TEXT', 'numbers INT'])
    with pytest.raises(RuntimeError) as err:
        broker.query("INSERT INTO foo (id, some_data, numbers) VALUES (0, 'hello', 25)")
    assert "Broker not running." in str(err)

    broker.start_broker()
    query = broker.query("INSERT INTO foo (id, some_data, numbers) VALUES (0, 'hello', 25)")
    assert query == []
    broker.close()

    connect = sqlite3.connect(os.path.join(tmpdir.path, "db.sqlite"))
    cursor = connect.cursor()
    cursor.execute("SELECT * FROM foo")
    response = cursor.fetchone()
    assert response == (0, 'hello', 25)

    def raise_error(*_, **__):
        raise sqlite3.OperationalError("sqlite error")

    monkeypatch.setattr(SimpleQueue, "put", raise_error)
    with pytest.raises(sqlite3.OperationalError) as err:
        broker.query("NONSENSE QUERY")
    assert "sqlite error" in str(err)

    def raise_error(*_, **__):
        raise RuntimeError("can't start new thread")

    monkeypatch.setattr(SimpleQueue, "put", raise_error)
    monkeypatch.setattr(helpers, "sleep", raise_error)
    with pytest.raises(RuntimeError) as err:
        broker.query("NONSENSE QUERY")
    assert "can't start new thread" in str(err)

    def raise_error(*_, **__):
        raise RuntimeError("some other runtime error")

    monkeypatch.setattr(SimpleQueue, "put", raise_error)
    monkeypatch.setattr(helpers, "sleep", raise_error)
    with pytest.raises(RuntimeError) as err:
        broker.query("NONSENSE QUERY")
    assert "some other runtime error" in str(err)

def test_start_worker_deleted_check(hf, capsys, monkeypatch):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    temp_dir.copy_to("%sheartbeat_db.sqlite" % hf.resource_path)
    worker = launch_worker.Worker(temp_dir.path, heartrate=1, max_wait=1)
    monkeypatch.setattr(helpers, "dummy_func", lambda *_: os.remove(worker.worker_file))
    monkeypatch.setattr(launch_worker.Worker, "check_masters", lambda *_, **__: True)
    with pytest.raises(SystemExit):
        worker.start()
    out, err = capsys.readouterr()
    assert "Terminating Worker_2 because of deleted check file" in out, print(out)
    assert not os.path.isfile(worker.data_file)

def convert(f, args):
    in_place = args[0]
    tmp = br.TempDir()
    rand_name = "".join([choice(string.ascii_letters + string.digits) for _ in range(10)])
    tmp_file = join(tmp.path, rand_name + ".flac")
    out_file = join(tmp.path, rand_name + ".mp3")
    shutil.copyfile(f, tmp_file)
    Popen("ffmpeg -i '%s' '%s'" % (tmp_file, out_file), stderr=PIPE, stdout=PIPE, shell=True).communicate()
    shutil.copyfile(out_file, os.path.splitext(f)[0] + ".mp3")
    if in_place:
        os.remove(f)
    return

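# A usage sketch for convert(), assuming its (file, args) calling convention with
# args[0] as the in_place flag; the input path is a hypothetical placeholder and
# ffmpeg must be on PATH. The signature looks shaped for something like
# br.run_multicore_function(flac_files, convert, [in_place]), though a plain loop
# works just as well, as shown here.
def example_convert():
    flac_files = ["/tmp/music/track01.flac"]  # hypothetical input file
    for flac in flac_files:
        convert(flac, [False])  # in_place=False keeps the original .flac
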
def test_start_worker_fetch_queue(hf, capsys, monkeypatch):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    temp_dir.copy_to("%sheartbeat_db.sqlite" % hf.resource_path)

    def kill(*args):
        self = args[0]
        os.remove(self.worker_file)
        return

    monkeypatch.setattr(launch_worker.Worker, "process_final_results", kill)
    worker = launch_worker.Worker(temp_dir.path, heartrate=1, max_wait=1)
    work_con = sqlite3.connect(os.path.join(temp_dir.path, "work_db.sqlite"))
    work_cursor = work_con.cursor()
    work_cursor.execute("INSERT INTO waiting (hash, master_id) VALUES ('foo', 2)")
    work_cursor.execute("INSERT INTO queue (hash, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend) "
                        "VALUES ('foo', ?, 'clustalo', '', 'gappyout 50 90 clean', 0, 0)",
                        (os.path.join(hf.resource_path, "psi_pred"),))
    work_con.commit()

    seqbuddy = hf.get_data("cteno_panxs")
    seqbuddy = Sb.pull_recs(seqbuddy, "Oma")  # Only 4 records, which means 6 comparisons
    seqbuddy.write(os.path.join(worker.output, "foo.seqs"))

    with pytest.raises(SystemExit):
        worker.start()
    out, err = capsys.readouterr()
    assert "Running foo" in out
    assert "Creating MSA (4 seqs)" in out
    assert "Trimal (4 seqs)" in out
    assert os.path.isfile(os.path.join(worker.output, "foo.aln"))
    assert "Updating 4 psipred dataframes" in out
    assert "Preparing all-by-all data" in out
    assert "Running all-by-all data (6 comparisons)" in out
    assert "Processing final results" in out
    work_con.close()

def test_worker_load_subjob(hf):
    temp_dir = br.TempDir()
    worker = launch_worker.Worker(temp_dir.path)
    temp_dir.subdir(".worker_output/foo")
    worker.cpus = 2
    ss2_dfs = hf.get_data("ss2_dfs")
    subjob_dir = os.path.join(worker.output, "foo")
    with open(os.path.join(subjob_dir, "2_of_3.txt"), "w") as ofile:
        ofile.write("""\
Bch-PanxαA Bch-PanxαB
Bch-PanxαA Bch-PanxαC
Bch-PanxαA Bch-PanxαD
Bch-PanxαA Bch-PanxαE
""")
    data_len, data = worker.load_subjob("foo", 2, 3, ss2_dfs)
    assert data_len == 4
    assert len(data) == 2

def test_main_write(monkeypatch, hf, capsys):
    tmp_dir = br.TempDir()
    argv = ['rdmcl.py', os.path.join(hf.resource_path, "final_clusters.txt"),
            os.path.join(hf.resource_path, "Cteno_pannexins.fa"), "-w", tmp_dir.path]
    monkeypatch.setattr(sys, "argv", argv)
    group_by_cluster.main()
    root, dirs, files = next(os.walk(tmp_dir.path))
    assert sorted(files) == ['group_0_0.txt', 'group_0_1.txt', 'group_0_18.txt', 'group_0_19.txt',
                             'group_0_2.txt', 'group_0_20.txt', 'group_0_23.txt', 'group_0_26.txt',
                             'group_0_3.txt', 'group_0_30.txt', 'group_0_5.txt', 'group_0_6.txt',
                             'group_0_7.txt'], print(sorted(files))
    out, err = capsys.readouterr()
    assert out + err == ""

def test_worker_idle_workers(hf):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    temp_dir.copy_to("%sheartbeat_db.sqlite" % hf.resource_path)
    worker = launch_worker.Worker(temp_dir.path)
    hb_con = sqlite3.connect(os.path.join(temp_dir.path, "heartbeat_db.sqlite"))
    hb_cursor = hb_con.cursor()
    for _ in range(5):
        hb_cursor.execute("INSERT INTO heartbeat (thread_type, pulse) VALUES ('worker', %s)" % round(time.time()))
    hb_con.commit()

    work_con = sqlite3.connect(os.path.join(temp_dir.path, "work_db.sqlite"))
    work_cursor = work_con.cursor()
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('foo', 1)")
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('1_2_foo', 1)")
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('2_2_foo', 2)")
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('bar', 3)")
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('baz', 4)")
    work_con.commit()

    assert worker.idle_workers() == 1
    hb_con.close()
    work_con.close()

def test_start_worker_missing_ss2(hf, monkeypatch, capsys):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    temp_dir.copy_to("%sheartbeat_db.sqlite" % hf.resource_path)
    worker = launch_worker.Worker(temp_dir.path, heartrate=1, max_wait=15)
    work_con = sqlite3.connect(os.path.join(temp_dir.path, "work_db.sqlite"))
    work_cursor = work_con.cursor()
    work_cursor.execute("INSERT INTO waiting (hash, master_id) VALUES ('foo', 2)")

    # This first one will raise a FileNotFoundError and will `continue` because it's a subjob
    work_cursor.execute("INSERT INTO queue (hash, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend) "
                        "VALUES ('1_2_foo', ?, 'clustalo', '', 'gappyout 50 90 clean', 0, 0)",
                        (os.path.join(hf.resource_path, "psi_pred"),))

    # This second one will also raise a FileNotFoundError but it will terminate because it's a primary job
    work_cursor.execute("INSERT INTO queue (hash, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend) "
                        "VALUES ('foo', ?, 'clustalo', '', 'gappyout 50 90 clean', 0, 0)",
                        (os.path.join(hf.resource_path, "psi_pred"),))
    work_con.commit()

    seqbuddy = hf.get_data("cteno_panxs")
    seqbuddy.records[0].id = "Foo"
    seqbuddy.records = seqbuddy.records[:3]
    seqbuddy.write(os.path.join(worker.output, "foo.seqs"))

    monkeypatch.setattr(Alb, "generate_msa", lambda *_, **__: "Blahh")
    monkeypatch.setattr(Alb, "AlignBuddy", lambda *_, **__: "Blahh")
    monkeypatch.setattr(launch_worker.Worker, "check_masters", lambda *_, **__: True)
    with pytest.raises(SystemExit):
        worker.start()
    out, err = capsys.readouterr()
    assert "Terminating Worker_2 because of something wrong with primary cluster foo" in out
    work_con.close()

def test_sqlitebroker_iterator():
    tmpdir = br.TempDir()
    connect = sqlite3.connect(os.path.join(tmpdir.path, "db.sqlite"))
    cursor = connect.cursor()
    cursor.execute("CREATE TABLE foo (id INT PRIMARY KEY, some_data TEXT, numbers INT)")
    cursor.execute("INSERT INTO foo (id, some_data, numbers) VALUES (0, 'hello', 25)")
    cursor.execute("INSERT INTO foo (id, some_data, numbers) VALUES (1, 'bonjour', 50)")
    cursor.execute("INSERT INTO foo (id, some_data, numbers) VALUES (2, 'hola', 75)")
    connect.commit()
    connect.close()

    broker = helpers.SQLiteBroker(os.path.join(tmpdir.path, "db.sqlite"))
    results = broker.iterator("SELECT * FROM foo")
    assert next(results) == (0, 'hello', 25)
    assert next(results) == (1, 'bonjour', 50)
    assert next(results) == (2, 'hola', 75)
    with pytest.raises(StopIteration):
        next(results)

def test_worker_prepare_psipred_dfs(hf, capsys):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    worker = launch_worker.Worker(temp_dir.path)
    seqbuddy = hf.get_data("cteno_panxs")
    psipred_dfs = worker.prepare_psipred_dfs(seqbuddy, os.path.join(hf.resource_path, "psi_pred"))
    assert len(seqbuddy) == len(psipred_dfs)
    assert str(psipred_dfs["Hvu-PanxβO"].iloc[0]) == """\
indx              1
aa                M
ss                C
coil_prob     0.999
helix_prob    0.001
sheet_prob    0.001
Name: 0, dtype: object"""
    capsys.readouterr()

    seqbuddy.records[0].id = "Foo"
    with pytest.raises(FileNotFoundError) as err:
        worker.prepare_psipred_dfs(seqbuddy, os.path.join(hf.resource_path, "psi_pred"))
    assert "Foo.ss2" in str(err)

def test_worker_process_subjob(hf):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    worker = launch_worker.Worker(temp_dir.path)
    worker.heartbeat.id = 1
    subjob_dir = temp_dir.subdir(".worker_output/foo")
    work_con = sqlite3.connect(os.path.join(temp_dir.path, "work_db.sqlite"))
    work_cursor = work_con.cursor()
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('2_3_foo', 1)")
    work_cursor.execute("INSERT INTO waiting (hash, master_id) VALUES ('foo', 3)")
    work_con.commit()

    # First test what happens when there are remaining subjobs
    sim_scores = """\
seq1,seq2,subsmat,psi
Oma-PanxαA,Oma-PanxαB,0.2440918473487719,0.4821566689314651
Oma-PanxαA,Oma-PanxαC,0.5135617078978646,0.7301561315022769
"""
    sim_scores_df = pd.read_csv(StringIO(sim_scores))
    sim_scores_df = worker.process_subjob("foo", sim_scores_df, 2, 3)
    assert sim_scores_df.empty
    assert work_cursor.execute("SELECT * FROM complete").fetchone()[0] == "2_3_foo"
    assert not work_cursor.execute("SELECT * FROM processing").fetchall()
    with open(os.path.join(subjob_dir, "2_of_3.sim_df"), "r") as ifile:
        assert ifile.read() == sim_scores

    # Now test the final processing after all subjobs complete
    work_cursor.execute("INSERT INTO complete (hash) VALUES ('1_3_foo')")
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('3_3_foo', 1)")
    work_cursor.execute("INSERT INTO waiting (hash, master_id) VALUES ('3_3_foo', 3)")
    work_con.commit()
    with open(os.path.join(subjob_dir, "1_of_3.sim_df"), "w") as ofile:
        ofile.write("""\
seq1,seq2,subsmat,psi
Oma-PanxαA,Oma-PanxαD,0.587799312776859,0.7428144752048478
Oma-PanxαB,Oma-PanxαC,0.2302289845944233,0.5027489193831555
""")

    sim_scores = """\
seq1,seq2,subsmat,psi
Oma-PanxαB,Oma-PanxαD,0.2382962711779895,0.44814103743455824
Oma-PanxαC,Oma-PanxαD,0.4712328736449831,0.6647632735397735
"""
    sim_scores_df = pd.read_csv(StringIO(sim_scores))
    sim_scores_df = worker.process_subjob("foo", sim_scores_df, 3, 3)
    assert str(sim_scores_df) == """\
         seq1        seq2         subsmat             psi
0  Oma-PanxαA  Oma-PanxαD  0.587799312777  0.742814475205
1  Oma-PanxαB  Oma-PanxαC  0.230228984594  0.502748919383
0  Oma-PanxαA  Oma-PanxαB  0.244091847349  0.482156668931
1  Oma-PanxαA  Oma-PanxαC  0.513561707898  0.730156131502
0  Oma-PanxαB  Oma-PanxαD  0.238296271178  0.448141037435
1  Oma-PanxαC  Oma-PanxαD  0.471232873645  0.664763273540""", print(sim_scores_df)
    work_con.close()

def test_worker_process_final_results(hf, monkeypatch, capsys):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    work_con = sqlite3.connect(os.path.join(temp_dir.path, "work_db.sqlite"))
    work_cursor = work_con.cursor()
    worker = launch_worker.Worker(temp_dir.path)
    aln_file = temp_dir.subfile(os.path.join(worker.output, "foo.aln"))
    seqs_file = temp_dir.subfile(os.path.join(worker.output, "foo.seqs"))
    graph_file = os.path.join(worker.output, "foo.graph")
    subjob_dir = temp_dir.subdir(os.path.join(worker.output, "foo"))

    # Use worker id = 2 and master id = 3
    worker.data_file = os.path.join(temp_dir.path, ".Worker_2.dat")
    worker.heartbeat.id = 2
    work_cursor.execute("INSERT INTO waiting (hash, master_id) VALUES ('foo', 3)")
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('foo', 2)")
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('1_3_foo', 2)")
    work_cursor.execute("INSERT INTO complete (hash) VALUES ('2_3_foo')")
    work_con.commit()

    # Process an empty result without subjobs
    with open(worker.data_file, "w") as ifile:
        ifile.write("seq1,seq2,subsmat,psi")
    assert worker.process_final_results("foo", 1, 1) is None
    assert work_cursor.execute("SELECT * FROM waiting WHERE hash='foo'").fetchone()
    assert work_cursor.execute("SELECT * FROM processing WHERE hash='foo'").fetchone()
    assert work_cursor.execute("SELECT * FROM processing WHERE hash LIKE '%%_foo'").fetchone()
    assert not work_cursor.execute("SELECT * FROM complete WHERE hash='foo'").fetchone()
    assert work_cursor.execute("SELECT * FROM complete WHERE hash LIKE '%%_foo'").fetchone()
    assert os.path.isfile(aln_file)
    assert os.path.isfile(seqs_file)
    assert not os.path.isfile(graph_file)
    assert os.path.isdir(subjob_dir)

    # Confirm that the sim_scores will be collected from process_subjob if num_subjobs > 1
    sim_scores = launch_worker.pd.read_csv(worker.data_file, index_col=False)

    def patch_process_subjob(*args):
        print(args[2])
        return sim_scores

    monkeypatch.setattr(launch_worker.Worker, "process_subjob", patch_process_subjob)
    assert worker.process_final_results("foo", 1, 3) is None
    assert work_cursor.execute("SELECT * FROM waiting WHERE hash='foo'").fetchone()
    assert work_cursor.execute("SELECT * FROM processing WHERE hash='foo'").fetchone()
    assert len(work_cursor.execute("SELECT * FROM processing WHERE hash LIKE '%%_foo'").fetchall()) == 1
    assert not work_cursor.execute("SELECT * FROM complete WHERE hash='foo'").fetchone()
    assert os.path.isfile(aln_file)
    assert os.path.isfile(seqs_file)
    assert not os.path.isfile(graph_file)
    assert os.path.isdir(subjob_dir)
    out, err = capsys.readouterr()
    assert out.strip() == str(sim_scores).strip(), print(out + err)

    # Process an actual result
    result = """
Oma-PanxαA,Oma-PanxαB,0.24409184734877187,0.48215666893146514
Oma-PanxαA,Oma-PanxαC,0.5135617078978646,0.7301561315022769
Oma-PanxαA,Oma-PanxαD,0.587799312776859,0.7428144752048478
Oma-PanxαB,Oma-PanxαC,0.23022898459442326,0.5027489193831555
Oma-PanxαB,Oma-PanxαD,0.2382962711779895,0.44814103743455824
Oma-PanxαC,Oma-PanxαD,0.47123287364498306,0.6647632735397735
"""
    with open(worker.data_file, "a") as ifile:
        ifile.write(result)
    assert worker.process_final_results("foo", 1, 1) is None
    assert work_cursor.execute("SELECT * FROM waiting WHERE hash='foo'").fetchone()
    assert not work_cursor.execute("SELECT * FROM processing WHERE hash='foo'").fetchone()
    assert work_cursor.execute("SELECT * FROM complete WHERE hash='foo'").fetchone()
    assert not work_cursor.execute("SELECT * FROM complete WHERE hash LIKE '%%_foo'").fetchone()
    assert os.path.isfile(aln_file)
    assert os.path.isfile(seqs_file)
    assert os.path.isfile(graph_file)
    assert not os.path.isdir(subjob_dir)

    # Process a result with no masters waiting around
    work_cursor.execute("DELETE FROM waiting WHERE hash='foo'")
    work_cursor.execute("DELETE FROM complete WHERE hash='foo'")
    work_con.commit()
    os.remove(seqs_file)
    assert worker.process_final_results("foo", 1, 1) is None
    assert not work_cursor.execute("SELECT * FROM waiting WHERE hash='foo'").fetchone()
    assert not work_cursor.execute("SELECT * FROM processing WHERE hash='foo'").fetchone()
    assert not work_cursor.execute("SELECT * FROM complete WHERE hash='foo'").fetchone()
    assert not work_cursor.execute("SELECT * FROM complete WHERE hash LIKE '%%_foo'").fetchone()
    assert not os.path.isfile(aln_file)
    assert not os.path.isfile(seqs_file)
    assert not os.path.isfile(graph_file)
    work_con.close()

def test_worker_clean_dead_threads(hf):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    temp_dir.copy_to("%sheartbeat_db.sqlite" % hf.resource_path)
    worker = launch_worker.Worker(temp_dir.path)
    hb_con = sqlite3.connect(os.path.join(temp_dir.path, "heartbeat_db.sqlite"))
    hb_cursor = hb_con.cursor()
    hb_cursor.execute("INSERT INTO heartbeat (thread_type, pulse) VALUES ('master', %s)" % round(time.time()))
    master_id = hb_cursor.lastrowid
    hb_cursor.execute("INSERT INTO heartbeat (thread_type, pulse) VALUES ('worker', %s)" % round(time.time()))
    worker_id = hb_cursor.lastrowid
    hb_con.commit()

    work_con = sqlite3.connect(os.path.join(temp_dir.path, "work_db.sqlite"))
    work_cursor = work_con.cursor()
    work_cursor.execute("INSERT INTO queue (hash, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend) "
                        "VALUES ('foo', './', '', '', 'gappyout 50 90 clean', 0, 0)")
    work_cursor.execute("INSERT INTO waiting (hash, master_id) VALUES ('foo', %s)" % master_id)
    work_cursor.execute("INSERT INTO waiting (hash, master_id) VALUES ('bar', %s)" % master_id)
    work_cursor.execute("INSERT INTO waiting (hash, master_id) VALUES ('baz', %s)" % master_id)
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('bar', ?)", (worker_id,))
    work_cursor.execute("INSERT INTO complete (hash) VALUES ('baz')")
    work_con.commit()

    # Everyone should be alive and well!
    worker.clean_dead_threads()
    assert hb_cursor.execute("SELECT * FROM heartbeat WHERE thread_id=%s" % master_id).fetchone()
    assert hb_cursor.execute("SELECT * FROM heartbeat WHERE thread_id=%s" % worker_id).fetchone()
    assert len(work_cursor.execute("SELECT * FROM queue WHERE hash='foo'").fetchall()) == 1
    assert len(work_cursor.execute("SELECT hash FROM waiting WHERE master_id=%s" % master_id).fetchall()) == 3
    assert len(work_cursor.execute("SELECT * FROM processing WHERE worker_id=%s" % worker_id).fetchall()) == 1
    assert len(work_cursor.execute("SELECT * FROM complete WHERE hash='baz'").fetchall()) == 1

    # Delete orphaned complete/queue jobs
    work_cursor.execute("INSERT INTO queue (hash, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend) "
                        "VALUES ('apple', './', '', '', 'gappyout 50 90 clean', 0, 0)")
    work_cursor.execute("INSERT INTO complete (hash) VALUES ('orange')")
    work_con.commit()
    graph_file = temp_dir.subfile(os.path.join(".worker_output", "apple.graph"))
    align_file = temp_dir.subfile(os.path.join(".worker_output", "orange.aln"))
    seqs_file = temp_dir.subfile(os.path.join(".worker_output", "orange.seqs"))
    worker.clean_dead_threads()
    assert not work_cursor.execute("SELECT * FROM queue WHERE hash='apple'").fetchone()
    assert not work_cursor.execute("SELECT * FROM complete WHERE hash='orange'").fetchone()
    assert not os.path.isfile(graph_file)
    assert not os.path.isfile(align_file)
    assert not os.path.isfile(seqs_file)

    # Orphans
    work_cursor.execute("INSERT INTO queue (hash, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend) "
                        "VALUES ('apple', './', '', '', 'gappyout 50 90 clean', 0, 0)")
    work_cursor.execute("INSERT INTO waiting (hash, master_id) VALUES ('orange', 100)")
    work_cursor.execute("INSERT INTO processing (hash, worker_id) VALUES ('orange', ?)", (worker_id,))
    work_cursor.execute("INSERT INTO complete (hash) VALUES ('lemon')")
    work_con.commit()
    worker.clean_dead_threads()
    assert hb_cursor.execute("SELECT * FROM heartbeat WHERE thread_id=%s" % master_id).fetchone()
    assert hb_cursor.execute("SELECT * FROM heartbeat WHERE thread_id=%s" % worker_id).fetchone()
    assert not work_cursor.execute("SELECT * FROM queue WHERE hash='apple'").fetchone()
    assert not work_cursor.execute("SELECT * FROM waiting WHERE master_id='orange' OR master_id=100").fetchone()
    assert not work_cursor.execute("SELECT * FROM processing WHERE worker_id='orange'").fetchone()
    assert not work_cursor.execute("SELECT * FROM complete WHERE hash='lemon'").fetchone()

    # Dead worker
    hb_cursor.execute("INSERT INTO heartbeat (thread_type, pulse) VALUES ('worker', 100)")
    worker2_id = hb_cursor.lastrowid
    hb_con.commit()
    worker.clean_dead_threads()
    assert not hb_cursor.execute("SELECT * FROM heartbeat WHERE thread_id=%s" % worker2_id).fetchone()

    # Dead master
    hb_cursor.execute("UPDATE heartbeat SET pulse=100 WHERE thread_id=?", (master_id,))
    hb_con.commit()
    worker.clean_dead_threads()
    assert hb_cursor.execute("SELECT * FROM heartbeat WHERE thread_id=%s" % worker_id).fetchone()
    assert not hb_cursor.execute("SELECT * FROM heartbeat WHERE thread_type='master'").fetchone()
    assert not work_cursor.execute("SELECT * FROM queue").fetchone()
    assert not work_cursor.execute("SELECT * FROM waiting").fetchone()
    hb_con.close()
    work_con.close()

def test_main(monkeypatch, capsys):
    out_dir = br.TempDir()
    argv = ['launch_worker.py', '--workdb', out_dir.path, "--max_wait", "1"]
    monkeypatch.setattr(launch_worker.sys, "argv", argv)
    with pytest.raises(SystemExit):
        launch_worker.main()
    out, err = capsys.readouterr()
    assert 'Terminating Worker_1 because of 1 sec of master inactivity' in out

    work_con = sqlite3.connect(os.path.join(out_dir.path, "work_db.sqlite"))
    workdb_cursor = work_con.cursor()
    tables = {'queue': [(0, 'hash', 'TEXT', 0, None, 1),
                        (1, 'psi_pred_dir', 'TEXT', 0, None, 0),
                        (2, 'align_m', 'TEXT', 0, None, 0),
                        (3, 'align_p', 'TEXT', 0, None, 0),
                        (4, 'trimal', 'TEXT', 0, None, 0),
                        (5, 'gap_open', 'FLOAT', 0, None, 0),
                        (6, 'gap_extend', 'FLOAT', 0, None, 0)],
              'processing': [(0, 'hash', 'TEXT', 0, None, 1),
                             (1, 'worker_id', 'INTEGER', 0, None, 0)],
              'complete': [(0, 'hash', 'TEXT', 0, None, 1)],
              'waiting': [(0, 'hash', 'TEXT', 0, None, 0),
                          (1, 'master_id', 'INTEGER', 0, None, 0)]}
    workdb_tables = workdb_cursor.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
    for table in tables:
        assert (table,) in workdb_tables
    for table, fields in tables.items():
        fields_query = workdb_cursor.execute("PRAGMA table_info(%s)" % table).fetchall()
        assert fields_query == fields

    hb_con = sqlite3.connect(os.path.join(out_dir.path, "heartbeat_db.sqlite"))
    hb_db_cursor = hb_con.cursor()
    tables = {'heartbeat': [(0, 'thread_id', 'INTEGER', 0, None, 1),
                            (1, 'thread_type', 'TEXT', 0, None, 0),
                            (2, 'pulse', 'INTEGER', 0, None, 0)]}
    hb_db_tables = hb_db_cursor.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
    for table in tables:
        assert (table,) in hb_db_tables
    for table, fields in tables.items():
        fields_query = hb_db_cursor.execute("PRAGMA table_info(%s)" % table).fetchall()
        assert fields_query == fields

    # Test quiet
    argv = ['launch_worker.py', '--workdb', out_dir.path, "--max_wait", "2", "--quiet"]
    monkeypatch.setattr(launch_worker.sys, "argv", argv)
    capsys.readouterr()
    with pytest.raises(SystemExit):
        launch_worker.main()
    out, err = capsys.readouterr()
    assert not (out + err)
    out, err = capsys.readouterr()
    assert not (out + err)

    # Test logging mode (line breaks are inserted between start and termination messages)
    argv = ['launch_worker.py', '--workdb', out_dir.path, "--max_wait", "2", "--log"]
    monkeypatch.setattr(launch_worker.sys, "argv", argv)
    capsys.readouterr()
    with pytest.raises(SystemExit):
        launch_worker.main()
    out, err = capsys.readouterr()
    assert "Starting Worker_3\n" in out and "Terminating Worker_3 because of 2 sec of master inactivity" in out

    # Test termination types
    monkeypatch.setattr(launch_worker.helpers, "dummy_func", mock_valueerror)
    argv = ['launch_worker.py', '--workdb', out_dir.path, "--max_wait", "2"]
    monkeypatch.setattr(launch_worker.sys, "argv", argv)
    with pytest.raises(SystemExit):
        launch_worker.main()
    out, err = capsys.readouterr()
    assert 'Terminating Worker_7 because of too many Worker crashes' in out

    monkeypatch.setattr(launch_worker.helpers, "dummy_func", mock_keyboardinterupt)
    with pytest.raises(SystemExit):
        launch_worker.main()
    out, err = capsys.readouterr()
    assert 'Terminating Worker_8 because of KeyboardInterrupt' in out
    hb_con.close()
    work_con.close()

def test_worker_spawn_subjobs(hf):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    work_con = sqlite3.connect(os.path.join(temp_dir.path, "work_db.sqlite"))
    work_cursor = work_con.cursor()
    worker = launch_worker.Worker(temp_dir.path, cpus=3, job_size_coff=2)  # Set max job size at 4
    worker.heartbeat.id = 1
    subjob_dir = os.path.join(worker.output, "foo")
    ss2_dfs = hf.get_data("ss2_dfs")
    pairs = [('Bch-PanxαA', 'Bch-PanxαB'), ('Bch-PanxαA', 'Bch-PanxαC'), ('Bch-PanxαA', 'Bch-PanxαD'),
             ('Bch-PanxαA', 'Bch-PanxαE'), ('Bch-PanxαB', 'Bch-PanxαC'), ('Bch-PanxαB', 'Bch-PanxαD'),
             ('Bch-PanxαB', 'Bch-PanxαE'), ('Bch-PanxαC', 'Bch-PanxαD'), ('Bch-PanxαC', 'Bch-PanxαE'),
             ('Bch-PanxαD', 'Bch-PanxαE')]
    for indx, pair in enumerate(pairs):
        pairs[indx] = (pair[0], pair[1], ss2_dfs[pair[0]], ss2_dfs[pair[1]])
    data = [pairs[i:i + 5] for i in range(0, len(pairs), 5)]  # This gives two groups of five

    data_len, data, subjob_num, num_subjobs = worker.spawn_subjobs("foo", data, ss2_dfs, 3, -5)
    assert data_len == len(data[0]) == 2
    assert len(data[1]) == 2
    assert len(data) == 2
    assert subjob_num == 1
    assert num_subjobs == 3
    assert os.path.isfile(os.path.join(subjob_dir, "Bch-PanxαA.ss2"))

    with open(os.path.join(subjob_dir, "1_of_3.txt"), "r") as ifile:
        assert ifile.read() == """\
Bch-PanxαA Bch-PanxαB
Bch-PanxαA Bch-PanxαC
Bch-PanxαA Bch-PanxαD
Bch-PanxαA Bch-PanxαE
"""

    with open(os.path.join(subjob_dir, "3_of_3.txt"), "r") as ifile:
        assert ifile.read() == """\
Bch-PanxαC Bch-PanxαE
Bch-PanxαD Bch-PanxαE
"""

    queue = work_cursor.execute("SELECT * FROM queue").fetchall()
    assert len(queue) == 2
    hash_id, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend = queue[0]
    assert hash_id == "2_3_foo"
    assert psi_pred_dir == subjob_dir
    assert align_m is align_p is trimal is None
    assert (gap_open, gap_extend) == (-5, 0)

    processing = work_cursor.execute("SELECT * FROM processing").fetchall()
    assert len(processing) == 1
    hash_id, worker_id = processing[0]
    assert hash_id == "1_3_foo"
    assert worker_id == worker.heartbeat.id
    work_con.close()

def setup():
    import urllib.request
    sys.stdout.write("\033[1mWelcome to RD-MCL!\033[m\nConfirming installation...\n\n")
    sys.stdout.write("\033[1mChecking for PSIPRED:\033[m ")
    path_install = []
    local_install = []
    programs = ["psipass2", "psipred", "seq2mtx"]
    for prog in programs:
        if shutil.which(prog):
            path_install.append(prog)
        elif os.path.isfile(os.path.join(SCRIPT_PATH, "psipred", "bin", prog)):
            local_install.append(prog)

    if sorted(list(set(path_install + local_install))) == programs:
        sys.stdout.write("\033[92mFound\033[39m\n")
        if local_install:
            path_install, local_install = False, True
        else:
            path_install, local_install = True, False
    else:
        sys.stdout.write("\033[91mMissing\033[39m\n\n")
        if br.ask("Would you like the setup script to try and install PSIPRED? [y]/n:"):
            if shutil.which("conda"):
                sys.stdout.write("\033[1mCalling conda...\033[m\n")
                path_install, local_install = True, False
                Popen("conda install -y -c biocore psipred", shell=True).wait()
            else:
                path_install, local_install = False, True
                cwd = os.getcwd()
                tmp_dir = br.TempDir()
                os.chdir(tmp_dir.path)
                if sys.platform == "darwin":
                    version = "osx-64"
                else:
                    version = "linux-64"
                url = "https://anaconda.org/biocore/psipred/4.01/download/%s/psipred-4.01-1.tar.bz2" % version
                sys.stdout.write("\n\033[1mDownloading %s binaries from %s\033[m\n" % (version, url))
                urllib.request.urlretrieve(url, "psipred-4.01-1.tar.bz2")
                sys.stdout.write("\033[1mUnpacking...\033[m\n")
                Popen("tar -xjf psipred-4.01-1.tar.bz2", shell=True).wait()
                sys.stdout.write("\033[1mInstalling...\033[m\n")
                if os.path.isdir("%s%spsipred" % (SCRIPT_PATH, os.sep)):
                    shutil.rmtree("%s%spsipred" % (SCRIPT_PATH, os.sep))
                os.makedirs("%s%spsipred" % (SCRIPT_PATH, os.sep))
                shutil.move("bin", "{0}{1}psipred{1}".format(SCRIPT_PATH, os.sep))
                shutil.move("share{0}psipred_4.01{0}data".format(os.sep),
                            "{0}{1}psipred{1}".format(SCRIPT_PATH, os.sep))
                os.chdir(cwd)

        if not shutil.which("psipred") \
                and not os.path.isfile(os.path.join(SCRIPT_PATH, "psipred", "bin", "psipred")):
            sys.stdout.write("\033[91mRD-MCL depends on PSIPRED, and it is not installed correctly.\033[39m\n"
                             "Please see instructions at"
                             " github.com/biologyguy/RD-MCL/wiki/Installation-Guide#psipred\n\n")
            return
        else:
            sys.stdout.write("\033[92mPSIPRED binary installed\033[39m\n\n")

    # Confirm all psipred weight files are in the rdmcl directory
    weight_files = ["weights.dat", "weights.dat2", "weights.dat3", "weights_p2.dat",
                    "weights_s.dat", "weights_s.dat2", "weights_s.dat3"]

    error_msg = """\033[1mError:\033[m psi-pred data file '{0}' not found in {1}!

Please try reinstalling PSIPRED:

 $: conda install -c biocore --no-deps --force psipred

or build from http://bioinfadmin.cs.ucl.ac.uk/downloads/psipred/

If the problem persists, please create an issue at https://github.com/biologyguy/RD-MCL/issues
"""

    data_dir = "{0}{1}psipred{1}data".format(SCRIPT_PATH, os.sep)
    if path_install:
        os.makedirs("%s%spsipred" % (SCRIPT_PATH, os.sep), exist_ok=True)
        os.makedirs(data_dir, exist_ok=True)
        psipred_bin_dir = shutil.which("psipred").split(os.sep)[:-2]
        root, dirs, files = next(os.walk(os.sep + os.path.join(*psipred_bin_dir, "share")))
        psipred_data_dir = re.search("'(psipred.*?)'[,\]]", str(dirs)).group(1)
        psipred_data_dir = os.sep + os.path.join(*psipred_bin_dir, "share", psipred_data_dir, "data")
        for next_file in weight_files:
            if not os.path.isfile("{0}{1}{2}".format(data_dir, os.sep, next_file)) \
                    and not os.path.isfile(os.path.join(psipred_data_dir, next_file)):
                print(error_msg.format(next_file, psipred_data_dir))
                return
            elif not os.path.isfile("{0}{1}{2}".format(data_dir, os.sep, next_file)):
                shutil.copyfile(os.path.join(psipred_data_dir, next_file),
                                "{0}{1}{2}".format(data_dir, os.sep, next_file))
    elif local_install:
        for next_file in weight_files:
            if not os.path.isfile("{0}{1}{2}".format(data_dir, os.sep, next_file)):
                print(error_msg.format(next_file, data_dir))
                return

    sys.stdout.write("\033[1mChecking for HMMER3 programs:\033[m\n")
    not_installed = []
    for program in ["hmmbuild", "hmm_fwd_back"]:
        if not shutil.which(program) and not os.path.isfile(os.path.join(SCRIPT_PATH, "hmmer", program)):
            sys.stdout.write("\t\033[1m%s: \033[91mMissing\033[39m\n" % program)
            not_installed.append(program)
        else:
            sys.stdout.write("\t\033[1m%s: \033[92mFound\033[39m\n" % program)

    if not_installed:
        if br.ask("Would you like the setup script to try and install missing HMMER3 programs? [y]/n:"):
            cwd = os.getcwd()
            temp_dir = br.TempDir()
            os.chdir(temp_dir.path)
            url = "http://eddylab.org/software/hmmer3/3.1b2/hmmer-3.1b2.tar.gz"
            sys.stdout.write("\n\033[1mDownloading hmmer-3.1b2.tar.gz\033[m\n")
            urllib.request.urlretrieve(url, "hmmer-3.1b2.tar.gz")
            sys.stdout.write("\033[1mUnpacking...\033[m\n")
            Popen("tar -xzf hmmer-3.1b2.tar.gz", shell=True).wait()
            if not os.path.isdir("hmmer-3.1b2"):
                sys.stdout.write("\033[91mFailed to download HMMER3.\033[39m\nPlease see instructions at"
                                 " github.com/biologyguy/RD-MCL/wiki/Installation-Guide#hmmer3\n\n")
                return
            os.chdir("hmmer-3.1b2")
            sys.stdout.write("\033[1mInstalling...\033[m\n")
            Popen("./configure; make;", shell=True).wait()
            os.chdir("src")
            with open("generic_fwdback.c", "r") as ifile:
                modified = re.sub(r'(p7_gmx_Dump\(stdout, fwd, p7_DEFAULT\);)', '', ifile.read())
            with open("generic_fwdback.c", "w") as ofile:
                ofile.write(modified)
            Popen("make generic_fwdback_example", shell=True).wait()
            os.makedirs(os.path.join(SCRIPT_PATH, "hmmer"), exist_ok=True)
            if "hmm_fwd_back" in not_installed:
                shutil.move("generic_fwdback_example", os.path.join(SCRIPT_PATH, "hmmer", "hmm_fwd_back"))
            if "hmmbuild" in not_installed:
                shutil.move("hmmbuild", os.path.join(SCRIPT_PATH, "hmmer", "hmmbuild"))
            os.chdir(cwd)
        else:
            sys.stdout.write("\033[91mRD-MCL depends on HMMER3.\033[39m\nPlease see instructions at"
                             " github.com/biologyguy/RD-MCL/wiki/Installation-Guide#hmmer3\n\n")
            return

    for program in ["hmmbuild", "hmm_fwd_back"]:
        if not shutil.which(program) and not os.path.isfile(os.path.join(SCRIPT_PATH, "hmmer", program)):
            sys.stdout.write("\033[91mFailed to install HMMER3 programs.\033[39m\nPlease see instructions at"
                             " github.com/biologyguy/RD-MCL/wiki/Installation-Guide#hmmer3\n\n")
            return
    else:
        sys.stdout.write("\033[92mHMMER3 binaries installed\033[39m\n\n")

    open(os.path.join(SCRIPT_PATH, "config.ini"), "w").close()
    sys.stdout.write("\033[1mSuccess! You're all set.\033[m\n")
    return

def test_start_worker_deal_with_subjobs(hf, capsys, monkeypatch):
    temp_dir = br.TempDir()
    temp_dir.copy_to("%swork_db.sqlite" % hf.resource_path)
    temp_dir.copy_to("%sheartbeat_db.sqlite" % hf.resource_path)
    worker = launch_worker.Worker(temp_dir.path, heartrate=1, max_wait=5, cpus=3, job_size_coff=2)
    work_con = sqlite3.connect(os.path.join(temp_dir.path, "work_db.sqlite"))
    work_cursor = work_con.cursor()
    work_cursor.execute("INSERT INTO waiting (hash, master_id) VALUES ('foo', 2)")
    work_cursor.execute("INSERT INTO queue (hash, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend) "
                        "VALUES ('foo', ?, 'clustalo', '', 'gappyout 50 90 clean', 0, 0)",
                        (os.path.join(hf.resource_path, "psi_pred"),))
    work_con.commit()

    monkeypatch.setattr(rdmcl, "prepare_all_by_all", lambda *_: [136, []])
    monkeypatch.setattr(launch_worker.Worker, "spawn_subjobs", lambda *_: [2, [], 1, 3])
    monkeypatch.setattr(launch_worker.Worker, "load_subjob", lambda *_: [4, []])
    monkeypatch.setattr(launch_worker.Worker, "check_masters", lambda *_, **__: True)

    def kill_worker(*args, **kwargs):
        worker.terminate("unit test kill")
        return args, kwargs

    monkeypatch.setattr(br, "run_multicore_function", kill_worker)

    seqbuddy = hf.get_data("cteno_panxs")
    seqbuddy.write(os.path.join(worker.output, "foo.seqs"))
    alignment = hf.get_data("cteno_panxs_aln")
    alignment.write(os.path.join(worker.output, "foo.aln"))
    monkeypatch.setattr(Alb, "generate_msa", lambda *_, **__: alignment)

    # This first run is a full (large) job that will spawn subjobs
    with pytest.raises(SystemExit):
        worker.start()
    out, err = capsys.readouterr()
    assert "Terminating Worker_2 because of unit test kill" in out, print(out)

    # A second run is pulling a subjob off the queue (first one fails because no MSA available)
    work_cursor.execute("INSERT INTO queue (hash, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend) "
                        "VALUES ('2_3_bar', ?, 'clustalo', '', 'gappyout 50 90 clean', 0, 0)",
                        (os.path.join(hf.resource_path, "psi_pred"),))
    work_cursor.execute("INSERT INTO queue (hash, psi_pred_dir, align_m, align_p, trimal, gap_open, gap_extend) "
                        "VALUES ('2_3_foo', ?, 'clustalo', '', 'gappyout 50 90 clean', 0, 0)",
                        (os.path.join(hf.resource_path, "psi_pred"),))
    work_con.commit()

    with pytest.raises(SystemExit):
        worker.start()
    out, err = capsys.readouterr()
    assert "Terminating Worker_3 because of unit test kill" in out, print(out)
    work_con.close()