Example #1
    def _run(self, check_file_path):
        printer = DynamicPrint()
        output = [("#Master", 9), ("AveMhb", 9), ("#Worker", 9), ("AveWhb", 9), ("#queue", 9),
                  ("#subq", 8), ("#proc", 8), ("#subp", 8), ("#comp", 8), ("#ProcComp", 10), ("#HashWait", 10),
                  ("#IdWait", 9), ("ConnectTime", 9)]

        output = [str(x[0]).ljust(x[1]) for x in output]
        printer.write("".join(output))
        printer.new_line(1)
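        # Poll the check file; keep refreshing the status line until its contents change from "Running"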
        while True:
            with open("%s" % check_file_path, "r") as ifile:
                ifile_content = ifile.read()
            if ifile_content != "Running":
                break

            split_time = time()
            try:
                with helpers.ExclusiveConnect(self.hbdb_path) as cursor:
                    heartbeat = cursor.execute("SELECT * FROM heartbeat").fetchall()
                with helpers.ExclusiveConnect(self.wdb_path) as cursor:
                    queue = cursor.execute("SELECT hash FROM queue").fetchall()
                    processing = cursor.execute("SELECT hash FROM processing").fetchall()
                    complete = cursor.execute("SELECT hash FROM complete").fetchall()
                    proc_comp = cursor.execute("SELECT hash FROM proc_comp").fetchall()
                    waiting = cursor.execute("SELECT * FROM waiting").fetchall()
                connect_time = round(time() - split_time, 3)
                master_hb = [hb[2] for hb in heartbeat if hb[1] == "master"]
                len_mas = len(master_hb)
                master_hb = 0 if not master_hb else round(time() - (sum(master_hb) / len(master_hb)), 1)
                worker_hb = [hb[2] for hb in heartbeat if hb[1] == "worker"]
                len_wor = len(worker_hb)
                worker_hb = 0 if not worker_hb else round(time() - (sum(worker_hb) / len(worker_hb)), 1)

                hash_wait = {}
                master_wait = {}
                for seq_hash, master_id in waiting:
                    hash_wait.setdefault(seq_hash, 0)
                    hash_wait[seq_hash] += 1
                    master_wait.setdefault(master_id, 0)
                    master_wait[master_id] += 1

                subqueue_len = len([None for x in queue if "_" in x[0]])
                queue_len = len(queue) - subqueue_len

                subproc_len = len([None for x in processing if "_" in x[0]])
                proc_len = len(processing) - subproc_len

                output = [(len_mas, 9), (master_hb, 9), (len_wor, 9), (worker_hb, 9), (queue_len, 9),
                          (subqueue_len, 8), (proc_len, 8), (subproc_len, 8), (len(complete), 8), (len(proc_comp), 10),
                          (len(hash_wait), 10), (len(master_wait), 9), (connect_time, 9)]
                output = [str(x[0]).ljust(x[1]) for x in output]

                printer.write("".join(output))
            except helpers.sqlite3.OperationalError:
                printer.write("Worker databases not detected")
            sleep(0.5)
        return
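
helpers.ExclusiveConnect appears throughout these examples but is never defined in any of them. As a rough mental model only (the class name, constructor arguments, and locking details below are assumptions, not the project's actual implementation), it can be pictured as a context manager that opens the SQLite database, retries until it holds an exclusive lock, yields a cursor carrying a .lag attribute recording how long the wait took, and commits on exit:

import sqlite3
import time


class LagCursor(sqlite3.Cursor):
    # Cursor subclass so a .lag attribute can be attached, as check_masters() and
    # clean_dead_threads() expect
    lag = 0.0


class ExclusiveConnectSketch(object):  # Hypothetical stand-in for helpers.ExclusiveConnect
    def __init__(self, db_path, max_lock=60, priority=False):
        self.db_path = db_path
        self.max_lock = max_lock  # 0 is taken here to mean 'wait indefinitely' (assumption)
        self.priority = priority  # Presumably used to jump the lock queue; unused in this sketch
        self.connection = None

    def __enter__(self):
        start = time.time()
        while True:
            self.connection = sqlite3.connect(self.db_path, isolation_level=None)
            try:
                self.connection.execute("BEGIN EXCLUSIVE")  # Take the write lock up front
                break
            except sqlite3.OperationalError:  # Another thread/process holds the lock
                self.connection.close()
                if self.max_lock and time.time() - start > self.max_lock:
                    raise
                time.sleep(0.1)
        cursor = self.connection.cursor(factory=LagCursor)
        cursor.lag = time.time() - start  # Time spent waiting on the lock
        return cursor

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.connection.commit()
        self.connection.close()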
Example #2
    def process_final_results(self, id_hash, subjob_num, num_subjobs):
        with open(self.data_file, "r") as ifile:
            sim_scores = pd.read_csv(ifile, index_col=False)

        if num_subjobs > 1:
            # If not all subjobs are complete yet, an empty df is returned
            sim_scores = self.process_subjob(id_hash, sim_scores, subjob_num,
                                             num_subjobs)

        if not sim_scores.empty:
            sim_scores = rdmcl.set_final_sim_scores(sim_scores)

            with helpers.ExclusiveConnect(os.path.join(self.output,
                                                       "write.lock"),
                                          max_lock=0):
                # Place these write commands in ExclusiveConnect to ensure a writing lock
                sim_scores.to_csv(os.path.join(self.output,
                                               "%s.graph" % id_hash),
                                  header=None,
                                  index=False)

            with helpers.ExclusiveConnect(self.wrkdb_path) as cursor:
                # Confirm that the job is still being waited on and wasn't killed before adding to the `complete` table
                waiting = cursor.execute(
                    "SELECT master_id FROM waiting WHERE hash=?",
                    (id_hash, )).fetchall()
                processing = cursor.execute(
                    "SELECT worker_id FROM processing WHERE hash=?",
                    (id_hash, )).fetchall()
                if waiting and processing:
                    cursor.execute("INSERT INTO complete (hash) "
                                   "VALUES (?)", (id_hash, ))
                elif not waiting:
                    for del_file in [
                            "%s.%s" % (id_hash, x)
                            for x in ["graph", "aln", "seqs"]
                    ]:
                        try:
                            os.remove(os.path.join(self.output, del_file))
                        except FileNotFoundError:
                            pass

                cursor.execute("DELETE FROM processing WHERE hash=?",
                               (id_hash, ))
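                # Also clear any leftover subjob rows ('N_M_<hash>'); note '_' is a single-character LIKE wildcard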
                cursor.execute("DELETE FROM complete WHERE hash LIKE '%%_%s'" %
                               id_hash)
            if os.path.isdir(os.path.join(self.output, id_hash)):
                shutil.rmtree(os.path.join(self.output, id_hash))
        return
Example #3
    def fetch_queue_job(self):
        while True:
            with helpers.ExclusiveConnect(self.wrkdb_path,
                                          priority=True) as cursor:
                data = cursor.execute('SELECT * FROM queue').fetchone()
                if data:
                    id_hash, psipred_dir, align_m, align_p, trimal, gap_open, gap_extend = data
                    if trimal:
                        trimal = trimal.split()
                        for indx, arg in enumerate(trimal):
                            try:
                                trimal[indx] = float(arg)
                            except ValueError:
                                pass

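                    # Claim the job: pull it off the queue, but back off if another worker already holds it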
                    cursor.execute("DELETE FROM queue WHERE hash=?",
                                   (id_hash, ))
                    if cursor.execute(
                            "SELECT worker_id FROM processing WHERE hash=?",
                            (id_hash, )).fetchone():
                        continue

                    cursor.execute(
                        "INSERT INTO processing (hash, worker_id)"
                        " VALUES (?, ?)", (
                            id_hash,
                            self.heartbeat.id,
                        ))
                    data = [
                        id_hash, psipred_dir, align_m, align_p, trimal,
                        gap_open, gap_extend
                    ]
                break
        return data
Example #4
def main():
    import argparse

    def fmt(prog):
        return br.CustomHelpFormatter(prog)

    parser = argparse.ArgumentParser(prog="reset_workers",
                                     formatter_class=fmt,
                                     add_help=False,
                                     usage=argparse.SUPPRESS,
                                     description='''\
\033[1mReset Workers\033[m
  Someone broke something... Time to wipe the slate!

  Delete all queued jobs. Don't worry, RD-MCL will queue them back up
  if it still really wants them to run.

\033[1mUsage\033[m:
  reset_workers <args>
''')

    parser_flags = parser.add_argument_group(
        title="\033[1mAvailable commands\033[m")

    parser_flags.add_argument("-wdb",
                              "--workdb",
                              action="store",
                              default=os.getcwd(),
                              metavar="",
                              help="Specify the worker directory")

    # Misc
    misc = parser.add_argument_group(title="\033[1mMisc options\033[m")
    misc.add_argument('-v',
                      '--version',
                      action='version',
                      version=str(VERSION))
    misc.add_argument('-h',
                      '--help',
                      action="help",
                      help="Show this help message and exit")

    in_args = parser.parse_args()

    work_db = os.path.join(in_args.workdb, "work_db.sqlite")
    work_output_dir = os.path.join(in_args.workdb, ".worker_output")

    if os.path.isfile(work_db) and os.path.isdir(work_output_dir):
        with helpers.ExclusiveConnect(work_db) as cursor:
            root, dirs, files = next(os.walk(work_output_dir))
            for _dir in dirs:
                shutil.rmtree(os.path.join(root, _dir))
            for _file in files:
                os.remove(os.path.join(root, _file))
            cursor.execute("DELETE FROM queue")
            cursor.execute("DELETE FROM processing")
            cursor.execute("DELETE FROM complete")
            cursor.execute("DELETE FROM waiting")
Example #5
    def terminate(self, message):
        self.printer.write("Terminating Worker_%s because of %s." %
                           (self.heartbeat.id, message))
        self.printer.new_line(1)
        with helpers.ExclusiveConnect(self.wrkdb_path) as cursor:
            cursor.execute("DELETE FROM processing WHERE worker_id=?",
                           (self.heartbeat.id, ))
        if os.path.isfile(self.data_file):
            os.remove(self.data_file)
        if os.path.isfile(self.worker_file):
            os.remove(self.worker_file)
        self.heartbeat.end()
        sys.exit()
Example #6
    def idle_workers(self):
        # Calculate how many workers are just sitting around
        with helpers.ExclusiveConnect(self.hbdb_path) as cursor:
            workers = cursor.execute(
                "SELECT COUNT(*) FROM heartbeat WHERE thread_type='worker'"
            ).fetchone()[0]

        with helpers.ExclusiveConnect(self.wrkdb_path) as cursor:
            jobs = cursor.execute(
                "SELECT * FROM processing WHERE hash NOT LIKE '%%^_%%' ESCAPE '^'"
            ).fetchall()
            subjobs = cursor.execute(
                "SELECT * FROM processing WHERE hash LIKE '%%^_%%' ESCAPE '^'"
            ).fetchall()

        jobs = [x[0] for x in jobs]
        subjobs = [x[0] for x in subjobs]
        active_jobs = len(subjobs)
        subjobs = set([x.split("_")[-1] for x in subjobs])
        for job in jobs:
            active_jobs += 1 if job not in subjobs else 0
        return workers - active_jobs if active_jobs < workers else 0  # This can happen if a worker died
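
The ESCAPE '^' clauses are needed because '_' is itself a SQL LIKE wildcard matching any single character, so an unescaped '%_%' would match any hash of two or more characters rather than only subjob hashes of the form 'N_M_hash'. A standalone illustration (in-memory database, made-up hashes):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE processing (hash TEXT PRIMARY KEY, worker_id INTEGER)")
conn.executemany("INSERT INTO processing VALUES (?, ?)",
                 [("abc123", 1), ("1_4_abc123", 2), ("2_4_abc123", 3)])

# Unescaped '_' matches ANY single character, so the parent job is caught too
print(conn.execute("SELECT hash FROM processing WHERE hash LIKE '%_%'").fetchall())
# [('abc123',), ('1_4_abc123',), ('2_4_abc123',)]

# With ESCAPE, '^_' means a literal underscore, so only the subjobs match
print(conn.execute("SELECT hash FROM processing "
                   "WHERE hash LIKE '%^_%' ESCAPE '^'").fetchall())
# [('1_4_abc123',), ('2_4_abc123',)]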
Example #7
    def spawn_subjobs(self, id_hash, data, psipred_dfs, gap_open, gap_extend):
        subjob_out_dir = os.path.join(self.output, id_hash)
        os.makedirs(subjob_out_dir, exist_ok=True)
        # Flatten the data jobs list back down
        data = [y for x in data for y in x]
        len_data = len(data)

        for rec_id, df in psipred_dfs.items():
            df.to_csv(os.path.join(subjob_out_dir, "%s.ss2" % rec_id),
                      header=None,
                      index=False,
                      sep=" ")

        # Split into the minimum number of chunks such that each chunk holds at most cpus * job_size_coff comparisons
        num_subjobs = int(
            rdmcl.ceil(len_data / (self.cpus * self.job_size_coff)))
        job_size = int(rdmcl.ceil(len_data / num_subjobs))
        data = [data[i:i + job_size] for i in range(0, len_data, job_size)]

        for indx, subjob in enumerate(data):
            with open(
                    os.path.join(subjob_out_dir,
                                 "%s_of_%s.txt" % (indx + 1, num_subjobs)),
                    "w") as ofile:
                for pair in subjob:
                    ofile.write("%s %s\n" % (pair[0], pair[1]))

        # Push all jobs into the queue except the first job, which is held back for the current node to work on
        with helpers.ExclusiveConnect(self.wrkdb_path) as cursor:
            for indx, subjob in enumerate(data[1:]):
                # NOTE: 'indx + 2' because subjob numbering is 1-based and subjob #1 is held back for this worker
                cursor.execute(
                    "INSERT INTO queue (hash, psi_pred_dir, gap_open, gap_extend) "
                    "VALUES (?, ?, ?, ?)", (
                        "%s_%s_%s" % (indx + 2, num_subjobs, id_hash),
                        subjob_out_dir,
                        gap_open,
                        gap_extend,
                    ))

            cursor.execute(
                "INSERT INTO processing (hash, worker_id) VALUES (?, ?)", (
                    "1_%s_%s" % (num_subjobs, id_hash),
                    self.heartbeat.id,
                ))

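        # Re-chunk this worker's own piece (the held-back first subjob) into per-CPU slices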
        n = int(rdmcl.ceil(len(data[0]) / self.cpus))
        data = [data[0][i:i + n] for i in range(0, len(data[0]), n)]

        subjob_num = 1
        return len(data[0]), data, subjob_num, num_subjobs
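
For concreteness, the chunking arithmetic above with illustrative numbers (10,000 comparisons, 8 CPUs, and a job_size_coff of 300; all three values invented for the example):

from math import ceil

len_data = 10000   # total all-by-all comparisons (illustrative)
cpus = 8
job_size_coff = 300

num_subjobs = int(ceil(len_data / (cpus * job_size_coff)))  # ceil(10000 / 2400) = 5
job_size = int(ceil(len_data / num_subjobs))                # ceil(10000 / 5) = 2000
data = list(range(len_data))
chunks = [data[i:i + job_size] for i in range(0, len_data, job_size)]
assert [len(c) for c in chunks] == [2000, 2000, 2000, 2000, 2000]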
Example #8
    def check_masters(self, idle):
        if time.time() - self.last_heartbeat_from_master > self.max_wait:
            terminate = False
            with helpers.ExclusiveConnect(self.hbdb_path) as cursor:
                cursor.execute(
                    "SELECT * FROM heartbeat WHERE thread_type='master' "
                    "AND pulse>?",
                    (time.time() - self.max_wait - cursor.lag, ))
                masters = cursor.fetchall()
                if not masters:
                    terminate = True
                self.last_heartbeat_from_master = time.time()
            if terminate:
                self.terminate(
                    "%s of master inactivity (spent %s%% time idle)" %
                    (br.pretty_time(self.max_wait), idle))
        return
Example #9
    def process_subjob(self, id_hash, sim_scores, subjob_num, num_subjobs):
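        # Save this subjob's scores to disk; once every subjob has reported in, merge them into one dataframe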
        output = pd.DataFrame()
        subjob_out_dir = os.path.join(self.output, id_hash)
        if os.path.isdir(subjob_out_dir):
            full_id_hash = "%s_%s_%s" % (subjob_num, num_subjobs, id_hash)
            sim_scores.to_csv(os.path.join(
                subjob_out_dir, "%s_of_%s.sim_df" % (subjob_num, num_subjobs)),
                              index=False)

            with helpers.ExclusiveConnect(self.wrkdb_path) as cursor:
                # Confirm that the job is still being waited on and wasn't killed before adding to the `complete` table
                waiting = cursor.execute(
                    "SELECT master_id FROM waiting WHERE hash=?",
                    (id_hash, )).fetchall()
                processing = cursor.execute(
                    "SELECT worker_id FROM processing WHERE hash=? AND worker_id=?",
                    (
                        full_id_hash,
                        self.heartbeat.id,
                    )).fetchall()
                complete = cursor.execute(
                    "SELECT hash FROM complete WHERE hash=?",
                    (full_id_hash, )).fetchall()

                if waiting and processing and not complete:
                    cursor.execute("INSERT INTO complete (hash) "
                                   "VALUES (?)", (full_id_hash, ))

                cursor.execute("DELETE FROM processing WHERE hash=?",
                               (full_id_hash, ))
                complete_count = cursor.execute(
                    "SELECT COUNT(*) FROM complete "
                    "WHERE hash LIKE '%%_%s'" % id_hash).fetchone()[0]

            if complete_count == num_subjobs:
                output = pd.DataFrame(
                    columns=["seq1", "seq2", "subsmat", "psi"])
                for indx in range(1, num_subjobs + 1):
                    next_df = os.path.join(
                        subjob_out_dir,
                        "%s_of_%s.sim_df" % (indx, num_subjobs))
                    # pd.concat replaces the long-deprecated DataFrame.append
                    output = pd.concat(
                        [output, pd.read_csv(next_df, index_col=False)])
        return output
Example #10
    def start(self):
        self.split_time = time.time()
        self.start_time = time.time()

        self.heartbeat.start()
        self.worker_file = os.path.join(self.working_dir,
                                        "Worker_%s" % self.heartbeat.id)
        with open(self.worker_file, "w") as ofile:
            ofile.write("To terminate this Worker, simply delete this file.")

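        # The hidden .dat file accumulates the similarity scores written out during the all-by-all step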
        self.data_file = os.path.join(self.working_dir,
                                      ".Worker_%s.dat" % self.heartbeat.id)
        open(self.data_file, "w").close()

        helpers.dummy_func()

        self.last_heartbeat_from_master = time.time()
        self.printer.write("Starting Worker_%s" % self.heartbeat.id)
        self.printer.new_line(1)

        idle_countdown = 1
        while os.path.isfile(self.worker_file):
            idle = round(100 * self.idle / (self.idle + self.running), 2)
            if not idle_countdown:
                self.printer.write("Idle %s%%" % idle)
                idle_countdown = 5

            # Make sure there are some masters still kicking around
            self.check_masters(idle)

            # Check for and clean up dead threads and orphaned jobs every twentieth(ish) time through
            rand_check = random()
            if rand_check > 0.95:
                self.clean_dead_threads()

            # Fetch a job from the queue
            data = self.fetch_queue_job()
            if data:
                full_name, psipred_dir, align_m, align_p, trimal, gap_open, gap_extend = data
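                # full_name is either a bare id_hash or 'subjob-num_total-subjobs_id-hash' for a subjob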
                subjob_num, num_subjobs, id_hash = [1, 1, full_name] if len(full_name.split("_")) == 1 \
                    else full_name.split("_")
                subjob_num = int(subjob_num)
                num_subjobs = int(num_subjobs)
                self.printer.write("Running %s" % full_name)
            else:
                time.sleep(
                    random() * self.idle_workers()
                )  # Pause for some time relative to num idle workers
                idle_countdown -= 1
                self.idle += time.time() - self.split_time
                self.split_time = time.time()
                continue

            try:
                idle_countdown = 1
                seqbuddy = Sb.SeqBuddy(os.path.join(self.output,
                                                    "%s.seqs" % id_hash),
                                       in_format="fasta")

                # Prepare alignment
                if len(seqbuddy) == 1:
                    raise ValueError("Queued job of size 1 encountered: %s" %
                                     id_hash)
                else:
                    if num_subjobs == 1:
                        self.printer.write("Creating MSA (%s seqs)" %
                                           len(seqbuddy))
                        alignment = Alb.generate_msa(Sb.make_copy(seqbuddy),
                                                     align_m,
                                                     params=align_p,
                                                     quiet=True)
                    else:
                        self.printer.write("Reading MSA (%s seqs)" %
                                           len(seqbuddy))
                        alignment = Alb.AlignBuddy(
                            os.path.join(self.output, "%s.aln" % id_hash))

                # Prepare psipred dataframes
                psipred_dfs = self.prepare_psipred_dfs(seqbuddy, psipred_dir)

                if num_subjobs == 1:  # This is starting a full job from scratch, not a sub-job
                    # Need to specify what columns the PsiPred files map to now that there are gaps.
                    psipred_dfs = rdmcl.update_psipred(alignment, psipred_dfs,
                                                       "msa")

                    # TrimAl
                    self.printer.write("Trimal (%s seqs)" % len(seqbuddy))
                    alignment = rdmcl.trimal(seqbuddy, trimal, alignment)

                    with helpers.ExclusiveConnect(os.path.join(
                            self.output, "write.lock"),
                                                  max_lock=0):
                        # Place these write commands in ExclusiveConnect to ensure a writing lock
                        if not os.path.isfile(
                                os.path.join(self.output, "%s.aln" % id_hash)):
                            alignment.write(os.path.join(
                                self.output, "%s.aln" % id_hash),
                                            out_format="fasta")

                    # Re-update PsiPred files now that some columns, possibly including non-gap characters, are removed
                    self.printer.write("Updating %s psipred dataframes" %
                                       len(seqbuddy))
                    psipred_dfs = rdmcl.update_psipred(alignment, psipred_dfs,
                                                       "trimal")

                # Prepare all-by-all list
                self.printer.write("Preparing all-by-all data")
                data_len, data = rdmcl.prepare_all_by_all(
                    seqbuddy, psipred_dfs, self.cpus)

                if num_subjobs == 1 and data_len > self.cpus * self.job_size_coff:
                    data_len, data, subjob_num, num_subjobs = self.spawn_subjobs(
                        id_hash, data, psipred_dfs, gap_open, gap_extend)
                elif subjob_num > 1:
                    data_len, data = self.load_subjob(id_hash, subjob_num,
                                                      num_subjobs, psipred_dfs)

                # Launch multicore
                self.printer.write("Running all-by-all data (%s comparisons)" %
                                   data_len)
                with open(self.data_file, "w") as ofile:
                    ofile.write("seq1,seq2,subsmat,psi")

                br.run_multicore_function(data,
                                          rdmcl.mc_score_sequences,
                                          quiet=True,
                                          max_processes=self.cpus,
                                          func_args=[
                                              alignment, gap_open, gap_extend,
                                              self.data_file
                                          ])

                self.printer.write("Processing final results")
                self.process_final_results(id_hash, subjob_num, num_subjobs)

                self.running += time.time() - self.split_time
                self.split_time = time.time()

            except (OSError, FileNotFoundError, br.GuessError,
                    ValueError) as err:
                if num_subjobs == 1:
                    self.terminate(
                        "something wrong with primary cluster %s\n%s" %
                        (full_name, err))
                else:
                    with helpers.ExclusiveConnect(self.wrkdb_path) as cursor:
                        cursor.execute("DELETE FROM processing WHERE hash=?",
                                       (full_name, ))
                    continue

        # Broken out of while loop, clean up and terminate worker
        if os.path.isfile(self.data_file):
            os.remove(self.data_file)

        self.terminate("deleted check file")
Example #11
def main():
    in_args = argparse_init()

    workdb = os.path.join(in_args.workdb, "work_db.sqlite")
    heartbeatdb = os.path.join(in_args.workdb, "heartbeat_db.sqlite")

    connection = sqlite3.connect(workdb)
    cur = connection.cursor()
    for sql in [
            'CREATE TABLE queue (hash TEXT PRIMARY KEY, psi_pred_dir TEXT, '
            'align_m TEXT, align_p TEXT, trimal TEXT, gap_open FLOAT, gap_extend FLOAT)',
            'CREATE TABLE processing (hash TEXT PRIMARY KEY, worker_id INTEGER)',
            'CREATE TABLE complete   (hash TEXT PRIMARY KEY)',
            'CREATE TABLE proc_comp   (hash TEXT PRIMARY KEY, master_id INTEGER)',
            'CREATE TABLE waiting (hash TEXT, master_id INTEGER)'
    ]:
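        # CREATE TABLE raises OperationalError if the table already exists; ignoring it keeps setup idempotent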
        try:
            cur.execute(sql)
        except sqlite3.OperationalError:
            pass

    cur.close()
    connection.close()

    connection = sqlite3.connect(heartbeatdb)
    cur = connection.cursor()
    try:
        cur.execute(
            'CREATE TABLE heartbeat (thread_id INTEGER PRIMARY KEY AUTOINCREMENT, '
            'thread_type TEXT, pulse INTEGER)')
    except sqlite3.OperationalError:
        pass

    cur.close()
    connection.close()

    wrkr = Worker(in_args.workdb,
                  heartrate=in_args.heart_rate,
                  max_wait=in_args.max_wait,
                  dead_thread_wait=in_args.dead_thread_wait,
                  cpus=in_args.max_cpus,
                  job_size_coff=in_args.job_size,
                  log=in_args.log,
                  quiet=in_args.quiet)
    valve = br.SafetyValve(5)
    while True:  # The only way out is through Worker.terminate
        try:
            valve.step("Too many Worker crashes detected.")
            wrkr.start()

        except KeyboardInterrupt:
            wrkr.terminate("KeyboardInterrupt")

        except Exception as err:
            with helpers.ExclusiveConnect(wrkr.wrkdb_path) as cursor:
                cursor.execute("DELETE FROM processing WHERE worker_id=?",
                               (wrkr.heartbeat.id, ))

            if "Too many Worker crashes detected" in str(err):
                wrkr.terminate("too many Worker crashes")

            tb = "%s: %s\n\n" % (type(err).__name__, err)
            for _line in traceback.format_tb(sys.exc_info()[2]):
                if os.name == "nt":
                    _line = re.sub(
                        r'"(?:[A-Za-z]:)*\{0}.*\{0}(.*)?"'.format(os.sep),
                        r'"\1"', _line)
                else:
                    _line = re.sub('"{0}.*{0}(.*)?"'.format(os.sep), r'"\1"',
                                   _line)
                tb += _line
            print("\nWorker_%s crashed!\n" % wrkr.heartbeat.id, tb)
Example #12
    def clean_dead_threads(self):
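        # Drop heartbeat rows for threads that have stopped pulsing, then clear any jobs and files they orphaned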
        with helpers.ExclusiveConnect(self.hbdb_path) as cursor:
            wait_time = time.time() - self.dead_thread_wait - cursor.lag
            dead_masters = cursor.execute(
                "SELECT thread_id FROM heartbeat WHERE thread_type='master' "
                "AND pulse < ?", (wait_time, )).fetchall()
            if dead_masters:
                dead_masters = ", ".join([str(x[0]) for x in dead_masters])
                cursor.execute(
                    "DELETE FROM heartbeat WHERE thread_id IN (%s)" %
                    dead_masters)
            else:
                dead_masters = ""

            master_ids = cursor.execute(
                "SELECT thread_id FROM heartbeat WHERE thread_type='master'"
            ).fetchall()
            master_ids = "" if not master_ids else ", ".join(
                [str(x[0]) for x in master_ids])

            dead_workers = cursor.execute(
                "SELECT * FROM heartbeat WHERE thread_type='worker' "
                "AND pulse < ?", (wait_time, )).fetchall()
            if dead_workers:
                dead_workers = ", ".join([str(x[0]) for x in dead_workers])
                cursor.execute(
                    "DELETE FROM heartbeat WHERE thread_id IN (%s)" %
                    dead_workers)

        with helpers.ExclusiveConnect(self.wrkdb_path, max_lock=120) as cursor:
            # Remove any jobs in the 'processing' table where the worker is dead
            if dead_workers:
                cursor.execute(
                    "DELETE FROM processing WHERE worker_id IN (%s)" %
                    dead_workers)
            # Add master ids from orphaned entries to the dead masters list
            orphans = cursor.execute("SELECT master_id FROM waiting "
                                     "WHERE master_id NOT IN (%s)" %
                                     master_ids).fetchall()
            orphans += cursor.execute("SELECT master_id FROM proc_comp "
                                      "WHERE master_id NOT IN (%s)" %
                                      master_ids).fetchall()
            if orphans:
                orphans = ", ".join([str(x[0]) for x in orphans])
                dead_masters += ", %s" % orphans if dead_masters else orphans

            if dead_masters:  # Again, guard against an empty (syntactically invalid) "IN ()" clause
                cursor.execute("DELETE FROM waiting WHERE master_id IN (%s)" %
                               dead_masters)
                cursor.execute("DELETE FROM proc_comp WHERE master_id IN (%s)" %
                               dead_masters)
            waiting_hashes = cursor.execute(
                "SELECT hash FROM waiting").fetchall()
            waiting_hashes = [x[0] for x in waiting_hashes]
            dead_hashes = []
            all_hashes = cursor.execute("SELECT hash FROM queue").fetchall()
            all_hashes += cursor.execute(
                "SELECT hash FROM processing").fetchall()
            all_hashes += cursor.execute(
                "SELECT hash FROM complete").fetchall()
            all_hashes = [x[0] for x in all_hashes]
            for next_hash in all_hashes:
                if next_hash.split("_")[-1] not in waiting_hashes:
                    dead_hashes.append(next_hash)
            if dead_hashes:
                dead_hashes = set(dead_hashes)
                for id_hash in dead_hashes:
                    for del_file in [
                            "%s.%s" % (id_hash, x)
                            for x in ["graph", "aln", "seqs"]
                    ]:
                        try:
                            os.remove(os.path.join(self.output, del_file))
                        except FileNotFoundError:
                            pass
                    # Remove the per-hash output directory once, after its files are cleaned up
                    shutil.rmtree(os.path.join(self.output, id_hash),
                                  ignore_errors=True)
                dead_hashes = "'%s'" % "', '".join(dead_hashes)
                cursor.execute("DELETE FROM queue WHERE hash IN (%s)" %
                               dead_hashes)
                cursor.execute("DELETE FROM complete WHERE hash IN (%s)" %
                               dead_hashes)
        return