示例#1
0
    def test_job_complete(self):
        """Exercise job_complete state transitions and error conditions."""
        root = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')

        # freshly-built nodes are not complete
        self.assertFalse(job_complete(root))
        self.assertFalse(job_complete(root.Children[0]))
        self.assertFalse(job_complete(root.Children[1].Children[1]))

        # asking about a tip node is an error
        self.assertRaises(JobError, job_complete,
                          root.Children[0].Children[0])

        poll_fp = 'test_parallel_merge_otus_JOB_COMPLETE_TEST.poll'
        self.assertFalse(os.path.exists(poll_fp))

        # a poll file containing exit status 0 marks the job complete
        with open(poll_fp, 'w') as poll:
            poll.write('0\n')
        root.PollPath = poll_fp
        root.StartTime = 10

        self.assertTrue(job_complete(root))
        self.assertNotEqual(root.EndTime, None)
        self.assertNotEqual(root.TotalTime, None)

        # a nonzero exit status raises, before and after resetting Processed
        with open(poll_fp, 'w') as poll:
            poll.write('1\n')

        self.assertRaises(JobError, job_complete, root)
        root.Processed = False
        self.assertRaises(JobError, job_complete, root)

        os.remove(poll_fp)
示例#2
0
 def test_job_complete(self):
     """check if a job is complete"""
     def _write_poll(path, line):
         # overwrite the poll file with the given exit-status line
         fh = open(path, 'w')
         fh.write(line)
         fh.close()

     tree = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')

     # unpolled nodes report incomplete
     self.assertFalse(job_complete(tree))
     self.assertFalse(job_complete(tree.Children[0]))
     self.assertFalse(job_complete(tree.Children[1].Children[1]))

     # polling a tip node raises
     self.assertRaises(JobError, job_complete, tree.Children[0].Children[0])

     poll_file = 'test_parallel_merge_otus_JOB_COMPLETE_TEST.poll'
     self.assertFalse(os.path.exists(poll_file))

     # exit status 0 -> complete, with timing recorded
     _write_poll(poll_file, '0\n')
     tree.PollPath = poll_file
     tree.StartTime = 10

     self.assertTrue(job_complete(tree))
     self.assertNotEqual(tree.EndTime, None)
     self.assertNotEqual(tree.TotalTime, None)

     # nonzero exit status -> JobError, regardless of Processed flag
     _write_poll(poll_file, '1\n')

     self.assertRaises(JobError, job_complete, tree)
     tree.Processed = False
     self.assertRaises(JobError, job_complete, tree)

     os.remove(poll_file)
示例#3
0
    def test_job_complete(self):
        """Confirm completion detection and JobError cases for merge jobs."""
        node = mergeorder(["A", "B", "C", "D", "E"], "foo")

        # nothing has run yet, so nothing is complete
        self.assertFalse(job_complete(node))
        self.assertFalse(job_complete(node.Children[0]))
        self.assertFalse(job_complete(node.Children[1].Children[1]))

        # completion cannot be queried on a tip
        self.assertRaises(JobError, job_complete,
                          node.Children[0].Children[0])

        status_fp = "test_parallel_merge_otus_JOB_COMPLETE_TEST.poll"
        self.assertFalse(os.path.exists(status_fp))

        # write a success status and attach it to the node
        with open(status_fp, "w") as status:
            status.write("0\n")
        node.PollPath = status_fp
        node.StartTime = 10

        self.assertTrue(job_complete(node))
        self.assertNotEqual(node.EndTime, None)
        self.assertNotEqual(node.TotalTime, None)

        # write a failure status; completion checks must now raise
        with open(status_fp, "w") as status:
            status.write("1\n")

        self.assertRaises(JobError, job_complete, node)
        node.Processed = False
        self.assertRaises(JobError, job_complete, node)

        os.remove(status_fp)
示例#4
0
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    input_fps = opts.input_fps
    output_dir = opts.output_dir
    seconds_to_sleep = opts.seconds_to_sleep
    verbose = opts.verbose

    merge_otus_serial_script = 'merge_otu_tables.py'
    created_temp_paths = []

    # set the job_prefix either based on what the user passed in,
    # or a random string beginning with MOTU
    job_prefix = opts.job_prefix or get_random_job_prefix('MOTU')

    # A temporary output directory is created in output_dir named
    # job_prefix. Output files are then moved from the temporary
    # directory to the output directory when they are complete, allowing
    # a poller to detect when runs complete by the presence of their
    # output files.
    working_dir = '%s/%s' % (output_dir, job_prefix)
    try:
        makedirs(working_dir)
    except OSError:
        # working dir already exists
        pass

    import os.path
    # wrapper log output contains run details
    log_fp = os.path.join(output_dir, 'parallel_merge_otus.log')

    wrapper_log_output = open(log_fp, 'w')
    wrapper_log_output.write("Parallel merge output\n\n")

    # construct the dependency tree
    import os

    for f in input_fps:
        if not os.path.exists(f):
            raise IOError("%f does not exist!" % f)

    tree = mergeorder(input_fps, working_dir)

    if verbose:
        print tree.asciiArt()

    wrapper_log_output.write('Dependency tree:\n')
    wrapper_log_output.write(tree.asciiArt())
    wrapper_log_output.write('\n\n')
    wrapper_log_output.flush()

    to_process = initial_nodes_to_merge(tree)
    has_dependencies = initial_has_dependencies(tree, to_process)

    # loop until the whole shabang is done
    pending = []  # jobs that are currently running

    while not tree.Processed:
        # check if we have nodes to process, if so, shoot them off
        for node in to_process:
            if opts.cluster:
                start_job(node,
                          merge_otus_serial_script,
                          qiime_config['torque_queue'],
                          wrap_call=torque_job)
            else:
                start_job(node,
                          merge_otus_serial_script,
                          qiime_config['torque_queue'],
                          wrap_call=local_job)

            wrapper_log_output.write(node.FullCommand)
            wrapper_log_output.write('\n')
            wrapper_log_output.flush()

            pending.append(node)
        to_process = set([])

        # check running jobs
        current_pending = []
        for pending_node in pending:
            # if we're complete, update state
            if job_complete(pending_node):
                wrapper_log_output.write(
                    "Node %s completed in %f seconds" %
                    (pending_node.Name, pending_node.TotalTime))
                wrapper_log_output.write('\n')
                wrapper_log_output.flush()
            else:
                current_pending.append(pending_node)
        pending = current_pending

        # check for new jobs to add
        current_dependencies = []
        for dep_node in has_dependencies:
            # if children are satisfied, then allow for processing
            # the logic here is odd to handle the case where an internal node
            # has both a tip that is a child and child that is an internal node
            children_are_complete = [(c.Processed or c.istip())
                                     for c in dep_node.Children]
            if all(children_are_complete):
                to_process.add(dep_node)
            else:
                current_dependencies.append(dep_node)
        has_dependencies = current_dependencies

        sleep(seconds_to_sleep)
    os.rename(tree.FilePath, "%s/%s" % (output_dir, "merged.biom"))
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    input_fps = opts.input_fps
    output_dir = opts.output_dir
    seconds_to_sleep = opts.seconds_to_sleep
    verbose = opts.verbose

    merge_otus_serial_script = 'merge_otu_tables.py'
    created_temp_paths = []

    # set the job_prefix either based on what the user passed in,
    # or a random string beginning with MOTU
    job_prefix = opts.job_prefix or get_random_job_prefix('MOTU')

    # A temporary output directory is created in output_dir named
    # job_prefix. Output files are then moved from the temporary
    # directory to the output directory when they are complete, allowing
    # a poller to detect when runs complete by the presence of their
    # output files.
    working_dir = '%s/%s' % (output_dir, job_prefix)
    try:
        makedirs(working_dir)
    except OSError:
    # working dir already exists
        pass

    import os.path
    # wrapper log output contains run details
    log_fp = os.path.join(output_dir, 'parallel_merge_otus.log')

    wrapper_log_output = open(log_fp, 'w')
    wrapper_log_output.write("Parallel merge output\n\n")

    # construct the dependency tree
    import os

    for f in input_fps:
        if not os.path.exists(f):
            raise IOError("%f does not exist!" % f)

    tree = mergeorder(input_fps, working_dir)

    if verbose:
        print tree.asciiArt()

    wrapper_log_output.write('Dependency tree:\n')
    wrapper_log_output.write(tree.asciiArt())
    wrapper_log_output.write('\n\n')
    wrapper_log_output.flush()

    to_process = initial_nodes_to_merge(tree)
    has_dependencies = initial_has_dependencies(tree, to_process)

    # loop until the whole shabang is done
    pending = []  # jobs that are currently running

    while not tree.Processed:
        # check if we have nodes to process, if so, shoot them off
        for node in to_process:
            if opts.cluster:
                start_job(node, merge_otus_serial_script,
                          qiime_config['torque_queue'], wrap_call=torque_job)
            else:
                start_job(node, merge_otus_serial_script,
                          qiime_config['torque_queue'], wrap_call=local_job)

            wrapper_log_output.write(node.FullCommand)
            wrapper_log_output.write('\n')
            wrapper_log_output.flush()

            pending.append(node)
        to_process = set([])

        # check running jobs
        current_pending = []
        for pending_node in pending:
            # if we're complete, update state
            if job_complete(pending_node):
                wrapper_log_output.write("Node %s completed in %f seconds" %
                                         (pending_node.Name, pending_node.TotalTime))
                wrapper_log_output.write('\n')
                wrapper_log_output.flush()
            else:
                current_pending.append(pending_node)
        pending = current_pending

        # check for new jobs to add
        current_dependencies = []
        for dep_node in has_dependencies:
            # if children are satisfied, then allow for processing
            # the logic here is odd to handle the case where an internal node
            # has both a tip that is a child and child that is an internal node
            children_are_complete = [(c.Processed or c.istip())
                                     for c in dep_node.Children]
            if all(children_are_complete):
                to_process.add(dep_node)
            else:
                current_dependencies.append(dep_node)
        has_dependencies = current_dependencies

        sleep(seconds_to_sleep)
    os.rename(tree.FilePath, "%s/%s" % (output_dir, "merged.biom"))