Example #1
    def __init__(self, out_store=None, overrides=Namespace()):
        """
        Make a new context, so we can run the library.
        
        Takes an optional Namespace of overrides for default toil-vg and Toil
        configuration values.
        
        Overrides can also have a "config" key, in which case that config file
        will be loaded.
        
        """

        # Load configuration and apply overrides. If the overrides are from
        # command-line options, we might also get a bunch of tool-specific
        # fields.
        self.config = apply_config_file_args(overrides)

        # Make a container runner for running tools
        self.runner = ContainerRunner(
            container_tool_map=get_container_tool_map(self.config),
            realtime_stderr=self.config.realTimeStderr)

        if out_store is not None:
            # We want to dump files to an IOStore
            # Make it an absolute path while we're getting set up.
            self.out_store_string = IOStore.absolute(out_store)
        else:
            # We don't want to use an out store
            self.out_store_string = None
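A minimal usage sketch for this constructor, assuming the enclosing class is toil-vg's Context; the class name, import path, and example values are not shown in this excerpt, so treat them as assumptions:

from argparse import Namespace
from toil_vg.context import Context  # assumed import path

# Hypothetical usage: point output at a local directory and ask for a custom
# config file to be loaded (per the docstring's "config" key).
overrides = Namespace()
overrides.config = 'my_toil_vg_config.yaml'

context = Context(out_store='./copy-output', overrides=overrides)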
Example #2
    def get_out_store(self):
        """
        Return the IOStore to write output files to, or None if they should not
        be written anywhere besides the Toil file store.
        """

        if self.out_store_string is not None:
            return IOStore.get(self.out_store_string)
        else:
            return None
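A short sketch of the calling pattern this getter implies: obtain the store once and only write when one was configured. The write_output_file call mirrors the one in Example #5; the context variable and file names are illustrative:

# Hypothetical caller; "context" is a Context built as in Example #1.
out_store = context.get_out_store()
if out_store is not None:
    # Copy a finished result from the local working directory to the out store.
    out_store.write_output_file('work/calls.vcf.gz', 'calls.vcf.gz')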
Example #3
def copy_everything(job, options):
    """
    Download the file list and copy all the files.
    
    """

    # Set up the IO stores.
    in_store = IOStore.get(options.in_store)
    out_store = IOStore.get(options.out_store)

    batch_count = 0

    # List all the files.
    blobs_iterator = in_store.list_input_directory("", recursive=True)

    # Make an iterator that filters them
    filtered_iterator = (x for x in blobs_iterator
                         if fnmatch.fnmatchcase(x, options.pattern))

    # Batch them up
    for batch in group(filtered_iterator, options.batch_size):

        # For every batch, strip out any Nones that got put in when grouping
        batch = [x for x in batch if x is not None]

        # Copy everything in that batch
        job.addChildJobFn(copy_batch,
                          options,
                          batch,
                          cores=1,
                          memory="1G",
                          disk="10G")

        batch_count += 1

        if batch_count % 10 == 0:

            RealtimeLogger.info("Queued {} batches...".format(batch_count))

    RealtimeLogger.info("Queued {} total batches".format(batch_count))
Example #4
    def _assertOutput(self, sample_name, outstore, f1_threshold=0.90):
        """
        Check that the vcfeval F1 score recorded in the given out store is at
        least f1_threshold. If sample_name is set, it prefixes the F1 file name.
        """

        # grab the f1.txt file
        local_f1 = os.path.join(self.workdir, 'f1.txt')
        io_store = IOStore.get(outstore)
        f1_path = 'vcfeval_output_f1.txt'
        if sample_name:
            f1_path = sample_name + '_' + f1_path
        io_store.read_input_file(f1_path, local_f1)

        with open(local_f1) as f1_file:
            f1_score = float(f1_file.readline().strip())
        print(f1_score)
        self.assertGreaterEqual(f1_score, f1_threshold)
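A hedged sketch of how a test might invoke this helper once a run has populated the out store; the sample name and threshold here are illustrative, not taken from the project's tests:

# Hypothetical call from a test method.
self._assertOutput('NA12878', os.path.join(self.workdir, 'outstore'),
                   f1_threshold=0.95)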
Example #5
def copy_batch(job, options, batch):
    """
    Copy a batch of files from input to output.
    """

    RealtimeLogger.info("Copying a batch")

    # Set up the IO stores.
    in_store = IOStore.get(options.in_store)
    out_store = IOStore.get(options.out_store)

    # Start some threads
    pool = ThreadPool(10)

    def download(filename):
        """
        Copy a single file from in_store to out_store, skipping files that
        already exist unless overwriting or a size mismatch is detected.
        """

        try:

            if (not options.overwrite) and out_store.exists(filename):
                # The file already exists; decide whether to skip it or
                # verify its size first.

                if not options.check_size:
                    # Skip existing file. No need to check the length.
                    RealtimeLogger.info("Skipped {}".format(filename))
                    return

                out_size = out_store.get_size(filename)
                in_size = in_store.get_size(filename)
                if out_size != in_size:
                    # Complain about size mismatch and copy
                    RealtimeLogger.warning(
                        "Redownloading {}! Size was {} and not {}!".format(
                            filename, out_size, in_size))
                else:
                    # Skip existing file
                    RealtimeLogger.info("Skipped {}".format(filename))
                    return

            # Make a temp file
            (handle,
             path) = tempfile.mkstemp(dir=job.fileStore.getLocalTempDir())
            os.close(handle)

            RealtimeLogger.debug("Download {}".format(filename))

            # Download
            in_store.read_input_file(filename, path)
            # Store
            out_store.write_output_file(path, filename)

            # Clean up
            os.unlink(path)

        except:
            # Put all exception text into a new exception and raise that, so
            # the original traceback is not lost when the error propagates
            # out of the worker thread.
            raise Exception("".join(
                traceback.format_exception(*sys.exc_info())))

        RealtimeLogger.info("Copied {}".format(filename))

    # Run all the downloads in parallel, then shut the pool down
    pool.map(download, batch)
    pool.close()
    pool.join()
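Neither example shows how these job functions are launched. A minimal driver along the following lines would start copy_everything as the root job of a Toil workflow; apart from the fields referenced in the excerpts (in_store, out_store, pattern, batch_size, overwrite, check_size), the option names and defaults are assumptions:

import argparse
from toil.common import Toil
from toil.job import Job

def main():
    # Hypothetical driver sketch, not the project's actual entry point.
    parser = argparse.ArgumentParser(description='Copy files between IOStores')
    parser.add_argument('in_store')
    parser.add_argument('out_store')
    parser.add_argument('--pattern', default='*')
    parser.add_argument('--batch_size', type=int, default=1000)
    parser.add_argument('--overwrite', action='store_true')
    parser.add_argument('--check_size', action='store_true')
    Job.Runner.addToilOptions(parser)
    options = parser.parse_args()

    with Toil(options) as toil:
        toil.start(Job.wrapJobFn(copy_everything, options))

if __name__ == '__main__':
    main()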