Example #1
def validate_import_bam(t, bam_path, fasta_sequences, genome):
    validate_bam_fasta_pairs(bam_path, fasta_sequences, genome)
    return [
        FileID.forPath(t.importFile('file://' + bam_path), bam_path),
        FileID.forPath(t.importFile('file://' + bam_path + '.bai'),
                       bam_path + '.bai')
    ]
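FileID is a str subclass in Toil, so the value returned here can be passed anywhere a plain job-store ID string is expected while also carrying the imported file's byte size. A minimal sketch of what FileID.forPath records; the scratch file and dummy ID are hypothetical, created only for illustration:

import os
import tempfile

from toil.fileStores import FileID

# Create a small scratch file so forPath has something to stat (illustration only).
with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp.write(b'x' * 1024)

fid = FileID.forPath('dummy-jobstore-id', tmp.name)
assert isinstance(fid, str)   # still usable as an ordinary job-store ID string
assert fid.size == 1024       # byte size stat'ed from the local path at creation time
os.unlink(tmp.name)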
Example #2
def align_transcripts(args, toil_options):
    """
    Main entry function for transcript alignment toil pipeline
    :param args: dictionary of arguments from CAT
    :param toil_options: toil options Namespace object
    """
    with Toil(toil_options) as t:
        if not t.options.restart:
            input_file_ids = argparse.Namespace()
            input_file_ids.ref_genome_fasta = tools.toilInterface.write_fasta_to_filestore(t, args.ref_genome_fasta)
            input_file_ids.genome_fasta = tools.toilInterface.write_fasta_to_filestore(t, args.genome_fasta)
            input_file_ids.annotation_gp = FileID.forPath(t.importFile('file://' + args.annotation_gp),
                                                          args.annotation_gp)
            input_file_ids.ref_db = FileID.forPath(t.importFile('file://' + args.ref_db_path), args.ref_db_path)
            input_file_ids.modes = {}
            file_ids = [input_file_ids.ref_genome_fasta, input_file_ids.genome_fasta, input_file_ids.annotation_gp,
                        input_file_ids.ref_db]
            for mode in args.transcript_modes:
                input_file_ids.modes[mode] = t.importFile('file://' + args.transcript_modes[mode]['gp'])
                file_ids.append(input_file_ids.modes[mode])
            disk_usage = tools.toilInterface.find_total_disk_usage(file_ids)
            job = Job.wrapJobFn(setup, args, input_file_ids, memory='16G', disk=disk_usage)
            results_file_ids = t.start(job)
        else:
            results_file_ids = t.restart()
        for file_path, file_id in results_file_ids.items():
            tools.fileOps.ensure_file_dir(file_path)
            t.exportFile(file_id, 'file://' + file_path)
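The CAT entry points in these examples all follow the same shape: import the inputs as FileIDs, hand them to a root job, start (or restart) the workflow, then export the results. A stripped-down sketch of that pattern follows; run_pipeline, setup, args.some_input and args.output_path are placeholders for illustration, not CAT code:

import argparse

from toil.common import Toil
from toil.fileStores import FileID
from toil.job import Job


def setup(job, args, input_file_ids):
    # Placeholder root job: a real pipeline would fan out child jobs here.
    out_path = job.fileStore.getLocalTempFile()
    with open(out_path, 'w') as fh:
        fh.write('done\n')
    return job.fileStore.writeGlobalFile(out_path)


def run_pipeline(args, toil_options):
    with Toil(toil_options) as t:
        if not t.options.restart:
            input_file_ids = argparse.Namespace()
            input_file_ids.some_input = FileID.forPath(
                t.importFile('file://' + args.some_input), args.some_input)
            job = Job.wrapJobFn(setup, args, input_file_ids, memory='8G', disk='8G')
            result_file_id = t.start(job)
        else:
            result_file_id = t.restart()
        t.exportFile(result_file_id, 'file://' + args.output_path)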
Example #3
def augustus_pb(args, toil_options):
    """
    Main entry function for AugustusPB toil pipeline
    :param args: dictionary of arguments from CAT
    :param toil_options: toil options Namespace object
    :return:
    """
    with Toil(toil_options) as t:
        if not t.options.restart:
            input_file_ids = argparse.Namespace()
            input_file_ids.genome_fasta = tools.toilInterface.write_fasta_to_filestore(
                t, args.genome_fasta)
            input_file_ids.chrom_sizes = FileID.forPath(
                t.importFile('file://' + args.chrom_sizes), args.chrom_sizes)
            input_file_ids.pb_cfg = FileID.forPath(
                t.importFile('file://' + args.pb_cfg), args.pb_cfg)
            input_file_ids.hints_gff = FileID.forPath(
                t.importFile('file://' + args.hints_gff), args.hints_gff)
            job = Job.wrapJobFn(setup,
                                args,
                                input_file_ids,
                                memory='16G',
                                disk='32G')
            raw_gtf_file_id, gtf_file_id, joined_gp_file_id = t.start(job)
        else:
            raw_gtf_file_id, gtf_file_id, joined_gp_file_id = t.restart()
        tools.fileOps.ensure_file_dir(args.augustus_pb_raw_gtf)
        t.exportFile(raw_gtf_file_id, 'file://' + args.augustus_pb_raw_gtf)
        t.exportFile(gtf_file_id, 'file://' + args.augustus_pb_gtf)
        t.exportFile(joined_gp_file_id, 'file://' + args.augustus_pb_gp)
Example #4
def hints_db(hints_args, toil_options):
    """
    Entry point for hints database Toil pipeline.
    """
    def validate_import_bam(t, bam_path, fasta_sequences, genome):
        validate_bam_fasta_pairs(bam_path, fasta_sequences, genome)
        return [FileID.forPath(t.importFile('file://' + bam_path), bam_path),
                FileID.forPath(t.importFile('file://' + bam_path + '.bai'), bam_path + '.bai')]

    fasta = pyfasta.Fasta(hints_args.fasta)
    fasta_sequences = {(x.split()[0], len(fasta[x])) for x in fasta.keys()}
    with Toil(toil_options) as t:
        if not t.options.restart:
            # load the RNA-seq data, if we have any
            bam_file_ids = {'BAM': {}, 'INTRONBAM': {}}
            for dtype in ['BAM', 'INTRONBAM']:
                if hints_args.genome not in hints_args.cfg[dtype]:
                    continue
                for bam_path in hints_args.cfg[dtype][hints_args.genome]:
                    bam_file_ids[dtype][os.path.basename(bam_path)] = validate_import_bam(t, bam_path,
                                                                                          fasta_sequences,
                                                                                          hints_args.genome)

            # load the IsoSeq data, if we have any
            iso_seq_file_ids = []
            if hints_args.genome in hints_args.cfg['ISO_SEQ_BAM']:
                for bam_path in hints_args.cfg['ISO_SEQ_BAM'][hints_args.genome]:
                    validate_bam_fasta_pairs(bam_path, fasta_sequences, hints_args.genome)
                    iso_seq_file_ids.append(validate_import_bam(t, bam_path, fasta_sequences, hints_args.genome))

            if hints_args.annotation_gp is None:
                annotation_file_id = None
            else:
                annotation_file_id = FileID.forPath(t.importFile('file://' + hints_args.annotation_gp),
                                                    hints_args.annotation_gp)
            if hints_args.protein_fasta is None:
                protein_fasta_file_id = genome_fasta_file_id = None
            else:
                protein_fasta_file_id = FileID.forPath(t.importFile('file://' + hints_args.protein_fasta),
                                                       hints_args.protein_fasta)
                genome_fasta_file_id = FileID.forPath(t.importFile('file://' + hints_args.fasta), hints_args.fasta)

            input_file_ids = {'bams': bam_file_ids,
                              'iso_seq_bams': iso_seq_file_ids,
                              'annotation': annotation_file_id,
                              'protein_fasta': protein_fasta_file_id,
                              'genome_fasta': genome_fasta_file_id}
            if len(input_file_ids['bams']) + len(input_file_ids['iso_seq_bams']) > 0:
                logger.info('All BAMs validated for {}. Beginning Toil hints pipeline'.format(hints_args.genome))

            disk_usage = tools.toilInterface.find_total_disk_usage(input_file_ids)
            job = Job.wrapJobFn(setup_hints, input_file_ids, disk=disk_usage)
            combined_hints = t.start(job)
        else:
            logger.info('Restarting Toil hints pipeline for {}.'.format(hints_args.genome))
            combined_hints = t.restart()
        tools.fileOps.ensure_file_dir(hints_args.hints_path)
        t.exportFile(combined_hints, 'file://' + hints_args.hints_path)
Example #5
def write_fasta_to_filestore(toil, fasta_local_path):
    """
    Convenience function that loads a fasta and its associated gdx/flat file into the fileStore.
    Assumes that the paths are consistent with the requirements (i.e. $path.gdx and $path.flat)
    :param toil: Toil context manager
    :param fasta_local_path: Path to local fasta to load.
    :return: List of fileStore IDs for fasta, fasta_gdx, fasta_flat
    """
    fasta_file_id = FileID.forPath(toil.importFile('file:///' + fasta_local_path), fasta_local_path)
    gdx_file_id = FileID.forPath(toil.importFile('file:///' + fasta_local_path + '.gdx'), fasta_local_path + '.gdx')
    flat_file_id = FileID.forPath(toil.importFile('file:///' + fasta_local_path + '.flat'), fasta_local_path + '.flat')
    return fasta_file_id, gdx_file_id, flat_file_id
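Called from inside a with Toil(toil_options) as t: block, the helper returns a triple of FileIDs. A hedged usage sketch with hypothetical paths; the .gdx and .flat siblings must already exist next to the fasta:

# Hypothetical usage from one of the entry points above, inside a Toil context manager;
# /data/genome.fa plus its .gdx and .flat siblings are assumed to exist.
def import_genome(t):
    fasta_id, gdx_id, flat_id = write_fasta_to_filestore(t, '/data/genome.fa')
    print(fasta_id.size, gdx_id.size, flat_id.size)  # sizes recorded by FileID.forPath
    return fasta_id, gdx_id, flat_id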
Example #6
def writeGlobalFile(self, localFileName, cleanup=False):
    absLocalFileName = self._resolveAbsoluteLocalPath(localFileName)
    creatorID = self.jobGraph.jobStoreID
    fileStoreID = self.jobStore.writeFile(absLocalFileName, creatorID,
                                          cleanup)
    self.localFileMap[fileStoreID].append(absLocalFileName)
    return FileID.forPath(fileStoreID, absLocalFileName)
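Because the returned FileID carries the file's size, a job can use it directly when sizing follow-up work. A hedged sketch; produce and consume are hypothetical job functions, not Toil API:

import os


def produce(job):
    # Hypothetical job: write an output into the file store, then size the consumer
    # from the FileID returned by writeGlobalFile.
    out_path = job.fileStore.getLocalTempFile()
    with open(out_path, 'wb') as fh:
        fh.write(b'0' * 1024)
    file_id = job.fileStore.writeGlobalFile(out_path)
    job.addChildJobFn(consume, file_id, disk=file_id.size)
    return file_id


def consume(job, file_id):
    # Hypothetical consumer: read the file back out of the job store.
    local_path = job.fileStore.readGlobalFile(file_id)
    return os.path.getsize(local_path)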
Example #7
    def writeGlobalFileStream(self, cleanup=False):
        """
        Similar to writeGlobalFile, but allows the writing of a stream to the job store.
        The yielded file handle does not need to and should not be closed explicitly.

        :param bool cleanup: is as in :func:`toil.fileStores.abstractFileStore.AbstractFileStore.writeGlobalFile`.
        :return: A context manager yielding a tuple of
                  1) a file handle which can be written to and
                  2) the toil.fileStores.FileID of the resulting file in the job store.
        """
        
        # TODO: Make this work with FileID
        with self.jobStore.writeFileStream(None if not cleanup else self.jobGraph.jobStoreID) as (backingStream, fileStoreID):
            # We have a string version of the file ID, and the backing stream.
            # We need to yield a stream the caller can write to, and a FileID
            # that accurately reflects the size of the data written to the
            # stream. We assume the stream is not seekable.
            
            # Make and keep a reference to the file ID, which is currently empty
            fileID = FileID(fileStoreID, 0)
            
            # Wrap the stream to increment the file ID's size for each byte written
            wrappedStream = WriteWatchingStream(backingStream)
            
            # When the stream is written to, count the bytes
            def handle(numBytes):
                fileID.size += numBytes 
            wrappedStream.onWrite(handle)
            
            yield wrappedStream, fileID
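From a job's point of view, the context manager yields a writable handle plus a FileID whose size grows with every byte written. A hedged usage sketch; write_report is a hypothetical job function:

def write_report(job):
    # Hypothetical job body: stream data into the job store and keep the FileID.
    with job.fileStore.writeGlobalFileStream(cleanup=True) as (writable, file_id):
        writable.write(b'example payload\n')
    # file_id.size now reflects the bytes written above.
    return file_id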
Example #8
def _importFile(self,
                otherCls,
                url,
                sharedFileName=None,
                hardlink=False,
                symlink=False):
    if issubclass(otherCls, FileJobStore):
        if sharedFileName is None:
            executable = os.stat(url.path).st_mode & stat.S_IXUSR != 0
            absPath = self._getUniqueFilePath(
                url.path
            )  # use this to get a valid path to write to in job store
            with self.optionalHardCopy(hardlink):
                self._copyOrLink(url, absPath, symlink=symlink)
            # TODO: os.stat(absPath).st_size consistently gives values lower than
            # getDirSizeRecursively()
            return FileID(self._getFileIdFromPath(absPath),
                          os.stat(absPath).st_size, executable)
        else:
            self._requireValidSharedFileName(sharedFileName)
            path = self._getSharedFilePath(sharedFileName)
            with self.optionalHardCopy(hardlink):
                self._copyOrLink(url, path, symlink=symlink)
            return None
    else:
        return super(FileJobStore,
                     self)._importFile(otherCls,
                                       url,
                                       sharedFileName=sharedFileName)
Example #9
    def _importFile(self, otherCls, url, sharedFileName=None, hardlink=False):
        """
        Import the file at the given URL using the given job store class to retrieve that file.
        See also :meth:`.importFile`. This method applies a generic approach to importing: it
        asks the other job store class for a stream and writes that stream as either a regular or
        a shared file.

        :param AbstractJobStore otherCls: The concrete subclass of AbstractJobStore that supports
               reading from the given URL and getting the file size from the URL.

        :param urlparse.ParseResult url: The location of the file to import.

        :param str sharedFileName: Optional name to assign to the imported file within the job store

        :return: The jobStoreFileId of the imported file, or None if sharedFileName was given
        :rtype: toil.fileStores.FileID or None
        """
        if sharedFileName is None:
            with self.writeFileStream() as (writable, jobStoreFileID):
                size = otherCls._readFromUrl(url, writable)
                return FileID(jobStoreFileID, size)
        else:
            self._requireValidSharedFileName(sharedFileName)
            with self.writeSharedFileStream(sharedFileName) as writable:
                otherCls._readFromUrl(url, writable)
                return None
Example #10
def writeGlobalFile(self, localFileName, cleanup=False):
    absLocalFileName = self._resolveAbsoluteLocalPath(localFileName)
    creatorID = self.jobDesc.jobStoreID
    fileStoreID = self.jobStore.writeFile(absLocalFileName, creatorID, cleanup)
    if absLocalFileName.startswith(self.localTempDir):
        # Only files in the appropriate directory should become local files
        # we can delete with deleteLocalFile
        self.localFileMap[fileStoreID].append(absLocalFileName)
    return FileID.forPath(fileStoreID, absLocalFileName)
Example #11
def chaining(args, toil_options):
    """entry point to this program"""
    with Toil(toil_options) as t:
        if not t.options.restart:
            input_file_ids = argparse.Namespace()
            input_file_ids.hal = FileID.forPath(t.importFile('file://' + args.hal), args.hal)
            input_file_ids.query_sizes = FileID.forPath(t.importFile('file://' + args.query_sizes), args.query_sizes)
            input_file_ids.query_two_bit = FileID.forPath(t.importFile('file://' + args.query_two_bit),
                                                          args.query_two_bit)
            target_two_bit_file_ids = {genome: FileID.forPath(t.importFile('file://' + f), f)
                                       for genome, f in args.target_two_bits.iteritems()}
            input_file_ids.target_two_bits = target_two_bit_file_ids
            job = Job.wrapJobFn(setup, args, input_file_ids)
            chain_file_ids = t.start(job)
        else:
            chain_file_ids = t.restart()
        for chain_file, chain_file_id in chain_file_ids.iteritems():
            tools.fileOps.ensure_file_dir(chain_file)
            t.exportFile(chain_file_id, 'file://' + chain_file)
Example #12
    def writeGlobalFileStream(
        self,
        cleanup: bool = False,
        basename: Optional[str] = None,
        encoding: Optional[str] = None,
        errors: Optional[str] = None
    ) -> Iterator[Tuple[Union[BinaryIO, TextIO], FileID]]:
        """
        Similar to writeGlobalFile, but allows the writing of a stream to the job store.
        The yielded file handle does not need to and should not be closed explicitly.

        :param encoding: The name of the encoding used to decode the file. Encodings are the same as
                for decode(). Defaults to None which represents binary mode.

        :param errors: Specifies how encoding errors are to be handled. Errors
                are the same as for open(). Defaults to 'strict' when an encoding is specified.

        :param cleanup: is as in :func:`toil.fileStores.abstractFileStore.AbstractFileStore.writeGlobalFile`.

        :param basename: If supported by the backing JobStore, use the given
               file basename so that when searching the job store with a query
               matching that basename, the file will be detected.

        :return: A context manager yielding a tuple of
                  1) a file handle which can be written to and
                  2) the toil.fileStores.FileID of the resulting file in the job store.
        """

        with self.jobStore.writeFileStream(self.jobDesc.jobStoreID, cleanup,
                                           basename, encoding,
                                           errors) as (backingStream,
                                                       fileStoreID):

            # We have a string version of the file ID, and the backing stream.
            # We need to yield a stream the caller can write to, and a FileID
            # that accurately reflects the size of the data written to the
            # stream. We assume the stream is not seekable.

            # Make and keep a reference to the file ID, which is currently empty
            fileID = FileID(fileStoreID, 0)

            # Wrap the stream to increment the file ID's size for each byte written
            wrappedStream = WriteWatchingStream(backingStream)

            # When the stream is written to, count the bytes
            def handle(numBytes: int) -> None:
                # No scope problem here, because we don't assign to a fileID local
                fileID.size += numBytes

            wrappedStream.onWrite(handle)

            yield wrappedStream, fileID
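The encoding and basename parameters added in this version allow text-mode writes. A hedged variant of the earlier streaming sketch, assuming the text-mode handle accepts str writes; write_text_report is hypothetical:

def write_text_report(job):
    # Hypothetical job body using the text-mode signature shown above.
    with job.fileStore.writeGlobalFileStream(
            encoding='utf-8', basename='report.txt') as (writable, file_id):
        writable.write('example text payload\n')
    return file_id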
Example #13
File: utils.py Project: mr-c/toil
def download_structure(
    file_store: AbstractFileStore,
    index: Dict[str, str],
    existing: Dict[str, str],
    dir_dict: DirectoryStructure,
    into_dir: str,
) -> None:
    """
    Download a whole nested dictionary of files and directories from the
    Toil file store to a local path.

    :param file_store: The Toil file store to download from.

    :param index: Maps from downloaded file path back to input Toil URI.

    :param existing: Maps from file_store_id URI to downloaded file path.

    :param dir_dict: a dict from string to string (for files) or dict (for
    subdirectories) describing a directory structure.

    :param into_dir: The directory to download the top-level dict's files
    into.
    """

    logger.debug("Downloading directory with %s items", len(dir_dict))

    for name, value in dir_dict.items():
        if name == ".":
            # Skip this key that isn't a real child file.
            continue
        if isinstance(value, dict):
            # This is a subdirectory, so make it and download
            # its contents
            logger.debug("Downloading subdirectory %s", name)
            subdir = os.path.join(into_dir, name)
            os.mkdir(subdir)
            download_structure(file_store, index, existing, value, subdir)
        else:
            # This must be a file path uploaded to Toil.
            assert isinstance(value, str)
            assert value.startswith("toilfile:")
            logger.debug("Downloading contained file %s", name)
            dest_path = os.path.join(into_dir, name)
            # So download the file into place
            file_store.readGlobalFile(
                FileID.unpack(value[len("toilfile:") :]), dest_path, symlink=True
            )
            # Update the index dicts
            # TODO: why?
            index[dest_path] = value
            existing[value] = dest_path
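The 'toilfile:' values in the structure dict are just packed FileIDs, so the lookup above is a pack/unpack round trip. A minimal sketch with a hypothetical ID and size:

from toil.fileStores import FileID

fid = FileID('some-jobstore-id', 2048)                # hypothetical ID and size
uri = 'toilfile:' + fid.pack()                        # how the structure dict stores it
restored = FileID.unpack(uri[len('toilfile:'):])      # how download_structure reads it back
assert restored == fid and restored.size == fid.size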
Example #14
def augustus(args, coding_gp, toil_options):
    """
    Main entry function for Augustus toil pipeline
    :param args: dictionary of arguments from CAT
    :param coding_gp: genePred with only coding transcripts
    :param toil_options: toil options Namespace object
    """
    with Toil(toil_options) as t:
        if not t.options.restart:
            input_file_ids = argparse.Namespace()
            input_file_ids.genome_fasta = tools.toilInterface.write_fasta_to_filestore(
                t, args.genome_fasta)
            input_file_ids.tm_cfg = FileID.forPath(
                t.importFile('file://' + args.tm_cfg), args.tm_cfg)
            input_file_ids.coding_gp = FileID.forPath(
                t.importFile('file://' + coding_gp), coding_gp)
            input_file_ids.ref_psl = FileID.forPath(
                t.importFile('file://' + args.ref_psl), args.ref_psl)
            input_file_ids.tm_psl = FileID.forPath(
                t.importFile('file://' + args.filtered_tm_psl),
                args.filtered_tm_psl)
            input_file_ids.annotation_gp = FileID.forPath(
                t.importFile('file://' + args.annotation_gp),
                args.annotation_gp)
            file_ids = [
                input_file_ids.genome_fasta, input_file_ids.coding_gp,
                input_file_ids.ref_psl, input_file_ids.tm_psl,
                input_file_ids.annotation_gp
            ]
            if args.augustus_tmr:
                input_file_ids.augustus_hints_db = FileID.forPath(
                    t.importFile('file://' + args.augustus_hints_db),
                    args.augustus_hints_db)
                input_file_ids.tmr_cfg = FileID.forPath(
                    t.importFile('file://' + args.tmr_cfg), args.tmr_cfg)
                file_ids.append(input_file_ids.augustus_hints_db)
            disk_usage = tools.toilInterface.find_total_disk_usage(file_ids)
            job = Job.wrapJobFn(setup,
                                args,
                                input_file_ids,
                                disk_usage,
                                disk=disk_usage)
            tm_file_id, tmr_file_id = t.start(job)
        else:
            tm_file_id, tmr_file_id = t.restart()
        tools.fileOps.ensure_file_dir(args.augustus_tm_gtf)
        t.exportFile(tm_file_id, 'file://' + args.augustus_tm_gtf)
        if tmr_file_id is not None:
            tools.fileOps.ensure_file_dir(args.augustus_tmr_gtf)
            t.exportFile(tmr_file_id, 'file://' + args.augustus_tmr_gtf)
Example #15
    def writeGlobalFileStream(self, cleanup=False, basename=None):
        """
        Similar to writeGlobalFile, but allows the writing of a stream to the job store.
        The yielded file handle does not need to and should not be closed explicitly.

        :param bool cleanup: is as in :func:`toil.fileStores.abstractFileStore.AbstractFileStore.writeGlobalFile`.
        
        :param str basename: If supported by the backing JobStore, use the given
               file basename so that when searching the job store with a query
               matching that basename, the file will be detected.
        
        :return: A context manager yielding a tuple of
                  1) a file handle which can be written to and
                  2) the toil.fileStores.FileID of the resulting file in the job store.
        """

        with self.jobStore.writeFileStream(self.jobGraph.jobStoreID, cleanup,
                                           basename) as (backingStream,
                                                         fileStoreID):

            # We have a string version of the file ID, and the backing stream.
            # We need to yield a stream the caller can write to, and a FileID
            # that accurately reflects the size of the data written to the
            # stream. We assume the stream is not seekable.

            # Make and keep a reference to the file ID, which is currently empty
            fileID = FileID(fileStoreID, 0)

            # Wrap the stream to increment the file ID's size for each byte written
            wrappedStream = WriteWatchingStream(backingStream)

            # When the stream is written to, count the bytes
            def handle(numBytes):
                # No scope problem here, because we don't assign to a fileID local
                fileID.size += numBytes

            wrappedStream.onWrite(handle)

            yield wrappedStream, fileID
Example #16
def augustus_cgp(args, toil_options):
    """
    Main entry function for AugustusCGP toil pipeline
    :param args: dictionary of arguments from CAT
    :param toil_options: toil options Namespace object
    :return:
    """
    with Toil(toil_options) as t:
        if not t.options.restart:
            input_file_ids = argparse.Namespace()
            input_file_ids.hal = FileID.forPath(t.importFile('file://' + args.hal), args.hal)
            input_file_ids.chrom_sizes = FileID.forPath(t.importFile('file://' + args.query_sizes), args.query_sizes)
            input_file_ids.hints_db = FileID.forPath(t.importFile('file://' + args.hints_db), args.hints_db)
            if args.cgp_param is not None:
                input_file_ids.cgp_param = FileID.forPath(t.importFile('file://' + args.cgp_param), args.cgp_param)
            else:
                input_file_ids.cgp_param = None
                input_file_ids.gtf = FileID.forPath(t.importFile('file://' + args.gtf), args.gtf)
            input_file_ids.cgp_cfg = FileID.forPath(t.importFile('file://' + args.cgp_cfg), args.cgp_cfg)
            input_file_ids.fasta = {genome: FileID.forPath(t.importFile('file://' + fasta), fasta)
                                    for genome, fasta in args.fasta_files.items()}
            du = tools.toilInterface.find_total_disk_usage([input_file_ids.hints_db], buffer='4G')
            job = Job.wrapJobFn(setup, args, input_file_ids, memory='8G', disk=du)
            results, stdout_file_ids, param_file_id = t.start(job)
        else:
            results, stdout_file_ids, param_file_id = t.restart()
        tools.fileOps.ensure_file_dir(args.stdout_file)
        with open(args.stdout_file, 'w') as outf, tools.fileOps.TemporaryFilePath() as tmp:
            for (chrom, start, chunksize), stdout_file in stdout_file_ids.items():
                outf.write('## BEGIN CHUNK chrom: {} start: {} chunksize: {}\n'.format(chrom, start, chunksize))
                t.exportFile(stdout_file, 'file://' + tmp)
                for l in open(tmp):
                    outf.write(l)
        for genome, (raw_gtf_file_id, joined_gtf_file_id, joined_gp_file_id) in results.items():
            tools.fileOps.ensure_file_dir(args.augustus_cgp_raw_gtf[genome])
            t.exportFile(raw_gtf_file_id, 'file://' + args.augustus_cgp_raw_gtf[genome])
            t.exportFile(joined_gtf_file_id, 'file://' + args.augustus_cgp_gtf[genome])
            t.exportFile(joined_gp_file_id, 'file://' + args.augustus_cgp_gp[genome])
        if args.cgp_param is None:
            t.exportFile(param_file_id, 'file://' + args.param_out_path)
Example #17
File: cwlTest.py Project: mr-c/toil
    def test_download_structure(self) -> None:
        """
        Make sure that download_structure makes the right calls to what it thinks is the file store.
        """

        # Define what we would download
        fid1 = FileID('afile', 10, False)
        fid2 = FileID('adifferentfile', 1000, True)

        # And what directory structure it would be in
        structure = {
            'dir1': {
                'dir2': {
                    'f1': 'toilfile:' + fid1.pack(),
                    'f1again': 'toilfile:' + fid1.pack(),
                    'dir2sub': {}
                },
                'dir3': {}
            },
            'anotherfile': 'toilfile:' + fid2.pack()
        }

        # Say where to put it on the filesystem
        to_dir = self._createTempDir()

        # Make a fake file store
        file_store = Mock(AbstractFileStore)

        # These will be populated.
        # TODO: This cache seems unused. Remove it?
        # This maps filesystem path to CWL URI
        index = {}
        # This maps CWL URI to filesystem path
        existing = {}

        # Do the download
        download_structure(file_store, index, existing, structure, to_dir)

        # Check the results
        # 3 files should be made
        self.assertEqual(len(index), 3)
        # From 2 unique URIs
        self.assertEqual(len(existing), 2)

        # Make sure that the index contents (path to URI) are correct
        self.assertIn(os.path.join(to_dir, 'dir1/dir2/f1'), index)
        self.assertIn(os.path.join(to_dir, 'dir1/dir2/f1again'), index)
        self.assertIn(os.path.join(to_dir, 'anotherfile'), index)
        self.assertEqual(index[os.path.join(to_dir, 'dir1/dir2/f1')],
                         structure['dir1']['dir2']['f1'])
        self.assertEqual(index[os.path.join(to_dir, 'dir1/dir2/f1again')],
                         structure['dir1']['dir2']['f1again'])
        self.assertEqual(index[os.path.join(to_dir, 'anotherfile')],
                         structure['anotherfile'])

        # And the existing contents (URI to path)
        self.assertIn('toilfile:' + fid1.pack(), existing)
        self.assertIn('toilfile:' + fid2.pack(), existing)
        self.assertIn(existing['toilfile:' + fid1.pack()], [
            os.path.join(to_dir, 'dir1/dir2/f1'),
            os.path.join(to_dir, 'dir1/dir2/f1again')
        ])
        self.assertEqual(existing['toilfile:' + fid2.pack()],
                         os.path.join(to_dir, 'anotherfile'))

        # The directory structure should be created for real
        self.assertTrue(os.path.isdir(os.path.join(to_dir, 'dir1')))
        self.assertTrue(os.path.isdir(os.path.join(to_dir, 'dir1/dir2')))
        self.assertTrue(
            os.path.isdir(os.path.join(to_dir, 'dir1/dir2/dir2sub')))
        self.assertTrue(os.path.isdir(os.path.join(to_dir, 'dir1/dir3')))

        # The file store should have been asked to do the download
        file_store.readGlobalFile.assert_has_calls([
            call(fid1, os.path.join(to_dir, 'dir1/dir2/f1'), symlink=True),
            call(fid1, os.path.join(to_dir, 'dir1/dir2/f1again'),
                 symlink=True),
            call(fid2, os.path.join(to_dir, 'anotherfile'), symlink=True)
        ],
                                                   any_order=True)