def process_single_infile(wdl_file: WDLFile, fileStore: AbstractFileStore) -> WDLFile:
    """
    Import the file referenced by ``wdl_file.file_path`` through ``fileStore.importFile``.

    Handling depends on the path's scheme:

    - ``http://``, ``https://``, ``file://`` and ``wasb://`` URLs are imported as given.
    - ``s3://`` URLs are imported as given first; if that fails, each region in
      ``EC2Regions`` is tried through its ``http://s3.<region>.amazonaws.com/``
      endpoint until one import succeeds.
    - ``gs://`` URLs are rewritten to ``https://storage.googleapis.com/...``.
    - Anything else is treated as a local path and imported as ``file://<abspath>``.

    :param wdl_file: the WDLFile whose ``file_path`` should be imported.
    :param fileStore: object providing ``importFile(url)``.
    :return: a new WDLFile holding the imported file reference, the original
             base name as ``file_name``, and ``imported=True``.
    :raises RuntimeError: if an ``s3://`` URL cannot be imported directly or
                          through any regional endpoint.
    """
    f = wdl_file.file_path
    logger.info('Importing %s into the jobstore.', f)

    # The base name is identical before and after any URL rewriting below,
    # because rewriting only replaces the scheme/host prefix.
    preserveThisFilename = os.path.basename(f)

    if f.startswith(('http://', 'https://', 'file://', 'wasb://')):
        filepath = fileStore.importFile(f)
    elif f.startswith('s3://'):
        try:
            filepath = fileStore.importFile(f)
        except Exception as direct_error:
            # Imported lazily: only needed when the direct S3 import fails.
            from toil.lib.ec2nodes import EC2Regions

            for region in EC2Regions:
                html_path = f'http://s3.{region}.amazonaws.com/' + f[5:]
                try:
                    filepath = fileStore.importFile(html_path)
                except Exception:
                    # This region does not serve the object; try the next one.
                    continue
                # Stop at the first region that works instead of re-importing
                # the same object from every remaining region.
                break
            else:
                raise RuntimeError('Unable to import: ' + f) from direct_error
    elif f.startswith('gs://'):
        filepath = fileStore.importFile('https://storage.googleapis.com/' + f[5:])
    else:
        filepath = fileStore.importFile("file://" + os.path.abspath(f))

    return WDLFile(file_path=filepath, file_name=preserveThisFilename, imported=True)
def _list_directory_for_diagnostics(path):
    """Return an ``ls -lha`` listing of ``path``, or a short note if it cannot be listed."""
    try:
        listing = subprocess.check_output(['ls', '-lha', path])
    except (subprocess.CalledProcessError, OSError) as err:
        # Best effort only: a failed listing must not hide the real error.
        return f'<unable to list {path}: {err}>'
    return listing.decode('utf-8', errors='replace')


def process_single_outfile(wdl_file: WDLFile, fileStore, workDir, outDir) -> WDLFile:
    """
    Locate a task output file, write it to the file store and export it to ``outDir``.

    The path in ``wdl_file.file_path`` is looked up, in order, as given, as an
    absolute path, under ``<workDir>/execution``, under ``./execution``, under
    ``workDir`` and finally under ``outDir``. The first existing candidate is
    written with ``fileStore.writeGlobalFile`` and exported with
    ``fileStore.export_file`` to ``file://<abspath(outDir)>/<basename>``.

    :param wdl_file: the WDLFile whose ``file_path`` names the expected output.
    :param fileStore: object providing ``writeGlobalFile`` and ``export_file``.
    :param workDir: the task's working directory.
    :param outDir: directory the output file is exported to.
    :return: a new WDLFile referencing the written file, with ``imported=True``.
    :raises RuntimeError: if the file is not found at any candidate location.
    """
    f = wdl_file.file_path

    def candidate_paths():
        # Generated lazily so later candidates are only computed when the
        # earlier ones do not exist.
        yield f
        yield os.path.abspath(f)
        yield os.path.join(workDir, 'execution', f)
        yield os.path.join('execution', f)
        yield os.path.join(workDir, f)
        yield os.path.join(outDir, f)

    output_f_path = next((path for path in candidate_paths() if os.path.exists(path)), None)

    if output_f_path is None:
        tmp = _list_directory_for_diagnostics(workDir)
        exe = _list_directory_for_diagnostics(os.path.join(workDir, 'execution'))
        for std_name in ('stdout', 'stderr'):
            std_path = os.path.join(workDir, 'execution', std_name)
            if os.path.exists(std_path):
                # Use a distinct handle name so that ``f`` still holds the
                # missing path when the error message is built below.
                with open(std_path, 'rb') as std_handle:
                    logger.info(std_handle.read())
        raise RuntimeError('OUTPUT FILE: {} was not found in {}!\n'
                           '{}\n\n'
                           '{}\n'.format(f, os.getcwd(), tmp, exe))

    output_file = fileStore.writeGlobalFile(output_f_path)
    preserveThisFilename = os.path.basename(output_f_path)
    fileStore.export_file(output_file, "file://" + os.path.join(os.path.abspath(outDir), preserveThisFilename))
    return WDLFile(file_path=output_file, file_name=preserveThisFilename, imported=True)
def testFn_Size(self):
    """
    Exercise size(), the functional equivalent of WDL's built-in 'size()'.

    Imports a small WDL file and the larger file at self.encode_data, then
    checks the reported sizes against lower bounds, both in bytes and in 'mb'.
    """
    from toil.common import Toil
    from toil.job import Job
    from toil.wdl.wdl_types import WDLFile

    run_options = Job.Runner.getDefaultOptions('./toilWorkflowRun')
    run_options.clean = 'always'

    with Toil(run_options) as toil:
        vocab_path = os.path.abspath('src/toil/test/wdl/testfiles/vocab.wdl')
        small_input = process_infile(WDLFile(file_path=vocab_path), toil)
        small_bytes = size(small_input)

        large_input = process_infile(WDLFile(file_path=self.encode_data), toil)
        large_bytes = size(large_input)
        large_megabytes = size(large_input, 'mb')

        # Each assertion reports the measured value on failure.
        assert small_bytes >= 1800, small_bytes
        assert large_bytes >= 70000000, large_bytes
        assert large_megabytes >= 70, large_megabytes
def size(f: Optional[Union[str, WDLFile, List[Union[str, WDLFile]]]] = None,
         unit: Optional[str] = 'B',
         fileStore: Optional[AbstractFileStore] = None) -> float:
    """
    Given a `File` and a `String` (optional), returns the size of the file in Bytes
    or in the unit specified by the second argument.

    Supported units are KiloByte ("K", "KB"), MegaByte ("M", "MB"), GigaByte
    ("G", "GB"), TeraByte ("T", "TB") (powers of 1000) as well as their binary
    version (https://en.wikipedia.org/wiki/Binary_prefix) "Ki" ("KiB"), "Mi"
    ("MiB"), "Gi" ("GiB"), "Ti" ("TiB") (powers of 1024). Default unit is
    Bytes ("B").

    WDL syntax: Float size(File, [String])
    Varieties:  Float size(File?, [String])
                Float size(Array[File], [String])
                Float size(Array[File?], [String])

    :param f: a path string, a WDLFile, a list of either, or None.
    :param unit: unit name handed to ``return_bytes`` to obtain the divisor.
    :param fileStore: passed through to ``process_infile``; only required if
                      the input has not been processed yet.
    :return: the size (summed over all defined entries for a list) divided by
             the unit's divisor; 0.0 when ``f`` is None.
    """
    if f is None:
        return 0.0

    # size() may be called directly with plain strings (e.g. size('file')),
    # in which case the argument has not been wrapped as a file yet.
    if isinstance(f, str):
        f = WDLFile(file_path=f)
    elif isinstance(f, list):
        # Array[File?]: undefined (None) entries contribute nothing to the
        # total, so drop them here instead of failing on them later.
        f = [WDLFile(file_path=sf) if isinstance(sf, str) else sf
             for sf in f if sf is not None]

    assert isinstance(f, (WDLFile, list)), \
        f'size() expects a "File" or "File?" argument! Not: {type(f)}'

    # Validate the input. fileStore is only required if the input is not processed.
    f = process_infile(f, fileStore)

    divisor = return_bytes(unit)

    if isinstance(f, list):
        total_size = sum(file.file_path.size for file in f)
        return total_size / divisor

    fileID = f.file_path
    return fileID.size / divisor
def abspath_single_file(f: WDLFile, cwd: str) -> WDLFile:
    """
    Re-anchor a file's path under ``cwd`` unless it already equals its absolute form.

    The file object is modified in place and also returned.

    :param f: the WDLFile whose ``file_path`` may be rewritten.
    :param cwd: directory joined in front of the path when it is rewritten.
    :return: the same ``f`` object.
    """
    current_path = f.file_path
    already_absolute = current_path == os.path.abspath(current_path)
    if already_absolute:
        return f

    f.file_path = os.path.join(cwd, current_path)
    return f