def install_tool(self, executable):
    """Install the given tool if it is not already available.

    The installation is delegated to the shell script
    ``install_<executable>.sh``, located through ``bioconvert_data`` with
    ``where="../misc"``. Nothing is done when ``executable`` is already
    found by :func:`shutil.which`.

    :param str executable: name of the executable to install
    :return: nothing
    """
    if shutil.which(executable) is not None:
        return

    # Imported lazily: the helper is only needed when something has to be
    # installed.
    from bioconvert import bioconvert_data

    logger.info("Installing tool : " + executable)
    script = bioconvert_data('install_' + executable + '.sh', where="../misc")
    status = subprocess.call(['sh', script])
    if status != 0:
        # Best effort: report the failure instead of silently ignoring it.
        logger.error(
            "Installation script {} exited with status {}".format(
                script, status))
def install_tool(self, executable):
    """Install ``executable`` unless it can already be found on the system.

    When :func:`shutil.which` does not find ``executable``, the script
    ``install_<executable>.sh`` (resolved by ``bioconvert_data`` with
    ``where="../misc"``) is run with ``sh``.

    :param str executable: executable to install
    :return: nothing
    """
    if shutil.which(executable) is not None:
        # Already installed, nothing to do.
        return

    # Local import, deferred until an installation is really required.
    from bioconvert import bioconvert_data

    logger.info("Installing tool : " + executable)
    script = bioconvert_data(
        'install_' + executable + '.sh', where="../misc")
    returncode = subprocess.call(['sh', script])
    if returncode != 0:
        # Keep the best-effort behaviour (no exception) but make the
        # failure visible in the logs.
        logger.error(
            "Installation script {} exited with status {}".format(
                script, returncode))
class shell:
    """Run a command line through the system shell.

    "Instantiating" the class (``shell("cmd")``) runs the command right away;
    ``__new__`` returns the result of the run instead of a ``shell`` object.
    The class-level settings below are shared by every call.
    """

    # Extra keyword arguments forwarded to ``subprocess.Popen``.
    _process_args = {}
    # Text put before / after every command line.
    _process_prefix = ""
    _process_suffix = ""

    @classmethod
    def executable(cls, cmd):
        """Set the shell executable used to run the commands.

        When the executable is named ``bash``, the prefix is also reset to
        ``set -euo pipefail; ``.
        """
        if os.path.split(cmd)[-1] == "bash":
            cls._process_prefix = "set -euo pipefail; "
        cls._process_args["executable"] = cmd

    @classmethod
    def prefix(cls, prefix):
        """Set the text inserted before every command."""
        cls._process_prefix = prefix

    @classmethod
    def suffix(cls, suffix):
        """Set the text appended after every command."""
        cls._process_suffix = suffix

    @staticmethod
    def iter_stdout(proc, cmd):
        """Yield the decoded stdout lines of ``proc`` without their newline.

        :raises subprocess.CalledProcessError: once the output is exhausted,
            if the process finished with a non-zero exit status.
        """
        for line in proc.stdout:
            yield line.rstrip(b"\n").decode()
        retcode = proc.wait()
        if retcode:
            raise sp.CalledProcessError(retcode, cmd)

    def __new__(cls, cmd, *args, async_run=False, iterable=False,
                read=False, **kwargs):
        """Execute ``cmd`` in a shell.

        :param str cmd: command line to run
        :param bool async_run: return the ``Popen`` object without waiting
            for the command to finish. The legacy keyword ``async`` is a
            reserved word since Python 3.7; it is still honoured when passed
            through ``**{"async": True}``.
        :param bool iterable: return an iterator over the stdout lines
        :param bool read: wait for the command and return its whole stdout
        :return: a line iterator (``iterable``), the captured stdout
            (``read``), the ``Popen`` object (``async_run``) or ``None``
        :raises KeyError: if a ``stepout`` keyword argument is given
        :raises subprocess.CalledProcessError: if the command exits with a
            non-zero status
        """
        if "stepout" in kwargs:
            raise KeyError("Argument stepout is not allowed in shell command.")
        # Backward compatibility with the pre-3.7 parameter name.
        async_run = kwargs.pop("async", async_run)

        # Capture the output only when the caller is going to consume it.
        stdout = sp.PIPE if iterable or async_run or read else STDOUT
        close_fds = sys.platform != 'win32'

        logger.info(cmd)
        proc = sp.Popen(
            "{} {} {}".format(
                cls._process_prefix, cmd.rstrip(), cls._process_suffix),
            bufsize=-1,
            shell=True,
            stdout=stdout,
            close_fds=close_fds,
            **cls._process_args)

        ret = None
        if iterable:
            return cls.iter_stdout(proc, cmd)
        if read:
            ret = proc.stdout.read()
        elif async_run:
            return proc
        retcode = proc.wait()
        if retcode:
            raise sp.CalledProcessError(retcode, cmd)
        return ret
def wrapped(inst, *args, **kwargs):
    """Run the wrapped conversion, then compress its output if requested.

    If ``inst.outfile`` ends with ``.gz``, ``.bz2`` or ``.dsrc``, the
    extension is stripped while ``func`` runs, so the conversion writes an
    uncompressed file. The file is then compressed with ``pigz``,
    ``pbzip2`` or ``dsrc`` and ``inst.outfile`` gets its extension back.

    :param inst: converter instance; ``outfile``, ``threads`` and ``shell``
        are used here
    :return: whatever ``func`` returns
    """
    # Local import: only needed to quote file names for the shell.
    import shlex

    requested_outfile = inst.outfile
    output_compressed = None
    # NOTE: .dsrc only applies to FastQ files.
    if inst.outfile.endswith((".gz", ".bz2", ".dsrc")):
        (inst.outfile, output_compressed) = splitext(inst.outfile)
    # Now inst has the uncompressed output file name

    try:
        # computation
        results = func(inst, *args, **kwargs)

        # Compress output and restore inst output file name.
        # File names are quoted so that spaces or shell metacharacters
        # cannot break (or hijack) the command line.
        plain_outfile = shlex.quote(inst.outfile)
        if output_compressed == ".gz":
            # TODO: this uses -f (an existing .gz file is overwritten);
            # the original note on what it should be was left unfinished.
            logger.info("Compressing output into .gz")
            inst.shell("pigz -f -p {} {}".format(inst.threads, plain_outfile))
            inst.outfile = inst.outfile + ".gz"
        elif output_compressed == ".bz2":
            logger.info("Compressing output into .bz2")
            inst.shell("pbzip2 -f -p{} {}".format(inst.threads, plain_outfile))
            inst.outfile = inst.outfile + ".bz2"
        elif output_compressed == ".dsrc":
            # !!! only for FastQ files
            logger.info("Compressing output into .dsrc")
            inst.shell("dsrc c -t{} {} {}".format(
                inst.threads, plain_outfile,
                shlex.quote(inst.outfile + ".dsrc")))
            inst.outfile = inst.outfile + ".dsrc"
    except BaseException:
        # Do not leave the instance with the stripped name when the
        # conversion or the compression fails.
        inst.outfile = requested_outfile
        raise
    return results
def wrapped(inst, *args, **kwargs):
    """Run the wrapped conversion with transparent (de)compression.

    * If ``inst.infile`` ends with ``.gz``, it is decompressed with
      ``unpigz`` into a temporary file that ``func`` reads instead;
      ``inst.infile`` is restored afterwards.
    * If ``inst.outfile`` ends with ``.gz``, ``.bz2`` or ``.dsrc``, the
      extension is stripped while ``func`` runs, then the result is
      compressed with ``pigz``, ``pbzip2`` or ``dsrc`` and the extension is
      put back on ``inst.outfile``.

    :param inst: converter instance; ``infile``, ``outfile``, ``threads``
        and ``shell`` are used here
    :return: whatever ``func`` returns
    """
    # Local import: only needed to quote file names for the shell.
    import shlex

    infile_name = inst.infile
    requested_outfile = inst.outfile
    output_compressed = None
    # NOTE: .dsrc only applies to FastQ files.
    if inst.outfile.endswith((".gz", ".bz2", ".dsrc")):
        (inst.outfile, output_compressed) = splitext(inst.outfile)
    # Now inst has the uncompressed output file name

    try:
        if infile_name.endswith(".gz"):
            # decompress input
            # TODO: https://stackoverflow.com/a/29371584/1878788
            logger.info("Generating uncompressed version of %s " % infile_name)
            (ungz_name, _) = splitext(infile_name)
            # Keep the format extension (e.g. ".fastq") on the temporary
            # file.
            (_, base_suffix) = splitext(ungz_name)
            with TempFile(suffix=base_suffix) as ungz_infile:
                inst.infile = ungz_infile.name
                try:
                    inst.shell("unpigz -c -p {} {} > {}".format(
                        inst.threads,
                        shlex.quote(infile_name),
                        shlex.quote(inst.infile)))
                    # computation
                    results = func(inst, *args, **kwargs)
                finally:
                    # Always point back to the real input, even on failure.
                    inst.infile = infile_name
        else:
            results = func(inst, *args, **kwargs)

        # Compress output and restore inst output file name.
        # File names are quoted so that spaces or shell metacharacters
        # cannot break (or hijack) the command line.
        plain_outfile = shlex.quote(inst.outfile)
        if output_compressed == ".gz":
            # TODO: this uses -f (an existing .gz file is overwritten);
            # the original note on what it should be was left unfinished.
            logger.info("Compressing output into .gz")
            inst.shell("pigz -f -p {} {}".format(inst.threads, plain_outfile))
            inst.outfile = inst.outfile + ".gz"
        elif output_compressed == ".bz2":
            logger.info("Compressing output into .bz2")
            inst.shell("pbzip2 -f -p{} {}".format(inst.threads, plain_outfile))
            inst.outfile = inst.outfile + ".bz2"
        elif output_compressed == ".dsrc":
            # !!! only for FastQ files
            logger.info("Compressing output into .dsrc")
            inst.shell("dsrc c -t{} {} {}".format(
                inst.threads, plain_outfile,
                shlex.quote(inst.outfile + ".dsrc")))
            inst.outfile = inst.outfile + ".dsrc"
    except BaseException:
        # Do not leave the instance with the stripped name when the
        # conversion or the compression fails.
        inst.outfile = requested_outfile
        raise
    return results
def _method_biopython(self, *args, **kwargs):
    """Convert the input to the output file through ``SeqIO``.

    The input (``self.infile``) is parsed as ``fastq`` and the parsed
    records are written to ``self.outfile`` as ``fasta``. Extra positional
    and keyword arguments are accepted but not used.
    """
    logger.info("Executing biopython")
    # The parsed records are handed directly to the writer.
    SeqIO.write(SeqIO.parse(self.infile, "fastq"), self.outfile, "fasta")
def __init__(self, infile, outfile, force=False):
    """.. rubric:: constructor

    :param str infile: The path of the input file.
    :param str outfile: The path of the output file.
    :param bool force: overwrite output file if it exists already
        otherwise raises an error
    :raises ValueError: if ``infile`` does not exist, or if ``outfile``
        exists and ``force`` is False
    :raises IOError: if ``outfile`` ends with ``.dsrc`` but not with
        ``.fastq.dsrc``
    :raises SystemExit: if no converter is registered for the requested
        pair of extensions
    """
    if not os.path.exists(infile):
        msg = "Incorrect input file: %s" % infile
        _log.error(msg)
        raise ValueError(msg)

    # check existence of output file. If it exists,
    # fails except if force argument is set to True
    if os.path.exists(outfile):
        msg = "output file {} exists already".format(outfile)
        _log.warning("output file exists already")
        if force is False:
            _log.critical(
                "output file exists. If you are using bioconvert, use --force "
            )
            raise ValueError(msg)
        else:
            _log.warning("output file will be overwritten")

    # Only fastq files can be compressed with dsrc:
    # dsrc accepts only the .fastq file extension
    if outfile.endswith(".dsrc") and not outfile.endswith(".fastq.dsrc"):
        msg = ("When compressing with .dsrc extension, "
               "only files ending with .fastq extension are "
               "accepted. This is due to the way dsrc executable "
               "is implemented.")
        _log.critical(msg)
        raise IOError(msg)

    # case1: fastq.gz to fasta.bz2
    # Here, we want to decompress, convert, compress.
    # so we need the extension without .gz or .bz2
    # We should have inext set to fastq and outext
    # set to fasta.bz2
    self.inext = getext(infile, remove_compression=True)
    self.outext = getext(outfile, remove_compression=True)

    # Case 2, fastq.gz to fastq.bz2
    # data is not changed, just the type of compression, so we want
    # to keep the original extensions, here inext and outext will contain
    # .gz and .bz2
    if self.inext == self.outext:
        _log.info("decompression/compression mode")
        self.inext = getext(infile)
        self.outext = getext(outfile)

    self.mapper = Registry()

    # From the input parameters 1 and 2, we get the module name
    _log.info("Input: {}".format(self.inext))
    _log.info("Output: {}".format(self.outext))
    try:
        class_converter = self.mapper[(self.inext, self.outext)]
    except KeyError:
        # This module name was not found
        msg = "Requested input format ({}) to output format ({}) is not available in bioconvert"
        _log.critical(msg.format(self.inext, self.outext))
        _log.critical(
            "Use --formats to know the available formats and --help for examples"
        )
        sys.exit(1)
    self.name = class_converter.__name__

    self.converter = class_converter(infile, outfile)
    _log.info("Using {} class".format(self.converter.name))