Exemplo n.º 1
0
    def install_tool(self, executable):
        """Install ``executable`` with its install script if it is missing.

        When ``shutil.which`` cannot find ``executable``, the script named
        ``install_<executable>.sh`` is resolved through
        ``bioconvert_data(..., where="../misc")`` and run with ``sh``.
        Nothing is done when the executable is already available.

        A non-zero exit status of the script is logged as a warning; no
        exception is raised.

        :param executable: name of the executable to install
        :return: nothing
        """
        if shutil.which(executable) is not None:
            return

        # Only needed when something actually has to be installed.
        from bioconvert import bioconvert_data

        logger.info("Installing tool : " + executable)
        script = bioconvert_data('install_' + executable + '.sh',
                                 where="../misc")
        # The exit status used to be discarded, so a failed installation
        # went unnoticed. Report it, but stay best-effort (no raise).
        retcode = subprocess.call(['sh', script])
        if retcode != 0:
            logger.warning(
                "Installation script for {} exited with status {}".format(
                    executable, retcode))
Exemplo n.º 2
0
    def install_tool(self, executable):
        """Install the given tool if the executable is not already present.

        The install script is looked up with
        ``bioconvert_data('install_<executable>.sh', where="../misc")`` and
        executed with ``sh``. If ``shutil.which`` already finds
        ``executable``, the method returns without doing anything.

        The script's exit status is checked: a non-zero status is reported
        through ``logger.warning`` (no exception is raised).

        :param executable: executable to install
        :return: nothing

        """
        if shutil.which(executable) is None:
            # Imported here so that it is only loaded when an installation
            # is really required.
            from bioconvert import bioconvert_data

            logger.info("Installing tool : " + executable)
            script = bioconvert_data(
                'install_' + executable + '.sh', where="../misc")
            status = subprocess.call(['sh', script])
            if status != 0:
                # Previously ignored: make a failed installation visible.
                logger.warning(
                    "Installation script for {} exited with status {}".format(
                        executable, status))
Exemplo n.º 3
0
class shell:
    """Run a command line through ``subprocess.Popen`` with ``shell=True``.

    ``shell(cmd)`` does not create an instance: ``__new__`` starts the
    command and returns its outcome directly. Class-level settings (prefix,
    suffix, executable) are shared by every call.
    """

    # Extra keyword arguments forwarded to every Popen call.
    _process_args = {}
    # Text inserted before / after every command line.
    _process_prefix = ""
    _process_suffix = ""

    @classmethod
    def executable(cls, cmd):
        """Set the program passed to Popen as ``executable``.

        When the last path component of ``cmd`` is ``bash``, the command
        prefix is also set to ``"set -euo pipefail; "``.
        """
        if os.path.split(cmd)[-1] == "bash":
            cls._process_prefix = "set -euo pipefail; "
        cls._process_args["executable"] = cmd

    @classmethod
    def prefix(cls, prefix):
        """Set the text placed before every command."""
        cls._process_prefix = prefix

    @classmethod
    def suffix(cls, suffix):
        """Set the text placed after every command."""
        cls._process_suffix = suffix

    def __new__(cls,
                cmd,
                *args,
                async_run=False,
                iterable=False,
                read=False,
                **kwargs):
        """Start ``cmd`` and return according to the selected mode.

        :param cmd: command line; trailing whitespace is stripped and the
            class prefix/suffix are added around it.
        :param async_run: if true, return the ``Popen`` object without
            waiting. This parameter was called ``async``, which is a
            reserved word since Python 3.7; the old name is still accepted
            when passed through ``**{"async": value}``.
        :param iterable: if true, return ``cls.iter_stdout(proc, cmd)``.
        :param read: if true, wait for the command and return what was read
            from its stdout.
        :return: see the mode parameters; ``None`` in the default mode.
        :raises KeyError: if ``stepout`` is given as keyword argument.
        :raises subprocess.CalledProcessError: on a non-zero exit status in
            the ``read`` and default modes.
        """
        if "stepout" in kwargs:
            raise KeyError("Argument stepout is not allowed in shell command.")

        # Backward compatibility with the former keyword name.
        async_run = kwargs.pop("async", async_run)

        # STDOUT is a module-level name not visible in this block
        # (presumably the stream commands write to by default - TODO confirm).
        stdout = sp.PIPE if iterable or async_run or read else STDOUT

        close_fds = sys.platform != 'win32'

        logger.info(cmd)
        proc = sp.Popen("{} {} {}".format(cls._process_prefix, cmd.rstrip(),
                                          cls._process_suffix),
                        bufsize=-1,
                        shell=True,
                        stdout=stdout,
                        close_fds=close_fds,
                        **cls._process_args)

        ret = None
        if iterable:
            # NOTE(review): iter_stdout is not defined in the visible part
            # of this class; it must be provided elsewhere.
            return cls.iter_stdout(proc, cmd)
        if read:
            ret = proc.stdout.read()
        elif async_run:
            return proc
        retcode = proc.wait()
        if retcode:
            raise sp.CalledProcessError(retcode, cmd)
        return ret
Exemplo n.º 4
0
    def wrapped(inst, *args, **kwargs):
        """Run ``func`` on an uncompressed output name, then compress it.

        If ``inst.outfile`` ends with ``.gz``, ``.bz2`` or ``.dsrc``, that
        last extension is removed from ``inst.outfile`` before ``func`` is
        called. Afterwards the produced file is compressed through
        ``inst.shell`` (pigz, pbzip2 or dsrc, using ``inst.threads``) and
        the extension is appended to ``inst.outfile`` again.

        :return: whatever ``func`` returned
        """
        suffix = None
        # .dsrc is only meant for fastq files
        if inst.outfile.endswith((".gz", ".bz2", ".dsrc")):
            inst.outfile, suffix = splitext(inst.outfile)
        # From here on inst.outfile is the uncompressed file name.

        results = func(inst, *args, **kwargs)

        if suffix is None:
            return results

        # Compress the output and put the extension back on inst.outfile.
        if suffix == ".gz":
            # TODO: this uses -f ; should be a
            logger.info("Compressing output into .gz")
            command = "pigz -f -p {} {}".format(inst.threads, inst.outfile)
            inst.shell(command)
            inst.outfile += ".gz"
        elif suffix == ".bz2":
            logger.info("Compressing output into .bz2")
            command = "pbzip2 -f -p{} {}".format(inst.threads, inst.outfile)
            inst.shell(command)
            inst.outfile += ".bz2"
        elif suffix == ".dsrc":
            logger.info("Compressing output into .dsrc")
            command = "dsrc c -t{} {} {}.dsrc".format(
                inst.threads, inst.outfile, inst.outfile)
            inst.shell(command)
            inst.outfile += ".dsrc"
        return results
Exemplo n.º 5
0
    def wrapped(inst, *args, **kwargs):
        """Run ``func`` with transparent input/output (de)compression.

        Output: if ``inst.outfile`` ends with ``.gz``, ``.bz2`` or ``.dsrc``
        the last extension is removed before ``func`` runs; afterwards the
        file is compressed through ``inst.shell`` (pigz, pbzip2 or dsrc with
        ``inst.threads``) and the extension is put back on ``inst.outfile``.

        Input: if ``inst.infile`` ends with ``.gz`` it is decompressed with
        ``unpigz`` into a ``TempFile`` and ``inst.infile`` points to that
        file while ``func`` runs. ``inst.infile`` is always reset to the
        original name, also when decompression or ``func`` raises.

        :return: whatever ``func`` returned
        """
        infile_name = inst.infile

        output_compressed = None
        # .dsrc is only meant for fastq files
        if inst.outfile.endswith((".gz", ".bz2", ".dsrc")):
            (inst.outfile, output_compressed) = splitext(inst.outfile)
        # Now inst has the uncompressed output file name

        if infile_name.endswith(".gz"):
            # decompress input
            # TODO: https://stackoverflow.com/a/29371584/1878788
            logger.info("Generating uncompressed version of %s " % infile_name)
            (ungz_name, _) = splitext(infile_name)
            (_, base_suffix) = splitext(ungz_name)
            try:
                with TempFile(suffix=base_suffix) as ungz_infile:
                    inst.infile = ungz_infile.name
                    inst.shell("unpigz -c -p {} {} > {}".format(
                        inst.threads, infile_name, inst.infile))
                    # computation
                    results = func(inst, *args, **kwargs)
            finally:
                # Never leave inst.infile pointing at the temporary file,
                # even if the shell call or the conversion failed.
                inst.infile = infile_name
        else:
            results = func(inst, *args, **kwargs)

        # Compress output and restore inst output file name
        if output_compressed == ".gz":
            # TODO: this uses -f ; should be a
            logger.info("Compressing output into .gz")
            inst.shell("pigz -f -p {} {}".format(inst.threads, inst.outfile))
            inst.outfile = inst.outfile + ".gz"
        elif output_compressed == ".bz2":
            logger.info("Compressing output into .bz2")
            inst.shell("pbzip2 -f -p{} {}".format(inst.threads, inst.outfile))
            inst.outfile = inst.outfile + ".bz2"
        elif output_compressed == ".dsrc":  # only for FastQ files
            logger.info("Compressing output into .dsrc")
            inst.shell("dsrc c -t{} {} {}.dsrc".format(inst.threads,
                                                       inst.outfile,
                                                       inst.outfile))
            inst.outfile = inst.outfile + ".dsrc"
        return results
Exemplo n.º 6
0
 def _method_biopython(self, *args, **kwargs):
     """Convert with Biopython's ``SeqIO``.

     Records are parsed from ``self.infile`` as ``fastq`` and written to
     ``self.outfile`` as ``fasta``. Extra positional and keyword arguments
     are accepted but not used.
     """
     logger.info("Executing biopython")
     SeqIO.write(SeqIO.parse(self.infile, 'fastq'), self.outfile, 'fasta')
Exemplo n.º 7
0
    def __init__(self, infile, outfile, force=False):
        """.. rubric:: constructor

        Checks both paths, derives the input/output extensions, looks up
        the matching converter class in a ``Registry`` and instantiates it
        as ``self.converter``.

        :param str infile: The path of the input file.
        :param str outfile: The path of the output file.
        :param bool force: overwrite output file if it exists already
            otherwise raises an error
        :raises ValueError: if ``infile`` does not exist, or if ``outfile``
            exists and ``force`` is False.
        :raises IOError: if ``outfile`` ends with ``.dsrc`` but not with
            ``.fastq.dsrc``.

        Calls ``sys.exit(1)`` when no converter is registered for the
        (input extension, output extension) pair.

        """
        if not os.path.exists(infile):
            msg = "Incorrect input file: %s" % infile
            _log.error(msg)
            raise ValueError(msg)

        # check existence of output file. If it exists,
        # fails except if force argument is set to True
        if os.path.exists(outfile):
            msg = "output file {} exists already".format(outfile)
            _log.warning("output file exists already")
            if force is False:
                _log.critical(
                    "output file exists. If you are using bioconvert, use --force "
                )
                raise ValueError(msg)
            else:
                _log.warning("output file will be overwritten")

        # Only fastq files can be compressed with dsrc
        if outfile.endswith(".dsrc"):
            # only valid for FastQ files extension
            # dsrc accepts only .fastq file extension
            if not outfile.endswith(".fastq.dsrc"):
                # The original literal contained a stray "+" in the text.
                msg = ("When compressing with .dsrc extension, "
                       "only files ending with .fastq extension are "
                       "accepted. This is due to the way dsrc executable "
                       "is implemented.")
                _log.critical(msg)
                # Same exception type as before, now carrying the message.
                raise IOError(msg)

        # case1: fastq.gz to fasta.bz2
        # Here, we want to decompress, convert, compress.
        # so we need the extension without .gz or .bz2
        # We should have inext set to fastq and outext
        # set to fasta.bz2
        self.inext = getext(infile, remove_compression=True)
        self.outext = getext(outfile, remove_compression=True)

        # Case 2, fastq.gz to fastq.bz2
        # data is not changed, just the type of compression, so we want
        # to keep the original extensions, here inext and outext  will contain
        # .gz and .bz2
        if self.inext == self.outext:
            _log.info("decompression/compression mode")
            self.inext = getext(infile)
            self.outext = getext(outfile)

        self.mapper = Registry()

        # From the input parameters 1 and 2, we get the module name
        _log.info("Input: {}".format(self.inext))
        _log.info("Output: {}".format(self.outext))
        try:
            # Only the registry lookup is expected to raise KeyError.
            class_converter = self.mapper[(self.inext, self.outext)]
        except KeyError:
            # This module name was not found
            msg = "Requested input format ({}) to output format ({}) is not available in bioconvert"
            _log.critical(msg.format(self.inext, self.outext))
            _log.critical(
                "Use --formats to know the available formats and --help for examples"
            )
            sys.exit(1)
        self.name = class_converter.__name__

        self.converter = class_converter(infile, outfile)
        _log.info("Using {} class".format(self.converter.name))