def __init__(self, infile, outfile, reference=None, *args, **kargs):
    """.. rubric:: constructor

    :param str infile: input BAM file
    :param str outfile: output CRAM filename
    :param str reference: reference file in :term:`FASTA` format. When
        omitted, a file named like *infile* but with a ``.fa`` extension
        is looked for; if it does not exist, the user is prompted
        interactively for a path.
    :raises IOError: if the path entered at the prompt does not exist.
    """
    super(BAM2CRAM, self).__init__(infile, outfile, *args, **kargs)
    self._default_method = "samtools"

    self.reference = reference
    if self.reference is None:
        logger.debug("No reference provided. Infering from input file")
        # Try to find a local file by swapping the input extension for .fa.
        # splitext only touches the final extension, so a directory or
        # sample name containing ".bam" is left alone.
        reference = os.path.splitext(infile)[0] + ".fa"
        # The candidate must never be the input file itself, otherwise the
        # BAM would silently be used as its own reference.
        if reference != infile and os.path.exists(reference):
            logger.debug(
                "Reference found from inference ({})".format(reference))
        else:
            logger.debug("No reference found.")
            msg = "Please enter the reference corresponding "
            msg += "to the input BAM file:"
            reference = input(msg)
            if not os.path.exists(reference):
                raise IOError("Reference required")
            logger.debug("Reference exist ({}).".format(reference))
        self.reference = reference
    self.threads = cpu_count()
def __init__(self, infile, outfile, reference=None, *args, **kargs):
    """.. rubric:: constructor

    :param str infile: input SAM file
    :param str outfile: output filename
    :param str reference: reference file in :term:`FASTA` format. When
        omitted, a file named like *infile* but with a ``.fa`` extension
        is looked for; if it does not exist, the user is prompted
        interactively for a path.
    :raises IOError: if the path entered at the prompt does not exist.

    command used::

        samtools view -SCh

    .. note:: the API related to the third argument may change in the future.
    """
    super(SAM2CRAM, self).__init__(infile, outfile, *args, **kargs)

    self.reference = reference
    if self.reference is None:
        logger.debug("No reference provided. Infering from input file")
        # Try to find a local file by swapping the input extension for .fa.
        # splitext only touches the final extension; a plain str.replace
        # would also rewrite ".sam" inside names such as "my.sample.sam".
        reference = os.path.splitext(infile)[0] + ".fa"
        # The candidate must never be the input file itself.
        if reference != infile and os.path.exists(reference):
            logger.debug("Reference found from inference ({})".format(reference))
        else:
            logger.debug("No reference found.")
            msg = "Please enter the reference corresponding "
            msg += "to the input SAM file:"
            # input() is the Python 3 spelling; raw_input no longer exists.
            reference = input(msg)
            if not os.path.exists(reference):
                raise IOError("Reference required")
            logger.debug("Reference exist ({}).".format(reference))
        self.reference = reference
    self.threads = cpu_count()
def __init__(self, infile, outfile):
    """.. rubric:: constructor

    Validate that the input exists, then record the conversion settings.

    :param str infile: path of the input file; it must exist.
    :param str outfile: path of the output file.
    :raises ValueError: if *infile* does not exist (the message is also
        logged as an error).
    """
    input_missing = not os.path.exists(infile)
    if input_missing:
        error_text = "Incorrect input file: %s" % infile
        _log.error(error_text)
        raise ValueError(error_text)

    self.infile, self.outfile = infile, outfile
    self.threads = cpu_count()
    # Set to "shell" to route commands through the shell() method instead.
    self._execute_mode = "subprocess"
def __init__(self, infile, outfile, reference=None, *args, **kargs):
    """.. rubric:: constructor

    :param str infile: input file, forwarded to the parent constructor
    :param str outfile: output filename, forwarded to the parent constructor
    :param str reference: reference file in :term:`FASTA` format. It is
        accepted but neither stored nor used by this constructor.

    command used::

        samtools view -@ <thread> -Sh -T <reference> in.cram > out.sam

    .. note:: the API related to the third argument may change in the future.
    """
    # NOTE(review): the previous docstring described infile as a FASTQ file,
    # and the command above produces SAM; both look copied from a sibling
    # converter. Presumably infile is a CRAM file given the class name --
    # confirm and update the command accordingly.
    super(CRAM2FASTQ, self).__init__(infile, outfile, *args, **kargs)
    # Thread count is taken from cpu_count().
    self.threads = cpu_count()
def __init__(self, infile, outfile):
    """.. rubric:: constructor

    :param str infile: path of the input file.
    :param str outfile: path of the output file. When empty, a name is
        generated from *infile* and the first entry of ``output_ext``.
    """
    # The existence of the input file is deliberately not checked here:
    # the value may be just a prefix rather than a real path.
    resolved_outfile = outfile or generate_outfile_name(
        infile, self.output_ext[0])

    self.infile = infile
    self.outfile = resolved_outfile
    self.threads = cpu_count()
    # "shell" routes commands through the shell() method; the alternative
    # is "subprocess".
    self._execute_mode = "shell"
    self.logger = logger
class ConvBase(metaclass=ConvMeta):
    """Base class for all converters.

    To build a new converter, create a new class which inherits from
    :class:`ConvBase` and implement method that performs the conversion.
    The name of the converter method must start with ``_method_``.

    For instance: ::

        class Fastq2Fasta(ConvBase):

            def _method_python(self, *args, **kwargs):
                # include your code here. You can use the infile and outfile
                # attributes.
                self.infile
                self.outfile

    """

    # specify the extensions of the input file, can be a sequence (must be
    # overridden in subclasses)
    input_ext = None

    # specify the extensions of the output file, can be a sequence (must be
    # overridden in subclasses)
    output_ext = None

    # list available methods
    available_methods = []

    # default method should be provided
    _default_method = None
    _library_to_install = None
    _is_compressor = False

    # threads to be used by default if argument is required in a method
    threads = cpu_count()

    def __init__(self, infile, outfile):
        """.. rubric:: constructor

        :param str infile: the path of the input file.
        :param str outfile: the path of the output file. When empty, a name
            is generated from *infile* and the first entry of
            :attr:`output_ext`.
        """
        if not outfile:
            outfile = generate_outfile_name(infile, self.output_ext[0])

        self.infile = infile
        self.outfile = outfile
        # execute mode can be shell or subprocess.
        self._execute_mode = "shell"
        # The logger to be set to INFO, DEBUG, WARNING, ERROR, CRITICAL
        self.logger = logger

    def __call__(self, *args, method_name=None, **kwargs):
        """Run the conversion with the requested (or default) method.

        :param str method_name: the method to be found in
            :attr:`available_methods`. A ``method`` keyword argument, when
            present, takes precedence and is removed from *kwargs*.
        :param args: positional arguments forwarded to the method
        :param kwargs: keyword arguments forwarded to the method
        :raises ValueError: if the method is neither in
            :attr:`available_methods` nor ``"dummy"``.
        """
        # An explicit "method" keyword wins over method_name.
        if "method" in kwargs:
            method_name = kwargs.pop("method")

        # Nothing requested: fall back on the default method.
        method_name = method_name or self.default

        # "dummy" is a method used to evaluate the cost of the
        # execute() method for the benchmark
        if method_name not in self.available_methods + ['dummy']:
            msg = "Methods available are {}".format(self.available_methods)
            _log.error(msg)
            raise ValueError(msg)

        _log.info("{}> Executing {} method ".format(self.name, method_name))
        # reference to the method requested
        method_reference = getattr(self, "_method_{}".format(method_name))

        # call the method itself and report the elapsed wall-clock time
        t1 = time.time()
        method_reference(*args, **kwargs)
        t2 = time.time()
        _log.info("Took {} seconds ".format(t2 - t1))

    # FIXME property not use
    @property
    def name(self):
        """
        The name of the class
        """
        return type(self).__name__

    def _method_dummy(self, *args, **kwargs):
        # The execute commands has a large initialisation cost (about a second)
        # This commands does not and can be used to evaluate that cost
        self.execute("")

    def shell(self, cmd):
        """Log *cmd* and run it through :func:`bioconvert.core.shell.shell`."""
        from bioconvert.core.shell import shell
        _log.info("CMD: {}".format(cmd))
        shell(cmd)

    def execute(self, cmd, ignore_errors=False, verbose=False, shell=False):
        """Run *cmd* using the configured execution mode.

        :param str cmd: the command to execute
        :param ignore_errors: forwarded to :meth:`_execute`
        :param verbose: forwarded to :meth:`_execute`
        :param shell: if True, or if the execute mode is ``"shell"``, the
            command is run with :meth:`shell` and nothing is returned;
            otherwise it is run with :meth:`_execute`.
        """
        if shell is True or self._execute_mode == "shell":
            self.shell(cmd)
            return
        _log.info("CMD: {}".format(cmd))
        self._execute(cmd, ignore_errors, verbose)

    def _execute(self, cmd, ignore_errors=False, verbose=False):
        """
        Execute a command in a sub-shell

        :param str cmd: the command to execute
        :param ignore_errors: If True the result is returned whatever the
                              return value of the sub-shell.
                              Otherwise a Runtime error is raised when the
                              sub-shell return a non zero value
        :param verbose: If true displays errors on standard error
        :return: the result of the command
        :rtype: a :class:`StringIO` instance
        :raises RuntimeError: if the command cannot be started, or if it
            returns a non zero value and *ignore_errors* is False.
        """
        try:
            process_ = Popen(cmd,
                             shell=True,
                             stdout=PIPE,
                             stderr=PIPE,
                             stdin=None)
        except Exception as err:
            msg = "Failed to execute Command: '{}'. error: '{}'".format(cmd, err)
            raise RuntimeError(msg) from err

        # communicate() drains both pipes until EOF and waits for the process,
        # so no output is lost when the command exits quickly and neither pipe
        # can fill up and block the child.
        raw_output, raw_errors = process_.communicate()

        output = StringIO()
        output.write(raw_output.decode("utf-8"))
        errors = raw_errors.decode("utf-8").strip()

        if verbose and errors:
            print(errors, file=sys.stderr)

        if process_.returncode != 0 and not ignore_errors:
            raise RuntimeError(errors)
        # Returned on success and also on an ignored failure.
        return output

    def boxplot_benchmark(self, N=5, rerun=True, include_dummy=False,
                          to_exclude=None, to_include=None, rot_xticks=90,
                          boxplot_args=None):
        """Simple wrapper to call :class:`Benchmark` and plot the results

        see :class:`~bioconvert.core.benchmark.Benchmark` for details.

        :param to_exclude: forwarded to :class:`Benchmark`; defaults to an
            empty list.
        :param to_include: forwarded to :class:`Benchmark`; defaults to an
            empty list.
        :param boxplot_args: forwarded to the benchmark ``plot`` call;
            defaults to an empty dict.
        :return: whatever the benchmark ``plot`` call returns.
        """
        # Fresh containers on every call: defaults must not be shared between
        # calls in case the benchmark mutates them.
        to_exclude = [] if to_exclude is None else to_exclude
        to_include = [] if to_include is None else to_include
        boxplot_args = {} if boxplot_args is None else boxplot_args

        self._benchmark = Benchmark(self, N=N, to_exclude=to_exclude,
                                    to_include=to_include)
        self._benchmark.include_dummy = include_dummy
        data = self._benchmark.plot(rerun=rerun, rot_xticks=rot_xticks,
                                    boxplot_args=boxplot_args)
        return data

    def _get_default_method(self):
        """Return the default method name.

        This is :attr:`_default_method` when it is set and listed in
        :attr:`available_methods`, otherwise the first available method.
        An empty :attr:`available_methods` raises :class:`IndexError`.
        """
        if (self._default_method is not None
                and self._default_method in self.available_methods):
            return self._default_method
        return self.available_methods[0]

    default = property(_get_default_method)

    def install_tool(self, executable):
        """Install the given tool, using the script:
        bioconvert/install_script/install_executable.sh
        if the executable is not already present

        :param executable: executable to install
        :return: nothing
        """
        # imported but not unused (when we don't have bioconvert_path)
        # import bioconvert
        from bioconvert import bioconvert_data

        if shutil.which(executable) is None:
            logger.info("Installing tool : " + executable)
            # Assigned but never used, says flake8
            # bioconvert_path = bioconvert.__path__[0]
            script = bioconvert_data(
                'install_' + executable + '.sh', where="../misc")
            subprocess.call(['sh', script])

    @classmethod
    def add_argument_to_parser(cls, sub_parser):
        """Set the description of *sub_parser* and register on it the common
        arguments followed by the converter-specific ones."""
        sub_parser.description = cls.get_description()
        for arg in itertools.chain(cls.get_common_arguments_for_converter(),
                                   cls.get_additional_arguments()):
            arg.add_to_sub_parser(sub_parser)

    @classmethod
    def get_description(cls):
        """Return a one-line description built from the converter class name."""
        return "Allow to convert file in '%s' to '%s' format." \
            % ConvMeta.split_converter_to_format(cls.__name__)

    @classmethod
    def get_additional_arguments(cls):
        """Return converter-specific arguments; none in the base class."""
        return []

    @staticmethod
    def get_common_arguments():
        """Yield the :class:`ConvArg` definitions shared by every converter."""
        yield ConvArg(
            names="input_file",
            nargs="?",
            default=None,
            type=ConvArg.file,
            help="The path to the file to convert.",
        )
        yield ConvArg(
            names="output_file",
            nargs="?",
            default=None,
            type=ConvArg.file,
            output_argument=True,
            help="The path where the result will be stored.",
        )
        yield ConvArg(
            names=["-f", "--force", ],
            action="store_true",
            help="if outfile exists, it is overwritten with this option",
        )
        yield ConvArg(
            names=["-v", "--verbosity", ],
            default=bioconvert.logger.level,
            help="Set the outpout verbosity.",
            choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        )
        yield ConvArg(
            names=["--raise-exception", ],
            action="store_true",
            help="Let exception ending the execution be raised and displayed",
        )
        yield ConvArg(
            names=["-m", "--batch", ],
            default=False,
            action="store_true",
            help="Allow conversion of a set of files using wildcards. You "
                 "must use quotes to escape the wildcards. For instance: "
                 "--batch 'test*fastq' ")
        yield ConvArg(
            names=["-b", "--benchmark", ],
            default=False,
            action="store_true",
            help="Running all available methods",
        )
        yield ConvArg(
            names=["-N", "--benchmark-N", ],
            default=5,
            type=int,
            help="Number of trials for each methods",
        )
        yield ConvArg(
            names=["-a", "--allow-indirect-conversion", ],
            default=False,
            action="store_true",
            help="Allow to chain converter when direct conversion is absent",
        )

    @classmethod
    def get_common_arguments_for_converter(cls):
        """Yield the common arguments plus the method-selection arguments.

        The ``--method`` argument is skipped, with a warning, when it cannot
        be built for this converter.
        """
        yield from ConvBase.get_common_arguments()

        try:
            # Some converter does not have any method and work in __call__,
            # so preventing to crash by searching for them. Only the
            # construction is guarded; the yield happens outside the try so
            # that exceptions raised by the consumer are not swallowed here.
            method_arg = ConvArg(
                names=["-c", "--method", ],
                nargs="?",
                default=cls._get_default_method(cls),
                help="The method to use to do the conversion.",
                choices=cls.available_methods,
            )
        except Exception as e:
            _log.warning("converter '{}' does not seems to have methods: {}".format(cls.__name__, e))
        else:
            yield method_arg

        yield ConvArg(
            names=["-s", "--show-methods", ],
            default=False,
            action="store_true",
            help="A converter may have several methods",
        )