def parse(self):
    """
    Parse the file located at self.filepath and store information in self.

    The file is opened in text mode, through gzip when isGzip() reports
    that it is compressed, and the open handle is passed to
    self._parseFileHandle(), which performs the actual parsing.
    """
    # Select the opener once so the parsing call is written a single time.
    if isGzip(self.filepath):
        opener, open_mode = gzip.open, "rt"
    else:
        opener, open_mode = open, "r"
    with opener(self.filepath, open_mode) as handle:
        self._parseFileHandle(handle)
def nbSeq(filepath):
    """
    Return the number of sequences in file.

    The count is derived from the number of lines: every record is assumed
    to span exactly four lines (as in the FASTQ layout). Lines of a trailing
    incomplete record are ignored.

    :param filepath: Path to the file (plain text or gzip-compressed).
    :type filepath: str
    :return: The number of sequences.
    :rtype: int
    """
    handler = open
    handler_options = "r"
    if isGzip(filepath):
        handler = gzip.open
        handler_options = "rt"
    nb_lines = 0
    with handler(filepath, handler_options) as reader:
        for _ in reader:
            nb_lines += 1
    # Floor division keeps the computation in exact integer arithmetic
    # instead of going through a float and truncating it back to int.
    return nb_lines // 4
def nbSeq(filepath):
    """
    Return the number of sequences in file.

    A sequence is counted for every line starting with the ">" character.

    :param filepath: Path to the file (plain text or gzip-compressed).
    :type filepath: str
    :return: The number of sequences.
    :rtype: int
    """
    if isGzip(filepath):
        opener, open_mode = gzip.open, "rt"
    else:
        opener, open_mode = open, "r"
    with opener(filepath, open_mode) as reader:
        # One unit per header line.
        return sum(1 for line in reader if line.startswith(">"))
def __init__(self, filepath, mode="r"):
    """
    Build and return an instance of FastqIO.

    When writing or appending, compression is decided from the ".gz"
    extension of the path; otherwise it is decided by isGzip() on the
    existing file.

    :param filepath: Path to the file.
    :type filepath: str
    :param mode: Mode to open the file ('r', 'w', 'a').
    :type mode: str
    :return: The new instance.
    :rtype: FastqIO
    """
    self.filepath = filepath
    self.mode = mode
    if mode in ("w", "a"):
        # The target may not exist yet: rely on the file name only.
        compressed = filepath.endswith('.gz')
    else:
        compressed = isGzip(filepath)
    if compressed:
        self.file_handle = gzip.open(filepath, mode + "t")
    else:
        self.file_handle = open(filepath, mode)
    self.current_line_nb = 1
def nbSeqAndNt(filepath):
    """
    Return the number of sequences and nucleotides in file.

    Lines starting with ">" are counted as sequences; the length of every
    other line, once trailing whitespace is stripped, is added to the
    nucleotide count.

    :param filepath: Path to the file (plain text or gzip-compressed).
    :type filepath: str
    :return: The number of sequences and the number of nucleotides.
    :rtype: int, int
    """
    seq_count = 0
    nt_count = 0
    if isGzip(filepath):
        opener, open_mode = gzip.open, "rt"
    else:
        opener, open_mode = open, "r"
    with opener(filepath, open_mode) as reader:
        for line in reader:
            if line.startswith(">"):
                seq_count += 1
                continue
            nt_count += len(line.rstrip())
    return seq_count, nt_count
# Logger
logging.basicConfig(
    format='%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s'
)
log = logging.getLogger(os.path.basename(__file__))
log.setLevel(logging.INFO)
log.info("Command: " + " ".join(sys.argv))

# Process: concatenate every input file into the output file, writing the
# output through gzip when its name ends with ".gz".
out_open_fct = open
out_mode = "w"
if args.output.endswith('.gz'):
    out_open_fct = gzip.open
    out_mode = "wt"
with out_open_fct(args.output, out_mode) as writer:
    last_line = "\n"
    for curr_in_file in args.inputs:
        if not last_line.endswith("\n"):
            writer.write("\n")  # Start new line for a new file
            # Record that the output now ends with a newline; otherwise an
            # empty input coming next would leave last_line unchanged and
            # trigger an extra blank line on the following iteration.
            last_line = "\n"
        in_open_fct = open
        in_mode = "r"
        if isGzip(curr_in_file):
            in_open_fct = gzip.open
            in_mode = "rt"
        with in_open_fct(curr_in_file, in_mode) as reader:
            for line in reader:
                writer.write(line)
                last_line = line
log.info("End of job")