def get(self, key: Any) -> Optional[FileLike]:
    """Get the file object associated with a path. If the file is not already
    open, it is first opened with `xopen`.

    Args:
        key: The file name/key.

    Returns:
        The opened file.
    """
    fileobj = self._files.get(key, None)
    if fileobj is None:
        if isinstance(key, int) and len(self) > key:
            key = list(self.keys)[key]
            fileobj = self._files[key]
        else:
            return None
    if isinstance(fileobj, dict):
        path = self._paths[key]
        fileobj["context_wrapper"] = True
        fileobj = xopen(path, **fileobj)
        if self.header and fileobj.writable():
            fileobj.write(self.header)
        self._files[key] = fileobj
    return fileobj
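# The method above opens files lazily: an entry in self._files may start out as
# a plain dict of xopen() keyword arguments and is only replaced with a real
# file object on first access. Below is a minimal standalone sketch of that
# pattern; the class name LazyFiles and its add() helper are illustrative
# assumptions, not part of the class above, and xopen is assumed to be
# xphyle.xopen (which accepts the context_wrapper argument used above).
from typing import Any, Dict

from xphyle import xopen


class LazyFiles:
    def __init__(self) -> None:
        self._paths: Dict[Any, str] = {}
        self._files: Dict[Any, Any] = {}

    def add(self, key: Any, path: str, **xopen_kwargs) -> None:
        # Defer opening: remember only the path and the xopen() arguments.
        self._paths[key] = path
        self._files[key] = dict(xopen_kwargs)

    def get(self, key: Any):
        fileobj = self._files.get(key)
        if isinstance(fileobj, dict):
            # First access: open the file now and cache the file object.
            fileobj = xopen(self._paths[key], context_wrapper=True, **fileobj)
            self._files[key] = fileobj
        return fileobj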
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-1", "--fastq1")
    parser.add_argument("-2", "--fastq2")
    parser.add_argument("-o", "--output", default="-")
    args = parser.parse_args()
    # Build per-read, per-side, per-position base histograms from the paired FASTQ files.
    with xopen(args.fastq1) as fq1, xopen(args.fastq2) as fq2:
        hists = make_hists(fq1, fq2)
    # Write the counts as a tab-separated table: read, side, position, base, count.
    with xopen(args.output, 'w') as o:
        w = csv.writer(o, delimiter="\t")
        w.writerow(('read', 'side', 'pos', 'base', 'count'))
        for i, h in enumerate(hists, 1):
            for j in range(2):
                for b in nuc:
                    for k, count in enumerate(h[j][b], 1):
                        w.writerow((i, j, k, b, count))
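# Hedged usage note for the CLI above (the script name is illustrative):
#
#     python fastq_base_hists.py -1 reads.1.fastq.gz -2 reads.2.fastq.gz -o hists.tsv
#
# xopen transparently handles compressed FASTQ input; with the default output
# of "-", results presumably go to standard output (assuming the xopen
# implementation in use maps "-" to stdout, as xphyle's does).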
def translate(fasta):
    dbf = os.path.basename(rreplace(fasta, 'fasta', 'fasta.db', 1))
    print('Translating...')
    with xopen(dbf, 'wt') as db:
        parser = SeqIO.parse(xopen(fasta, 'rt'), 'fasta')
        for sequence in parser:
            id, seq = sequence.description, Seq(
                str(sequence.seq).upper().replace('X', 'N'))
            # Translate in all six reading frames: negative frames use the
            # reverse complement, positive frames use the forward strand.
            f = (-3, -2, -1, 1, 2, 3)
            for i in f:
                if i < 0:
                    db.write('>%s\n%s\n' % (
                        '%s_%d' % (id, i),
                        seq.reverse_complement()[-(i + 1):].translate(
                            stop_symbol='X')))
                elif i > 0:
                    db.write('>%s\n%s\n' % (
                        '%s_+%d' % (id, i),
                        seq[i - 1:].translate(stop_symbol='X')))
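# rreplace is not defined in the snippet above; judging from its use it replaces
# the last occurrence(s) of a substring. A plausible sketch under that assumption:
def rreplace(s: str, old: str, new: str, count: int) -> str:
    # Replace the last `count` occurrences of `old` in `s` with `new`,
    # e.g. rreplace('sample.fasta', 'fasta', 'fasta.db', 1) -> 'sample.fasta.db'.
    return new.join(s.rsplit(old, count))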
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--adapter1", default=ADAPTER1)
    parser.add_argument("-A", "--adapter2", default=ADAPTER2)
    parser.add_argument("-1", "--fastq1")
    parser.add_argument("-2", "--fastq2")
    parser.add_argument("-o", "--output", default="-")
    args = parser.parse_args()
    # Estimate adapter-contamination metrics from the paired FASTQ files.
    with xopen(args.fastq1) as fq1, xopen(args.fastq2) as fq2:
        metrics = estimate_metrics(fq1, fq2, args.adapter1, args.adapter2)
    with xopen(args.output, 'w') as o:
        print("Avg error prob: {}".format(metrics[0]), file=o)
        print("Read 1 with full-length adapters: {}".format(metrics[1]), file=o)
        print("Read 1 full-length adapter bases: {}".format(metrics[2]), file=o)
        print("Read 2 with full-length adapters: {}".format(metrics[3]), file=o)
        print("Read 2 full-length adapter bases: {}".format(metrics[4]), file=o)
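# Hedged usage note (script name illustrative; ADAPTER1, ADAPTER2 and
# estimate_metrics() are defined elsewhere in the source script):
#
#     python estimate_adapter_metrics.py -1 reads.1.fastq.gz -2 reads.2.fastq.gz -o metrics.txt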
def main(args):
    with xopen(args.Tumor_Normal_mpileup, 'rt') as TN, \
            open(args.Candidate_somatic_sites, 'wt') as Cs:
        combi = pairwise(enumerate(TN))
        for line, line_pair in combi:
            # look at a genomic site and its next number_of_columns site
            line_content = line[1].rstrip('\n').split('\t')
            line_pair_content = line_pair[1].rstrip('\n').split('\t')
            if line_content[0] == line_pair_content[0] and (
                    int(line_pair_content[1]) - int(line_content[1])
                    == args.number_of_columns):
                process_line(line_content, line, Cs)
            else:
                # ignore the genomic site if its next number_of_columns site does not exist
                next(itertools.islice(combi, 2 * args.number_of_columns - 1,
                                      2 * args.number_of_columns - 1), None)
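# pairwise is not defined in the snippet above. It is assumed to be the classic
# itertools recipe (available as itertools.pairwise in Python 3.10+), which
# yields overlapping consecutive pairs of the enumerated mpileup lines:
import itertools


def pairwise(iterable):
    # pairwise('ABCD') -> ('A', 'B'), ('B', 'C'), ('C', 'D')
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)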
def main(args):
    with xopen(args.Tumor_Normal_mpileup, 'rt') as TN, \
            open(args.Candidate_validated_somatic_sites, 'rt') as Cs, \
            open(args.Mapping_information_file, 'wt') as Mi:
        n = 110
        enume_Cs = enumerate(Cs)
        enume_TN = enumerate(TN)
        for line in enume_Cs:
            holder = []
            # creating input for training or inference
            if args.indicator == 'training':
                label = []
            base = ['A', 'T', 'G', 'C', 'N']
            line_content = line[1].rstrip('\n').split('\t')
            # encoding for bases
            encoding = ['00001', '00010', '00100', '01000', '10000']
            # process the first line
            if line[0] == 0:
                for col in itertools.islice(enume_TN, int(line_content[0]) - n,
                                            int(line_content[0]) + n + 1):
                    holder = generate_mapping_infor_reads(col, holder)
                keep = int(line_content[0])
                # creating input for training or inference
                if args.indicator == 'training':
                    for i in range(2805):
                        label.append(line_content[3])
                    holder.append(label)
                    overlap = holder[:-1]
                else:
                    overlap = holder
                # write to a file in row order
                for row in zip(*holder):
                    Mi.write('\t'.join(row) + '\n')
            # process the remaining lines
            else:
                # the distance between two candidate or validated genomic sites
                gap = int(line_content[0]) - keep
                # distance larger than the window size of 2*n
                if gap > 2 * n:
                    for col in itertools.islice(enume_TN, gap - 2 * n - 1, gap):
                        holder = generate_mapping_infor_reads(col, holder)
                    # creating input for training or inference
                    if args.indicator == 'training':
                        for i in range(2805):
                            label.append(line_content[3])
                        holder.append(label)
                        overlap = holder[:-1]
                    else:
                        overlap = holder
                    for row in zip(*holder):
                        Mi.write('\t'.join(row) + '\n')
                    keep = int(line_content[0])
                # distance smaller than the window size of 2*n
                else:
                    # copy the information of the overlapping window from the previous columns
                    for infor in overlap[gap:]:
                        holder.append(infor)
                    overlap = []
                    for col in itertools.islice(enume_TN, 0, gap):
                        holder = generate_mapping_infor_reads(col, holder)
                    # creating input for training or inference
                    if args.indicator == 'training':
                        for i in range(2805):
                            label.append(line_content[3])
                        holder.append(label)
                        overlap = holder[:-1]
                    else:
                        overlap = holder
                    for row in zip(*holder):
                        Mi.write('\t'.join(row) + '\n')
                    keep = int(line_content[0])
def open(self) -> None:
    """Opens the reader."""
    self._reader = xopen(self.path, "rb", context_wrapper=True)
def __init__(self, file1, file2=None, **kwargs):
    self.paired = file2 is not None
    self.file1 = xopen(file1, 'wt', **kwargs)
    if self.paired:
        self.file2 = xopen(file2, 'wt', **kwargs)
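# Hedged usage sketch: only __init__ is shown above, so the class name used
# here (PairedWriter) is an assumption for illustration.
#
#     writer = PairedWriter("out.1.fastq.gz", "out.2.fastq.gz")
#     writer.file1.write("@r1/1\nACGT\n+\nIIII\n")
#     writer.file2.write("@r1/2\nTGCA\n+\nIIII\n")
#
# Extra keyword arguments are passed straight through to xopen() for both files.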