示例#1
0
文件: utils.py 项目: pythseq/xphyle
    def get(self, key: Any) -> Optional[FileLike]:
        """Return the file object registered under ``key``, opening it lazily.

        ``key`` is first looked up directly in ``self._files``. If nothing is
        found and ``key`` is an integer smaller than ``len(self)``, it is
        treated as a position into ``self.keys`` instead.

        A stored entry that is still a ``dict`` is used as the keyword
        arguments for ``xopen`` (with ``context_wrapper`` forced to ``True``).
        The opened object replaces the dict in ``self._files``, so later calls
        return the same object.

        Args:
            key: The file name/key, or an integer position.

        Returns:
            The (possibly newly opened) file object, or ``None`` when ``key``
            matches no entry.
        """
        entry = self._files.get(key, None)
        if entry is None:
            # Not a known key: only an in-range integer may act as an index.
            if not (isinstance(key, int) and len(self) > key):
                return None
            key = list(self.keys)[key]
            entry = self._files[key]

        if not isinstance(entry, dict):
            # Already opened (or registered as an open object).
            return entry

        # Deferred open: the dict holds the keyword arguments for xopen.
        path = self._paths[key]
        entry["context_wrapper"] = True
        opened = xopen(path, **entry)
        if self.header and opened.writable():
            opened.write(self.header)
        self._files[key] = opened
        return opened
示例#2
0
def main():
    """Command-line entry point: write a base-count table as TSV.

    Parses ``-1/--fastq1``, ``-2/--fastq2`` and ``-o/--output`` (default
    ``"-"``), passes the two opened inputs to ``make_hists``, and writes one
    tab-separated row per (entry, side, position, base) with its count,
    preceded by a header row.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("-1", "--fastq1")
    cli.add_argument("-2", "--fastq2")
    cli.add_argument("-o", "--output", default="-")
    opts = cli.parse_args()

    with xopen(opts.fastq1) as reads1, xopen(opts.fastq2) as reads2:
        histograms = make_hists(reads1, reads2)

    def rows():
        # Header first, then one row per count; indices are 1-based except
        # for the side, which stays 0/1.
        yield ('read', 'side', 'pos', 'base', 'count')
        for read_no, hist in enumerate(histograms, 1):
            for side in range(2):
                for base in nuc:
                    for pos, count in enumerate(hist[side][base], 1):
                        yield (read_no, side, pos, base, count)

    with xopen(opts.output, 'w') as out:
        csv.writer(out, delimiter="\t").writerows(rows())
示例#3
0
def main():
    """Command-line entry point: dump the ``make_hists`` result as a TSV table.

    Options: ``-1/--fastq1`` and ``-2/--fastq2`` name the two inputs,
    ``-o/--output`` the destination (default ``"-"``). The output starts with
    a header row, followed by one row for every count found under
    ``hist[side][base]`` for each histogram, side (0 or 1) and base in ``nuc``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-1", "--fastq1")
    arg_parser.add_argument("-2", "--fastq2")
    arg_parser.add_argument("-o", "--output", default="-")
    options = arg_parser.parse_args()

    with xopen(options.fastq1) as first, xopen(options.fastq2) as second:
        all_hists = make_hists(first, second)

    with xopen(options.output, 'w') as sink:
        emit = csv.writer(sink, delimiter="\t").writerow
        emit(('read', 'side', 'pos', 'base', 'count'))
        # Histogram number and position are reported 1-based.
        for hist_no, hist in enumerate(all_hists, 1):
            for side in range(2):
                per_side = hist[side]
                for base in nuc:
                    for position, count in enumerate(per_side[base], 1):
                        emit((hist_no, side, position, base, count))
示例#4
0
def translate(fasta):
    """Write a six-frame translation of every record in ``fasta``.

    The output file is created in the current working directory; its name is
    the basename of ``fasta`` after ``rreplace(fasta, 'fasta', 'fasta.db', 1)``.
    For each input record the sequence is upper-cased, ``X`` is replaced by
    ``N``, and six FASTA entries are written in the order -3, -2, -1, +1, +2,
    +3. Each entry is named ``<description>_<signed frame>`` and translated
    with ``X`` as the stop symbol.

    Args:
        fasta: Path of the nucleotide FASTA file to translate.
    """
    dbf = os.path.basename(rreplace(fasta, 'fasta', 'fasta.db', 1))
    print('Translating...')
    # Open the input inside the ``with`` as well so its handle is closed when
    # translation finishes or fails (it was previously left open).
    with xopen(dbf, 'wt') as db, xopen(fasta, 'rt') as handle:
        for record in SeqIO.parse(handle, 'fasta'):
            name = record.description
            seq = Seq(str(record.seq).upper().replace('X', 'N'))
            # Shared by the three reverse frames; compute it once per record.
            revcomp = seq.reverse_complement()
            for frame in (-3, -2, -1, 1, 2, 3):
                if frame < 0:
                    # Frame -1 starts at offset 0 of the reverse complement,
                    # -2 at offset 1, -3 at offset 2.
                    subseq = revcomp[-(frame + 1):]
                else:
                    subseq = seq[frame - 1:]
                # ``%+d`` renders the sign explicitly: ``_-3`` ... ``_+3``.
                db.write('>%s_%+d\n%s\n' %
                         (name, frame, subseq.translate(stop_symbol='X')))
def main():
    """Command-line entry point: report the ``estimate_metrics`` results.

    Parses the adapter options (``-a/--adapter1``, ``-A/--adapter2``,
    defaulting to ``ADAPTER1``/``ADAPTER2``), the two inputs
    (``-1/--fastq1``, ``-2/--fastq2``) and ``-o/--output`` (default ``"-"``).
    The first five elements of the value returned by ``estimate_metrics`` are
    printed to the output, one labelled line each.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("-a", "--adapter1", default=ADAPTER1)
    cli.add_argument("-A", "--adapter2", default=ADAPTER2)
    cli.add_argument("-1", "--fastq1")
    cli.add_argument("-2", "--fastq2")
    cli.add_argument("-o", "--output", default="-")
    opts = cli.parse_args()

    with xopen(opts.fastq1) as reads1, xopen(opts.fastq2) as reads2:
        results = estimate_metrics(
            reads1, reads2, opts.adapter1, opts.adapter2)

    # Line templates, in the order of the metric indices they report.
    templates = (
        "Avg error prob: {}",
        "Read 1 with full-length adapters: {}",
        "Read 1 full-length adapter bases: {}",
        "Read 2 with full-length adapters: {}",
        "Read 2 full-length adapter bases: {}",
    )
    with xopen(opts.output, 'w') as out:
        for index, template in enumerate(templates):
            print(template.format(results[index]), file=out)
示例#6
0
def main(args):
    """Scan the pileup input and pass qualifying lines to ``process_line``.

    The lines of ``args.Tumor_Normal_mpileup`` are enumerated and wrapped in
    ``pairwise``. A pair qualifies when both lines share the same first
    tab-separated field and their second fields (parsed as integers) differ by
    exactly ``args.number_of_columns``. For a qualifying pair the first line
    is processed, with ``args.Candidate_somatic_sites`` as the output handle.
    Otherwise ``2 * number_of_columns - 1`` further pairs are skipped.

    Args:
        args: Namespace providing ``Tumor_Normal_mpileup``,
            ``Candidate_somatic_sites`` and ``number_of_columns``.
    """
    with xopen(args.Tumor_Normal_mpileup, 'rt') as TN, open(args.Candidate_somatic_sites, 'wt') as Cs:
        pairs = pairwise(enumerate(TN))

        for current, following in pairs:
            # Each element is an (index, text) tuple produced by enumerate.
            current_fields = current[1].rstrip('\n').split('\t')
            following_fields = following[1].rstrip('\n').split('\t')

            same_first_field = current_fields[0] == following_fields[0]
            if same_first_field and (int(following_fields[1]) - int(current_fields[1]) == args.number_of_columns):
                process_line(current_fields, current, Cs)
                continue

            # No partner at the expected distance: advance the pair iterator
            # without yielding anything (islice with start == stop only
            # consumes items).
            skip = 2*args.number_of_columns-1
            next(itertools.islice(pairs, skip, skip), None)
示例#7
0
def main(args):
    """Build the mapping-information file for a list of genomic sites.

    Reads site lines from ``args.Candidate_validated_somatic_sites`` and, for
    each site, collects a window of ``2 * n + 1`` consecutive entries from
    ``args.Tumor_Normal_mpileup`` through ``generate_mapping_infor_reads``.
    The collected columns are transposed with ``zip`` and written as
    tab-separated rows to ``args.Mapping_information_file``.

    The pileup is consumed through a single shared iterator, so sites are
    assumed to be listed in increasing order of their first field
    (TODO confirm against the file format).

    Args:
        args: Namespace providing ``Tumor_Normal_mpileup``,
            ``Candidate_validated_somatic_sites``,
            ``Mapping_information_file`` and ``indicator``. When
            ``indicator == 'training'`` a label column taken from the fourth
            field of each site line is appended.
    """
    with xopen(args.Tumor_Normal_mpileup, 'rt') as TN, open(args.Candidate_validated_somatic_sites, 'rt') as Cs, open(args.Mapping_information_file, 'wt') as Mi:
        # Half-width of the window read around each site (window = 2*n + 1).
        n = 110
        enume_Cs = enumerate(Cs)
        # Shared across all sites: every islice below continues from where
        # the previous one stopped.
        enume_TN = enumerate(TN)
        for line in enume_Cs:
            # Columns collected for the current site.
            holder = []

            # In training mode a fresh label column is built for every site.
            if args.indicator == 'training':
                label = []
            else:
                pass

            # NOTE(review): `base` is not referenced again in this function.
            base = ['A', 'T', 'G', 'C', 'N']
            # `line` is an (index, text) tuple from enumerate.
            line_content = line[1].rstrip('\n').split('\t')

            # Encoding for bases.
            # NOTE(review): `encoding` is not referenced again in this function.
            encoding = ['00001', '00010', '00100', '01000', '10000']

            # First site: read the entries at indices [site - n, site + n].
            # NOTE(review): islice raises ValueError for a negative start,
            # i.e. when the first site index is smaller than n.
            if line[0] == 0:
                for col in itertools.islice(enume_TN, int(line_content[0]) - n, int(line_content[0]) + n +1):
                    holder = generate_mapping_infor_reads(col, holder)
                # Remember this site's index to compute the gap to the next.
                keep = int(line_content[0])

                # Append the label column (training only). 2805 is presumably
                # the length of each column appended by
                # generate_mapping_infor_reads, so zip() below does not
                # truncate -- TODO confirm.
                if args.indicator == 'training':
                    for i in range(2805):
                        label.append(line_content[3])
                    holder.append(label)
                    # Keep the data columns, without the label, for reuse.
                    overlap = holder[:-1]
                else:
                    overlap = holder

                # Transpose: each output row takes one element per column.
                for row in zip(*holder):
                    Mi.write('\t'.join(row) + '\n')

            # Every later site.
            else:
                # Distance between this site and the previous one.
                gap = int(line_content[0]) - keep

                # Windows do not overlap: skip ahead, then read 2*n + 1 new
                # entries from the shared iterator.
                if gap > 2*n:
                    for col in itertools.islice(enume_TN, gap - 2*n -1, gap):
                        holder = generate_mapping_infor_reads(col, holder)

                    # Append the label column (training only), as above.
                    if args.indicator == 'training':
                        for i in range(2805):
                            label.append(line_content[3])
                        holder.append(label)
                        overlap = holder[:-1]
                    else:
                        overlap = holder

                    for row in zip(*holder):
                        Mi.write('\t'.join(row) + '\n')
                    keep = int(line_content[0])

                # Windows overlap (gap <= 2*n).
                else:
                    # Reuse the previous window's columns from index `gap` on.
                    for infor in overlap[gap:]:
                        holder.append(infor)
                    overlap = []
                    # Then read the next `gap` entries to complete the window.
                    for col in itertools.islice(enume_TN, 0, gap):
                        holder = generate_mapping_infor_reads(col, holder)

                    # Append the label column (training only), as above.
                    if args.indicator == 'training':
                        for i in range(2805):
                            label.append(line_content[3])
                        holder.append(label)
                        overlap = holder[:-1]
                    else:
                        overlap = holder

                    for row in zip(*holder):
                        Mi.write('\t'.join(row) + '\n')
                    keep = int(line_content[0])
示例#8
0
文件: utils.py 项目: jdidion/ngsindex
 def open(self) -> None:
     """Open ``self.path`` for binary reading and keep the handle.

     The file is opened with ``xopen`` in ``"rb"`` mode with
     ``context_wrapper=True``; the result is stored in ``self._reader``.
     """
     self._reader = xopen(self.path, "rb", context_wrapper=True)
示例#9
0
 def __init__(self, file1, file2=None, **kwargs):
     """Open one or two output files in text-write mode.

     Args:
         file1: Destination of the first output; always opened.
         file2: Optional destination of the second output. When it is
             ``None`` the instance is unpaired and ``file2`` is not set.
         **kwargs: Extra keyword arguments forwarded to every ``xopen`` call.
     """
     is_paired = file2 is not None
     self.paired = is_paired
     self.file1 = xopen(file1, 'wt', **kwargs)
     if not is_paired:
         return
     self.file2 = xopen(file2, 'wt', **kwargs)