Пример #1
0
This is the third and final script of the Python pipeline.
It aggregates the results from each recover_cdr3s worker
and summarizes the information.
Run using: python -m summary <args>

Requirements:
	Python >3.8.5, samtools, pysam, biopython, pandas
"""
import argparse
from textwrap import dedent, indent
import os
import tcrgo.io as io
from tcrgo.collapse import collapse

from tcrgo import Log
log = Log('root')

def main(args):
	"""Entry point of the summary script.

	Initialises the logger at ``args.verbosity`` and resolves which
	recover_cdr3s workers to aggregate from ``args.workers``:

	* ``"ALL"``       -> whatever ``io.list_cdr3_files(args.input_path)`` returns
	* ``"start:end"`` -> ``range(start, end + 1)``
	* ``"end"``       -> ``range(1, end + 1)``

	Leading/trailing ``:`` characters in ``args.workers`` are stripped before
	splitting. A first value below 1 is reported through ``log.error``.
	"""
	log.init(args.verbosity)

	if args.workers != "ALL":
		# Parse the colon-separated bounds, e.g. "3:7" -> [3, 7], "5" -> [5].
		bounds = [int(token) for token in args.workers.strip(':').split(':')]
		first, last = bounds[0], bounds[-1]
		if first < 1:
			# NOTE(review): whether log.error aborts is not visible here;
			# if it only logs, execution continues with the given value.
			log.error("Please enter positive value for the start of the range.")
		# A lone number is an upper bound, so counting starts at worker 1.
		start = first if len(bounds) > 1 else 1
		worker_range = range(start, last + 1)
	else:
		worker_range = io.list_cdr3_files(args.input_path)

	log.info("Aggregating CDR3 info files outputted from the recover_cdr3s script.")
Пример #2
0
Requirements:
	Python >3.8.5, samtools, pysam, biopython, pandas
"""
import argparse
import os.path
from textwrap import dedent, indent
import pysam

from typing import List, Dict, Iterator, Set
from pathlib import Path

from tcrgo.bam import BAMDict, ReferenceDict
import tcrgo.io as io
from tcrgo import Log
log = Log("root")


def main(args):
    """Entry point: prepare the BAM input and the reference dictionary.

    Reads the following attributes from ``args``: ``verbosity``, ``bam``,
    ``output_path``, ``cdr3_positions_file`` (may be ``None``), ``fasta`` and
    ``zero_indexed``.

    Visible steps:
      1. Initialise the logger at the requested verbosity.
      2. Pass ``args.bam`` and ``args.output_path`` to ``io.sort_and_index``.
      3. Load CDR3 positions, only when a positions file was supplied.
      4. Parse the FASTA file and build a ``ReferenceDict`` from its entries,
         the (optional) CDR3 positions and the ``zero_indexed`` flag.
    """
    log.init(args.verbosity)
    log.info("Parsing input data...")
    # presumably returns a handle/path to the sorted, indexed BAM -- confirm in tcrgo.io
    bam = io.sort_and_index(args.bam, args.output_path)

    log.info("Reading CDR3 positions file and FASTA file...")
    # The positions file is optional; None is forwarded to ReferenceDict.build
    # when it was not provided.
    cdr3_positions = None
    if args.cdr3_positions_file is not None:
        cdr3_positions = io.read_cdr3_positions(args.cdr3_positions_file)
    # Logs the literal string "None" when no positions file was given.
    log.verbose(f"{cdr3_positions}")
    entries = io.parse_fasta(args.fasta)
    refdict = ReferenceDict()
    refdict.build(entries, cdr3_positions, args.zero_indexed)
Пример #3
0
"""Alignment and preprocessing via Drop-Seq Tools 2.4.0 and Bowtie2 version 2.4.1"""
import tcrgo.dropseq_tools as ds
import tcrgo.io as io
import argparse
from textwrap import dedent, indent
from pathlib import Path
import os
import pysam

from tcrgo import Log
log = Log(name=__name__)


def main(args):
    """Entry point of the alignment and preprocessing script.

    Pipeline outline:

    0. Raw BAM -> Single-end FASTQ -> Pair-end FASTQs -> Unmapped BAM
    1. Unmapped BAM -> aligned and tagged BAM
        a. Tag cell barcodes
        b. Tag molecular barcodes
        c. Trim 5' primer sequence
        d. Trim 3' polyA sequence
        e. SAM -> Fastq
        f. STAR alignment
        g. Sort STAR alignment in queryname order
        h. Merge STAR alignment tagged SAM to recover cell/molecular barcodes
        i. Add gene/exon and other annotation tags
        j. Barcode Repair
            i. Repair substitution errors (DetectBeadSubstitutionErrors)
            ii. Repair indel errors (DetectBeadSynthesisErrors)

    NOTE(review): the module docstring mentions Bowtie2 while steps f-h name
    STAR -- confirm which aligner this script actually runs.
    """
    # Configure the logger at the verbosity requested on the command line.
    log.init(args.verbosity)