Exemplo n.º 1
0
def parse_whitelist_file(whitelist_file, n_mismatch, n_repeat):
    """Build repeated exact-match and mismatch lookups from a whitelist file.

    Args:
        whitelist_file: path handed to ``utils.read_one_col``.
        n_mismatch: forwarded to ``get_mismatch_dict`` (presumably the number
            of mismatches tolerated per barcode; confirm against that helper).
        n_repeat: how many times each lookup is repeated in the returned lists.

    Returns:
        A ``(set_list, mismatch_list)`` tuple. Each list has ``n_repeat``
        entries, and all entries of one list are the *same* object, so a
        mutation through one entry is visible through every other entry.
    """
    whitelist, _ = utils.read_one_col(whitelist_file)
    exact_lookup = set(whitelist)
    mismatch_lookup = get_mismatch_dict(whitelist, n_mismatch)

    # List multiplication copies references, not the underlying containers.
    return [exact_lookup] * n_repeat, [mismatch_lookup] * n_repeat
Exemplo n.º 2
0
    def __init__(self, args, display_title=None):
        """Load barcodes and target genes, register metrics, set output paths.

        Args:
            args: parsed options; ``match_dir``, ``panel`` and ``gene_list``
                are read from it here.
            display_title: forwarded unchanged to ``Step.__init__``.

        Exits via ``sys.exit`` when neither ``--panel`` nor ``--gene_list``
        is supplied, or when the chosen source yields an empty gene list.
        """
        Step.__init__(self, args, display_title=display_title)

        # set
        self.match_barcode_list, self.n_cell = utils.read_barcode_file(args.match_dir)
        self.match_barcode = set(self.match_barcode_list)

        if args.panel:
            self.gene_list = utils.get_gene_region_from_bed(args.panel)[0]
            self.n_gene = len(self.gene_list)
        elif args.gene_list:
            self.gene_list, self.n_gene = utils.read_one_col(args.gene_list)
        else:
            # Stop before calling the file reader with an empty path, so the
            # user sees this message instead of whatever the reader raises.
            sys.exit("You must provide either --panel or --gene_list!")

        # A source was given but produced no genes.
        if not self.gene_list:
            sys.exit("You must provide either --panel or --gene_list!")

        # presumably a 3-level nested dict of int counters; confirm in utils.genDict
        self.count_dict = utils.genDict(dim=3, valType=int)

        self.add_metric(
            name="Number of Target Genes",
            value=self.n_gene,
        )
        self.add_metric(
            name="Number of Cells",
            value=self.n_cell,
        )

        # out file
        # NOTE(review): self.out_prefix is not assigned in this method;
        # presumably Step.__init__ sets it - confirm.
        self.out_bam_file = f'{self.out_prefix}_filtered.bam'
        self.out_bam_file_sorted = f'{self.out_prefix}_filtered_sorted.bam'
Exemplo n.º 3
0
 def setUp(self):
     """Prepare the fixture: enter the test directory and load input data.

     All paths are hard-coded; the relative ones resolve against the
     directory entered by ``os.chdir`` on the first line.
     """
     os.chdir('/SGRNJ01/RD_dir/pipeline_test/zhouyiqi/0910_panel/')

     # Constant settings; none of the loading steps below modify them.
     self.sample = 'S20071508_D_TS'
     self.assay = 'capture_rna'
     self.match_dir = '/SGRNJ02/RandD4/RD20051303_Panel/20200729/S20071508_D_ZL'
     self.genomeDir = '/SGRNJ/Public/Database/genome/homo_sapiens/ensembl_92'
     self.outdir = f'{self.sample}/05.count_capture_rna/'

     # Count-detail table; the first row is used as the header.
     self.df = pd.read_table(
         './/S20071508_D_TS/05.count_capture_rna/S20071508_D_TS_count_detail.txt',
         header=0,
     )

     # Barcodes from the matched directory, plus their count.
     barcode_info = read_barcode_file(self.match_dir)
     self.sc_cell_barcodes, self.sc_cell_number = barcode_info

     # First value returned for the matrix barcodes.tsv; the second is unused.
     barcodes_tsv = f'{self.sample}/05.count_capture_rna/{self.sample}_matrix_10X/barcodes.tsv'
     self.validated_barcodes, _ = read_one_col(barcodes_tsv)

     # Only the second value (kept as ``gtf``) is needed; the first is dropped.
     _, self.gtf = glob_genomeDir(self.genomeDir)
Exemplo n.º 4
0
def convert(gene_list_file, gtf):
    """Map each gene name listed in a file to its gene id.

    Args:
        gene_list_file: path handed to ``read_one_col``; its first returned
            value is treated as the list of gene names.
        gtf: handed to ``gene_convert``, whose result is used as an
            id -> name mapping.

    Returns:
        dict mapping gene id -> gene name, for the names in the list only.

    Raises:
        KeyError: if any listed gene name has no id in the mapping. The
            message names every missing gene, not just the first.
    """
    gene_names, _count = read_one_col(gene_list_file)
    id_to_name = gene_convert(gtf)

    # Invert id -> name. If several ids share a name, the last one iterated
    # wins, exactly as with a plain assignment loop.
    name_to_id = {id_to_name[gene_id]: gene_id for gene_id in id_to_name}

    missing = [name for name in gene_names if name not in name_to_id]
    if missing:
        raise KeyError(f"gene name(s) not found in {gtf}: {missing}")

    return {name_to_id[name]: name for name in gene_names}
Exemplo n.º 5
0
    def __init__(self, args):
        """Locate the input fastq, load barcodes and prepare output paths.

        Args:
            args: parsed options; ``match_dir``, ``barcode_file`` and
                ``outdir`` are read from it here.

        Raises:
            IndexError: if no file matches
                ``{args.match_dir}/*barcode/*_2.fq*``.
            OSError: if the output directory cannot be created.
        """
        self.args = args

        fq_pattern = f'{args.match_dir}/*barcode/*_2.fq*'
        fq_matches = glob.glob(fq_pattern)
        if not fq_matches:
            # Same exception type a bare ``[0]`` would raise, but the message
            # names the pattern that found nothing.
            raise IndexError(f'no file matches {fq_pattern}')
        # The first match is used; glob does not guarantee an ordering.
        self.rna_fq_file = fq_matches[0]

        self.barcodes, _num = utils.read_one_col(args.barcode_file)
        self.read_index = set()

        # out
        self.out_fq1 = f'{args.outdir}/{SAMPLE}_1.fq.gz'
        self.out_fq2 = f'{args.outdir}/{SAMPLE}_2.fq.gz'

        # mkdir
        # No shell involved: paths containing spaces or shell metacharacters
        # are handled correctly, and a failure raises instead of being ignored.
        os.makedirs(args.outdir, exist_ok=True)