Пример #1
0
    # Mapping step: skipped entirely when the inputs are already BAM files
    # (bam_input is set outside this excerpt).
    if not bam_input:
        # Thread count is read from the [mapping] "threads" config option and
        # passed through get_num_threads (defined outside this excerpt).
        threads = get_num_threads(config.getint("mapping", "threads"))
        if VERBOSE:
            print("\nmapping with %s threads..." % threads)
        # Pair each input file with the corresponding entry of bam_paths; zip
        # stops at the shorter of the two sequences. The index `i` is not used
        # anywhere in the visible code.
        for (i, sample), outbam in zip(enumerate(args.input), bam_paths):
            if VERBOSE:
                print(
                    "\n", ht.now(), "Mapping %s to %s reference..." %
                    (os.path.basename(sample), ref_type.upper()))

            # The command line is built by %-formatting MAPPING_CMD and executed
            # through the shell. The exit status returned by subprocess.call is
            # discarded, so a failed mapping run is not detected here.
            # NOTE(review): the paths are interpolated into a shell string;
            # whether MAPPING_CMD quotes them cannot be seen here - confirm.
            subprocess.call(MAPPING_CMD %
                            (threads, outbam, MAPPING_REF[ref_type], sample),
                            shell=True)

    # sam-to-hdf5
    # Presumably loads the 'table' and 'features' entries stored in ALLELE_HDF;
    # the meaning of the positional False is defined by ht.load_hdf - TODO confirm.
    table, features = ht.load_hdf(ALLELE_HDF, False, 'table', 'features')
    if VERBOSE:
        print("\n", ht.now(), "Generating binary hit matrix.")

    if is_paired:
        # combine matrices for paired-end mapping
        # Each mate file is converted separately; np.sign maps every entry of
        # the returned position data to -1, 0 or 1.
        pos, read_details = ht.pysam_to_hdf(bam_paths[0])
        binary1 = np.sign(pos)  # dtype=np.uint16

        pos2, read_details2 = ht.pysam_to_hdf(bam_paths[1])
        binary2 = np.sign(pos2)  # dtype=np.uint16

        # Remove the BAM files only if this run created them (i.e. the inputs
        # were not BAM files) and the [behavior] "deletebam" option is enabled.
        if not bam_input and config.getboolean('behavior', 'deletebam'):
            os.remove(bam_paths[0])
            os.remove(bam_paths[1])
Пример #2
0
    # NOTE: this excerpt uses Python 2 print statements.
    # Select the reference key: "nuc" when args.rna is set, otherwise "gen".
    ref_type = "nuc" if args.rna else "gen"
    # More than one input file is treated as paired-end data.
    is_paired = len(args.input) > 1
    
    # Output paths for the result table and the coverage plot, both prefixed
    # with `date` inside out_dir.
    out_csv = out_dir+"/%s_result.tsv"%date
    out_plot = out_dir+"/%s_coverage_plot.pdf"%date

    # Map each (fished) input file to the reference.
    for i, sample in enumerate(args.input):
        if args.verbose:
            print "\n", ht.now(), "Mapping %s to %s reference..."%(os.path.basename(sample), ref_type.upper())
        # Output prefix is "<out_dir>/<date>_<i>"; the paired-end branch below
        # reads "<out_dir>/<date>_0.sam" and "_1.sam", so MAPPING_CMD presumably
        # appends ".sam" to this prefix - TODO confirm against MAPPING_CMD.
        sample_out = out_dir+"/"+date+"_"+str(i)
        # The command is run through the shell and the exit status returned by
        # subprocess.call is discarded, so a failed mapping is not detected here.
        subprocess.call(MAPPING_CMD%(config.get("MAPPING", "THREADS"), sample_out,
                                     MAPPING_REF[ref_type], sample), shell=True)

    # sam-to-hdf5
    # Presumably loads the 'table' and 'features' entries stored in ALLELE_HDF;
    # the meaning of the positional False is defined by ht.load_hdf - TODO confirm.
    table, features = ht.load_hdf(ALLELE_HDF, False, 'table', 'features')
    if args.verbose:
        print "\n", ht.now(), "Generating binary hit matrix."

    if is_paired:
        # Combine matrices for paired-end mapping.
        sample_1 = out_dir+"/"+date+"_0.sam"
        sample_2 = out_dir+"/"+date+"_1.sam"
        pos, etc, desc = ht.sam_to_hdf(sample_1, verbosity=args.verbose)
        # Binarize: every cell becomes 1 if truthy (non-zero) and 0 otherwise.
        binary1 = pos.applymap(bool).applymap(int)
        
        pos2, etc2, desc2 = ht.sam_to_hdf(sample_2, verbosity=args.verbose)
        binary2 = pos2.applymap(bool).applymap(int)
        
        # Index labels of each matrix as sets; how they are combined lies
        # beyond the end of this excerpt.
        id1 = set(binary1.index)
        id2 = set(binary2.index)