Exemplo n.º 1
0
def fseq(options):
    '''
    Call peaks on both strands with F-seq and merge them into one file

    Reads the library size from <rampagedir>/total_counts.txt, runs
    run_fseq() once per strand and concatenates the per-strand peak files
    into <rampagedir>/rampage_peaks.txt. When the library size is non-zero,
    an extra RPM column is appended to every peak line; the read count is
    taken from the 10th whitespace-separated field of the line.

    options: docopt-style dict providing '<rampagedir>', '-l', '--wig'
             and '-p'.
    Exits through sys.exit() if the fseq executable cannot be found.
    '''
    # F-seq has to be installed before anything else is attempted
    if not which('fseq'):
        sys.exit('Error: No F-seq installed!')
    # parse options
    folder = check_dir(options['<rampagedir>'])
    flength = options['-l']
    wig_flag = options['--wig']
    percent = float(options['-p'])
    count_path = os.path.join(folder, 'total_counts.txt')
    with open(count_path, 'r') as count_f:
        total = int(count_f.read().rstrip())
    # run F-seq strand by strand
    strand_beds = {'+': 'rampage_plus_5end.bed',
                   '-': 'rampage_minus_5end.bed'}
    merged_path = os.path.join(folder, 'rampage_peaks.txt')
    with open(merged_path, 'w') as out:
        for strand, bed_name in strand_beds.items():
            peak_path = run_fseq(folder, bed_name, strand, flength, wig_flag,
                                 percent)
            with open(peak_path, 'r') as peaks:
                for record in peaks:
                    if not total:
                        # no library size available: copy the line verbatim
                        out.write(record)
                        continue
                    # normalise the read count to reads per million
                    record = record.rstrip()
                    reads = int(record.split()[9])
                    out.write(record + '\t%f\n' % (reads * 1000000.0 / total))
Exemplo n.º 2
0
def main():
    '''
    Build a junction BED file from a BAM file, optionally with a BigBed copy

    Command-line options are parsed with docopt from the module docstring.
    The junction file is produced by fetch_juncfile() in the output
    directory ('-o'; './' means the current working directory). If '--bb'
    is set and the bedToBigBed executable is available, the junction file
    is converted to <bam prefix>_junc.bb in the same directory, using
    chromosome sizes taken from the BAM header.

    Exits through sys.exit() when bedToBigBed returns a non-zero status.
    '''
    # parse options
    options = docopt(__doc__, version=__version__)
    # check output_dir
    if options['-o'] == './':
        out_dir = os.getcwd()
    else:
        out_dir = check_dir(options['-o'])
    # fetch junction bed file
    junc_f = fetch_juncfile(options['<bam>'],
                            url=options['--url'],
                            dir=out_dir,
                            uniq=options['--uniq'],
                            stranded=options['--stranded'],
                            min=int(options['--min-reads']))
    # create junction bigbed file in case
    if options['--bb'] and which('bedToBigBed') is not None:
        import subprocess
        prefix = os.path.splitext(os.path.split(options['<bam>'])[-1])[0]
        bb_path = os.path.join(out_dir, prefix + '_junc.bb')
        bamf = pysam.AlignmentFile(options['<bam>'], 'rb')
        try:
            # text mode: the default 'w+b' rejects str writes on Python 3
            with tempfile.NamedTemporaryFile(mode='w+') as chrom_size:
                for seq in bamf.header['SQ']:
                    chrom_size.write('%s\t%s\n' % (seq['SN'], seq['LN']))
                # bedToBigBed opens the file by name, so the buffered
                # content has to be on disk before it starts
                chrom_size.flush()
                # an argument list avoids shell quoting problems with paths
                # and reports signal terminations as a non-zero status
                return_code = subprocess.call(['bedToBigBed', '-type=bed12',
                                               str(junc_f), chrom_size.name,
                                               bb_path])
        finally:
            # release the BAM handle even when the conversion fails
            bamf.close()
        if return_code:
            sys.exit('Error: cannot convert bed to BigBed!')
Exemplo n.º 3
0
def entropy(options):
    '''
    Compute the entropy of every cluster in parallel

    Each line of <rampagedir>/rampage_peaks.txt is handed to cal_entropy()
    together with the checked rampage_link.bed file; the returned strings
    are written, in input order, to <rampagedir>/rampage_entropy.txt.

    options: docopt-style dict providing '<rampagedir>' and '--thread'.
    '''
    # parse options
    folder = check_dir(options['<rampagedir>'])
    link_path = os.path.join(folder, 'rampage_link.bed')
    link_f = check_bed(link_path, return_handle=False)
    threads = int(options['--thread'])
    peak_path = os.path.join(folder, 'rampage_peaks.txt')
    entropy_path = os.path.join(folder, 'rampage_entropy.txt')
    # one job per peak line, consumed while the peak file is still open
    with open(peak_path, 'r') as peak:
        jobs = (delayed(cal_entropy)(record, link_f) for record in peak)
        result = Parallel(n_jobs=threads)(jobs)
    # dump the results
    with open(entropy_path, 'w') as out:
        out.writelines(result)
Exemplo n.º 4
0
Arquivo: csf.py Projeto: kepbod/stuff
def main():
    '''
    Align reads against an sgRNA index and fetch cleavage site reads

    Command-line options are parsed with docopt from the module docstring.
    Unless '--skip-alignment' is given, the input reads ('-R' for single
    files, or '-1'/'-2' for paired files, comma separated) are converted
    into a temporary file and aligned with bowtie2_align(); otherwise the
    existing <out_dir>/cs.bam is reused. fetch_reads() is then run on the
    resulting BAM file.

    Exits through sys.exit() when '--seq' is given but is not a file.
    '''
    # parse options
    options = docopt(__doc__, version=__version__)
    if options['--seq']:
        if not os.path.isfile(options['--seq']):
            sys.exit('Error: wrong seq file!')
        seq = os.path.abspath(options['--seq'])
        seq_flag = True
    else:
        seq = None
        seq_flag = False
    fa = check_fasta(options['--genome'])
    chrom = options['--chrom']
    site = int(options['--site'])
    strand = '+' if options['--strand'] == '1' else '-'
    rlen = int(options['--read-length'])
    alen = int(options['--region-length'])
    clen = int(options['--check-region-length'])
    thread = options['--thread']
    skip_flag = options['--skip-alignment']
    # check output directory
    if not skip_flag:  # not skip alignment
        out_dir = create_dir(options['<out_dir>'])
    else:  # skip alignment
        out_dir = check_dir(options['<out_dir>'])
    # build index for sgRNA
    index_path, offset = build_index(fa, chrom, site, strand, rlen, thread,
                                     out_dir, seq, seq_flag)
    if not skip_flag:  # not skip alignment
        # deal with reads file; the context manager removes the temporary
        # file even if read conversion or alignment raises
        with tempfile.NamedTemporaryFile(mode='w+') as reads:
            if options['-R']:
                fq_lst = options['-R'].split(',')
                convert_read(reads, single=fq_lst)
            else:
                fq1_lst = options['-1'].split(',')
                fq2_lst = options['-2'].split(',')
                convert_read(reads, fq1=fq1_lst, fq2=fq2_lst)
            # the aligner gets the file by name, so push buffered writes
            # to disk before handing the path over
            reads.flush()
            reads.seek(0)
            # mapped reads with bowtie2
            bam = bowtie2_align(index_path, reads.name, thread, out_dir)
    else:
        bam = os.path.join(out_dir, 'cs.bam')
    # fetch cleavage site reads
    fetch_reads(index_path, offset, alen, clen, bam, out_dir)
Exemplo n.º 5
0
def assign_peak(options):
    '''
    Assign rampage peaks to genes

    The annotation comes from '--ref' when given, otherwise from an existing
    gffutils database ('--db'), otherwise from a database created on the fly
    from '--gtf' (written to <gtf basename>.db). For every gene yielded by
    parse_gene(), assign_peak_to_gene() is run in a worker pool; each
    assigned peak is written with its gene info, tab separated, to
    <rampagepeak>/rampage_assigned_peaks.txt.

    options: docopt-style dict providing '--ref', '--db', '--gtf',
             '<rampagepeak>', '--promoter' and '--thread'.
    '''
    # parse options: pick the annotation source
    ref_flag = bool(options['--ref'])
    if ref_flag:
        db = options['--ref']
    else:
        import gffutils  # only required for gffutils-backed annotations
        if options['--db']:
            db = gffutils.FeatureDB(options['--db'])
        else:
            gtf_f = options['--gtf']
            prefix = os.path.splitext(os.path.basename(gtf_f))[0]
            db = gffutils.create_db(gtf_f,
                                    prefix + '.db',
                                    force=True,
                                    disable_infer_transcripts=True)
    folder = check_dir(options['<rampagepeak>'])
    link_path = os.path.join(folder, 'rampage_link.bed')
    rampage = check_bed(link_path, return_handle=False)
    peak_path = os.path.join(folder, 'rampage_peaks.txt')
    rampage_peak = check_bed(peak_path, return_handle=False)
    prom = int(options['--promoter'])
    # align and filter candidate peak: one asynchronous job per gene
    pool = Pool(int(options['--thread']))
    pending = [
        pool.apply_async(assign_peak_to_gene,
                         args=(rampage, rampage_peak, gene_info, gpromoter,
                               prom))
        for gene_info, gpromoter in parse_gene(db, ref_flag, prom)
    ]
    pool.close()
    pool.join()
    # output results
    out_path = os.path.join(folder, 'rampage_assigned_peaks.txt')
    with open(out_path, 'w') as outf:
        for job in pending:
            gene_info, peaks = job.get()
            if not gene_info:
                # nothing was assigned to this gene
                continue
            for peak in peaks:
                outf.write('%s\t%s\n' % (peak, gene_info))
Exemplo n.º 6
0
def fseq(options):
    '''
    Call peaks on both strands with F-seq and merge them into one file

    Reads the library size from <rampagedir>/total_counts.txt, runs
    run_fseq() once per strand with a fixed list of percent thresholds and
    concatenates the per-strand peak files into the file named by '-o'.
    When the library size is non-zero it is appended as an extra column to
    every peak line.

    options: docopt-style dict providing '<rampagedir>', '-l' and '-o'.
    Exits through sys.exit() if the fseq executable cannot be found.
    '''
    # F-seq has to be installed before anything else is attempted
    if not which('fseq'):
        sys.exit('Error: No F-seq installed!')
    # parse options
    folder = check_dir(options['<rampagedir>'])
    flength = options['-l']
    percent = [0.95, 0.9, 0.85, 0.8, 0.75, 0.7]
    count_path = os.path.join(folder, 'total_counts.txt')
    with open(count_path, 'r') as count_f:
        total = int(count_f.read().rstrip())
    # the appended column is the same for every line; None means the lines
    # are copied through unchanged
    suffix = '\t%d\n' % total if total else None
    # run F-seq strand by strand
    strand_beds = {'+': 'rampage_plus_5end.bed',
                   '-': 'rampage_minus_5end.bed'}
    with open(options['-o'], 'w') as out:
        for strand, bed_name in strand_beds.items():
            peak_path = run_fseq(folder, bed_name, strand, flength, percent)
            with open(peak_path, 'r') as peaks:
                for record in peaks:
                    if suffix is None:
                        out.write(record)
                    else:
                        out.write(record.rstrip() + suffix)
Exemplo n.º 7
0
def dbloci(options):
    '''
    Fetch bidirectionally transcribed loci

    Pairs every minus-strand cluster in <rampagedir>/rampage_peaks.txt with
    the plus-strand clusters found within 2 * size downstream of it, groups
    the pairs with fetch_pair(), and writes one candidate line per group
    that passes the strand density check to <rampagedir>/enhancers.txt.

    options: docopt-style dict providing '<rampagedir>', '-l', '--filter'
             and, depending on '--filter', '--rpm' or '--height'.
    '''
    # parse options
    folder = check_dir(options['<rampagedir>'])
    size = int(options['-l'])
    filter_flag = options['--filter']
    # NOTE(review): `filter` shadows the builtin and is only bound for the
    # 'rpm' and 'height' flags; any other flag value leaves it undefined and
    # the first filter_cluster() call below fails with a NameError.
    if filter_flag == 'rpm':
        filter = float(options['--rpm'])
    elif filter_flag == 'height':
        filter = int(options['--height'])
    # fetch rampage pairs
    peak_f = os.path.join(folder, 'rampage_peaks.txt')
    peak_bed = check_bed(peak_f)
    up_cluster = {}  # 'chrom\tpos\tstrand' -> height of minus-strand clusters
    down_cluster = {}  # 'chrom\tpos\tstrand' -> height of plus-strand clusters
    pairs = defaultdict(list)  # cluster id -> ids of its partner clusters
    with open(peak_f, 'r') as peak:
        for ucluster in peak:  # parse upstream cluster
            # fields used: 1st = chrom, 6th = strand, 7th = position
            chrom, _, _, _, _, ustrand, upos = ucluster.split()[:7]
            if ustrand == '+':  # upstream cluster should be minus
                continue
            # a truthy result from filter_cluster() drops the cluster
            if filter_cluster(ucluster, filter_flag, filter):
                continue
            uheight = int(ucluster.split()[7])  # 8th field is used as height
            up_id = '\t'.join([chrom, upos, ustrand])
            up_cluster[up_id] = uheight
            # parse downstream cluster
            # search window: from the upstream position to 2 * size beyond it
            start = int(upos)
            end = start + size * 2
            for dcluster in peak_bed.fetch(chrom, start, end):
                dstrand, dpos = dcluster.split()[5:7]
                if dstrand == '-':  # downstream cluster should be plus
                    continue
                if filter_cluster(dcluster, filter_flag, filter):
                    continue
                dheight = int(dcluster.split()[7])
                down_id = '\t'.join([chrom, dpos, dstrand])
                down_cluster[down_id] = dheight
                # construct pairs
                # the link is stored in both directions
                pairs[up_id].append(down_id)
                pairs[down_id].append(up_id)
    # output enhancers
    outf = os.path.join(folder, 'enhancers.txt')
    with open(outf, 'w') as out:
        # presumably each pair_set is a group of linked cluster ids - confirm
        # against fetch_pair()
        for pair_set in fetch_pair(pairs):
            up_site, down_site = 0, 0
            up_height, down_height = 0, 0
            # keep the position of the tallest cluster on each strand
            for site_id in pair_set:
                chrom, site, strand = site_id.split()
                if strand == '-':  # upstream
                    height = up_cluster[site_id]
                    if height > up_height:
                        up_site = int(site)
                        up_height = height
                else:  # downstream
                    height = down_cluster[site_id]
                    if height > down_height:
                        down_site = int(site)
                        down_height = height
            # NOTE(review): `chrom` below is whatever the last site_id of the
            # loop above assigned; this relies on pair_set being non-empty.
            # midpoint between the two selected sites
            middle_site = int((up_site + down_site) / 2)
            # per-strand densities on each side of the midpoint
            forward_plus = cal_density(folder, chrom, middle_site,
                                       middle_site + size, 'plus')
            forward_minus = cal_density(folder, chrom, middle_site,
                                        middle_site + size, 'minus')
            reverse_plus = cal_density(folder, chrom, middle_site - size,
                                       middle_site, 'plus')
            reverse_minus = cal_density(folder, chrom, middle_site - size,
                                        middle_site, 'minus')
            # keep the locus only when plus dominates the forward window and
            # minus dominates the reverse window
            if forward_minus >= forward_plus or reverse_plus >= reverse_minus:
                continue
            else:
                forward = forward_plus
                reverse = reverse_minus
            forward_dis = fetch_dis(folder, chrom, middle_site,
                                    middle_site + size, '+')
            reverse_dis = fetch_dis(folder, chrom, middle_site - size,
                                    middle_site, '-')
            # normalised difference between forward and reverse density
            fold = (forward - reverse) * 1.0 / (forward + reverse)
            start = middle_site - size
            end = middle_site + size
            # NOTE(review): reverse/forward are written with %d, which
            # truncates them if cal_density() returns floats - confirm
            out_format = '%s\t%d\t%d\tenhancer\t0\t+\t%d\t%d\t%d\t%d\t%d\t%f\n'
            out.write(out_format %
                      (chrom, start, end, middle_site, reverse_dis,
                       forward_dis, reverse, forward, fold))