def make_T_top_regions(signal_table_path, top=1000):
    '''
    makes a gff and fasta of the top N T regions based off the signal table

    The signal table is parsed as tab-delimited text and its first row is
    skipped as a header. For every remaining row the signal is the mean of
    two background-subtracted values, each floored at zero:
        (max(col2 - col3, 0) + max(col4 - col5, 0)) / 2
    Column 1 holds the region id.

    Parameters:
        signal_table_path: path to the tab-delimited signal table.
        top: maximum number of regions to keep. If the table holds fewer
            data rows than this, every available region is written instead
            of failing with an IndexError. The output file names still use
            the requested value of `top`.

    Returns:
        The path of the fasta file that was written.

    Relies on the module-level names `utils`, `gffFolder`, `genomeDirectory`
    and `fastaFolder`, which are not defined inside this function.
    '''

    signal_table = utils.parseTable(signal_table_path, '\t')
    data_rows = signal_table[1:]

    # Keyed by region id; if an id occurs on several rows the last row's
    # signal is the one used for all of them (same as before).
    signal_dict = {}
    for line in data_rows:

        signal = (max(float(line[2]) - float(line[3]), 0) +
                  max(float(line[4]) - float(line[5]), 0)) / 2
        signal_dict[line[1]] = signal

    signal_vector = [signal_dict[line[1]] for line in data_rows]
    # NOTE(review): utils.order is assumed to return indices into
    # signal_vector sorted by decreasing signal -- confirm against utils.
    signal_order = utils.order(signal_vector, decreasing=True)

    t_top_gff_path = '%sCH22_T_UNION_TOP_%s_-0_+0.gff' % (gffFolder, str(top))
    print(t_top_gff_path)

    # Never walk past the end of the ordering when the table is short.
    n_regions = min(top, len(signal_order))

    t_top_gff = []
    for i in range(n_regions):
        # signal_vector was built from signal_table[1:], so shift by one to
        # get back to the matching row of the full table (header included).
        signal_row = signal_order[i] + 1
        line = signal_table[signal_row]
        region_id = line[1]
        # chrom is the text before the first '('; the coordinates are the
        # 'start-end' pair after the last ':'.
        # presumably ids look like 'chr1(.):100-200' -- verify against input
        chrom = region_id.split('(')[0]
        coords = region_id.split(':')[-1].split('-')
        gff_line = [
            chrom, region_id, '', coords[0], coords[1], '', '.', '', region_id
        ]
        t_top_gff.append(gff_line)

    utils.unParseTable(t_top_gff, t_top_gff_path, '\t')

    t_top_fasta = utils.gffToFasta('HG19', genomeDirectory, t_top_gff)
    t_top_fasta_path = '%sHG19_CH22_T_UNION_TOP_%s_-0_+0.fasta' % (fastaFolder,
                                                                   top)
    utils.unParseTable(t_top_fasta, t_top_fasta_path, '')

    return t_top_fasta_path
Exemplo n.º 2
0
import sys
sys.path.append('/storage/cylin/bin/pipeline/')
import utils
import re


# ---- input locations and genome build --------------------------------------
genome = 'RN6'
gff_path = (
    '/storage/cylin/grail/projects/rasmc_all/gff/'
    'rasmc_h3k27ac_0_tss_all_subpeak.gff'
)
genome_directory = (
    '/storage/cylin/grail/genomes/Rattus_norvegicus/UCSC/rn6/'
    'Sequence/Chromosomes/'
)

# ---- pull the sequence of every gff region ---------------------------------
print(''.join(('gffToFasta Tool running on ', gff_path, ' for ', genome)))
fasta = utils.gffToFasta(
    genome,
    genome_directory,
    gff_path,
    UCSC=True,
    useID=False,
)

# ---- output table, seeded with its header row ------------------------------
print('Creating density table')
header = ['DENSITY', 'POSITIONS', 'POS_COUNT', 'SUBPEAK_LENGTH']
table = [header]

# CArG box motif: CC, then six A/T bases, then GG
seq = 'CC[AT]{6}GG'

# destination of the density table
table_path = (
    '/storage/cylin/grail/projects/rasmc_all/motif_density/'
    'CArG_box_seq_density_from_fasta_full_length_no_slash.txt'
)




for i in range(0,len(fasta),2):
    positions=[]
    line=fasta[i+1]