示例#1
0
def replace_results(randoms_old, randoms_replace, directory, suffix,
                    replace_suffix, new_suffix):
    """Merge replacement result lines into copies of existing result files.

    The entries loaded from ``randoms_old`` are walked in order and compared
    by ``id`` against the head of the entries loaded from
    ``randoms_replace``; every match consumes that head.  This yields one
    boolean per old entry saying whether its line has to be replaced.

    For every file matching ``directory + "/*" + suffix`` a new file named
    ``<file> + new_suffix`` is written: line ``i`` is taken from the next
    unread line of ``<file> + replace_suffix`` when entry ``i`` was
    replaced, and copied from the original file otherwise.

    Nothing is written when ``randoms_replace`` holds no entries.

    Raises:
        IndexError: if a result file has more lines than ``randoms_old``
            has entries.
    """
    rold = load_enhancers(randoms_old)
    rreplace = load_enhancers(randoms_replace)
    if not rreplace:
        return

    # Replacements are consumed strictly in order: only the current head of
    # the replacement list is ever compared against the old entries.
    current = rreplace.pop(0)
    replaced = []
    for entry in rold:
        if entry.id == current.id:
            replaced.append(True)
            if rreplace:
                current = rreplace.pop(0)
        else:
            replaced.append(False)

    for filename in glob(directory + "/*" + suffix):
        # The output must be opened for writing (the default mode is
        # read-only), and all three handles are released even on error.
        with open(filename) as fold:
            with open(filename + replace_suffix) as frep:
                with open(filename + new_suffix, "w") as fnew:
                    for i, line in enumerate(fold):
                        if replaced[i]:
                            fnew.write(frep.readline())
                        else:
                            fnew.write(line)
def replace_randoms(randoms_old, randoms_replace, randoms_new):
    """Write a copy of the old entries with matching ones swapped out.

    Entries loaded from ``randoms_old`` are walked in order.  Whenever an
    entry's ``id`` equals the ``id`` of the current head of the entries
    loaded from ``randoms_replace``, that head is emitted instead and the
    next replacement (if any) becomes the head.  The merged list is saved
    to ``randoms_new``.

    Returns without saving anything when ``randoms_replace`` is empty.
    """
    originals = load_enhancers(randoms_old)
    pending = load_enhancers(randoms_replace)
    merged = []
    if pending == []:
        return

    head = pending.pop(0)
    for entry in originals:
        is_match = entry.id == head.id
        if not is_match:
            merged.append(entry)
            continue
        merged.append(head)
        # Keep the last replacement as head once the queue runs dry.
        if pending != []:
            head = pending.pop(0)

    save_enhancers(merged, randoms_new)
示例#3
0
def replace_randoms(randoms_old, randoms_replace, randoms_new):
    """Save the old entry list with id-matched entries replaced.

    The old entries (from ``randoms_old``) are scanned in order against the
    first not-yet-used entry from ``randoms_replace``; on an ``id`` match
    the replacement is taken in place of the old entry and the following
    replacement, when one is left, is used for later comparisons.  The
    result is passed to ``save_enhancers`` with ``randoms_new``.

    If ``randoms_replace`` yields no entries the function returns early and
    nothing is saved.
    """
    old_entries = load_enhancers(randoms_old)
    replacements = load_enhancers(randoms_replace)
    result = []
    if replacements == []:
        return

    candidate = replacements.pop(0)
    for old in old_entries:
        if old.id == candidate.id:
            result.append(candidate)
            # Advance only while replacements remain; otherwise the last
            # one stays as the comparison target.
            if replacements != []:
                candidate = replacements.pop(0)
            continue
        result.append(old)

    save_enhancers(result, randoms_new)
示例#4
0
def count_all_means(enhancers_file, directories, name):
    """Run ``count_mean_signal`` over every bigWig file and log failures.

    Entries are loaded from ``enhancers_file``; every ``*.bigWig`` and
    ``*.bw`` file found in ``directories`` is processed in random order.
    For each file whose ``count_mean_signal`` result is non-empty, one
    tab-separated row (file base name followed by the returned values) is
    appended to ``DATAPATH + name + ".fails"``.

    A header row listing the entry ids is written first when the fails
    file does not exist yet or is empty.
    """
    import os

    enhancers = load_enhancers(enhancers_file)
    print("nr of sequences: %s" % len(enhancers))
    names = [str(x.id) for x in enhancers]

    fails_path = DATAPATH + "%s.fails" % (name)
    # Opening in append mode creates a missing file instead of raising, so
    # the need for a header is decided from the file's state up front.
    needs_header = (not os.path.exists(fails_path)
                    or os.path.getsize(fails_path) == 0)

    print("saving fails number to %s%s.fails" % (DATAPATH, name))

    filenames = []
    for directory in directories:
        filenames += glob(directory + "/*.bigWig") + glob(directory + "/*.bw")
    random.shuffle(filenames)

    # The context manager closes the fails file even if a signal file
    # cannot be processed.
    with open(fails_path, "a") as fails_file:
        if needs_header:
            fails_file.write('\t'.join([' '] + names) + "\n")
        for filename in filenames:
            fails = count_mean_signal(enhancers, filename, name)
            if fails:
                row = [filename.split('/')[-1]] + [str(x) for x in fails]
                fails_file.write("\t".join(row) + "\n")
示例#5
0
def count_all_means(enhancers_file, directories, name):
    """Run ``count_mean_signal`` over every bigWig file using a thread pool.

    Entries are loaded from ``enhancers_file``; every ``*.bigWig`` and
    ``*.bw`` file found in ``directories`` is handed to a ``ThreadPool`` of
    ``N_pools`` workers.  For each file whose result is non-empty, one
    tab-separated row (file base name followed by the returned values) is
    appended to ``DATAPATH + name + ".fails"``.

    A header row listing the entry ids is written first when the fails
    file does not exist yet or is empty.
    """
    import os

    enhancers = load_enhancers(enhancers_file)
    names = [str(x.id) for x in enhancers]

    fails_path = DATAPATH + "%s.fails" % (name)
    # Opening in append mode creates a missing file instead of raising, so
    # the need for a header is decided from the file's state up front.
    needs_header = (not os.path.exists(fails_path)
                    or os.path.getsize(fails_path) == 0)

    print("saving fails number to %s%s.fails" % (DATAPATH, name))

    filenames = []
    for directory in directories:
        filenames += glob(directory + "/*.bigWig") + glob(directory + "/*.bw")

    pool = ThreadPool(N_pools)
    try:
        fail_list = pool.map(lambda x: count_mean_signal(enhancers, x, name),
                             filenames)
    finally:
        # Always shut the workers down, also when a task raised.
        pool.close()
        pool.join()

    # The context manager guarantees the fails file is flushed and closed.
    with open(fails_path, "a") as fails_file:
        if needs_header:
            fails_file.write('\t'.join([' '] + names) + "\n")
        for filename, fails in zip(filenames, fail_list):
            if fails:
                row = [filename.split('/')[-1]] + [str(x) for x in fails]
                fails_file.write("\t".join(row) + "\n")
def replace_results(randoms_old, randoms_replace, directory, suffix, replace_suffix, new_suffix):
    """Build new result files in which replaced entries get fresh lines.

    Entries loaded from ``randoms_old`` are compared, in order and by
    ``id``, with the first unused entry loaded from ``randoms_replace``;
    each match uses up that replacement.  The outcome is a per-entry flag
    telling whether the corresponding result line must be swapped.

    Each file matching ``directory + "/*" + suffix`` is then copied to
    ``<file> + new_suffix``: flagged lines are taken sequentially from
    ``<file> + replace_suffix``, all other lines are copied unchanged.

    Does nothing when ``randoms_replace`` holds no entries.

    Raises:
        IndexError: if a result file has more lines than ``randoms_old``
            has entries.
    """
    rold = load_enhancers(randoms_old)
    rreplace = load_enhancers(randoms_replace)
    if not rreplace:
        return

    # Only the head of the replacement list is ever compared, so the
    # replacements have to appear in the same order as the old entries.
    head = rreplace.pop(0)
    swap_line = []
    for old in rold:
        if old.id == head.id:
            swap_line.append(True)
            if rreplace:
                head = rreplace.pop(0)
        else:
            swap_line.append(False)

    for filename in glob(directory + "/*" + suffix):
        # Open the target in write mode (a plain open() is read-only) and
        # let the context managers close every handle, also on failure.
        with open(filename) as source:
            with open(filename + replace_suffix) as frep:
                with open(filename + new_suffix, "w") as fnew:
                    for i, line in enumerate(source):
                        if swap_line[i]:
                            fnew.write(frep.readline())
                        else:
                            fnew.write(line)
def change_random(randoms_file, to_remove):
    """Draw new random positions for the entries listed in ``to_remove``.

    For every entry loaded from ``randoms_file`` whose ``id`` is in
    ``to_remove``, a start coordinate on the same chromosome is drawn at
    random until the window of the entry's original length contains no
    ``'N'`` in the indexed genome sequence.  Each rejected draw prints the
    chromosome name.  The new positions keep the original ``id``.

    The new positions are saved to ``randoms_file + ".change"`` and also
    returned as a list.
    """
    enhancers = load_enhancers(randoms_file)
    record_dict = SeqIO.index(home+"data/female.hg19.fa", "fasta")
    redrawn = []
    for e in enhancers:
        if e.id in to_remove:
            length = e.end - e.start
            placed = False
            while not placed:
                start = random.randint(0, chr_lens[e.chromosome] - length - 1)
                stop = start + length
                window = record_dict[e.chromosome].seq[start:stop]
                if window.find('N') == -1:
                    redrawn.append(
                        Position(e.chromosome, start, stop, e.id, False, []))
                    placed = True
                else:
                    # Window overlaps unknown bases; report and redraw.
                    print(e.chromosome)

    save_enhancers(redrawn, randoms_file+".change")
    return redrawn
示例#8
0
def change_random(randoms_file, to_remove):
    """Re-sample the genomic position of selected random entries.

    Entries are loaded from ``randoms_file``; those whose ``id`` is not in
    ``to_remove`` are skipped.  For each remaining entry a start is drawn
    uniformly with ``random.randint`` on the entry's chromosome, and the
    draw is repeated (printing the chromosome each time) while the
    sequence window of the entry's length contains an ``'N'``.

    Returns the list of new ``Position`` objects, which is also saved to
    ``randoms_file + ".change"``.
    """
    enhancers = load_enhancers(randoms_file)
    record_dict = SeqIO.index(home + "data/female.hg19.fa", "fasta")
    to_return = []

    selected = (entry for entry in enhancers if entry.id in to_remove)
    for entry in selected:
        span = entry.end - entry.start
        while True:
            start = random.randint(0, chr_lens[entry.chromosome] - span - 1)
            end = start + span
            candidate = record_dict[entry.chromosome].seq[start:end]
            if candidate.find('N') != -1:
                # Unknown bases inside the window: log and try again.
                print(entry.chromosome)
                continue
            to_return.append(
                Position(entry.chromosome, start, end, entry.id, False, []))
            break

    save_enhancers(to_return, randoms_file + ".change")
    return to_return
示例#9
0
from ..shared import *
import load_vista

import sys

if __name__ == '__main__':
    # Command-line entry point:
    #   add_sequences.py TYPE inputfile outputfile [start_id] [tissue]
    # TYPE 'bed' loads positions with load_vista.load_bed; any other TYPE
    # loads them with load_vista.load_enhancers.  The loaded positions are
    # then passed to load_vista.add_seqs together with outputfile.
    argc = len(sys.argv)

    if argc < 4:
        print("USAGE: add_sequences.py TYPE inputfile outputfile [start_id] [tissue]")
        print("Genome file: %s%s, files_directory: %s" % (
            DATAPATH, GENOME_FILE, DATAPATH))
        sys.exit(1)

    # A tissue argument marks the loaded positions as positive examples.
    if argc == 6:
        tissue = [sys.argv[5]]
        positive = True
    else:
        tissue = []
        positive = False

    if sys.argv[1] == 'bed':
        # start_id is read unconditionally for bed input, so report its
        # absence clearly instead of failing with an IndexError.
        if argc < 5:
            sys.exit("ERROR: TYPE 'bed' requires the start_id argument")
        positions = load_vista.load_bed(sys.argv[2],
                                        positive,
                                        tissue,
                                        startid=int(sys.argv[4]))
        load_vista.add_seqs(positions, sys.argv[3])
    else:  # vista type
        positions = load_vista.load_enhancers(sys.argv[2])
        load_vista.add_seqs(positions, sys.argv[3])
示例#10
0
                           x.end))
                    overlaps.append(x.id)
                    break

    overlaps = list(set(overlaps))
    return overlaps


if __name__ == "__main__":
    # Command-line entry point:
    #   overlaps.py from_file with_db tissue output
    # Computes the ids from from_file that check_overlap reports against
    # the target loaded from with_db/tissue, prints them, and appends them
    # (one per line) to DATAPATH + output.
    dist = True
    # Four positional arguments are read below, so argv needs 5 entries;
    # show the usage text instead of failing on a missing output name.
    if len(sys.argv) < 5:
        print("USAGE: overlaps.py from_file with_db tissue output")
        print("output_directory: %s" % (DATAPATH))
        sys.exit(1)

    file1 = sys.argv[1]
    db = sys.argv[2]
    tissue = sys.argv[3]
    outname = sys.argv[4]

    remove = sorted(
        check_overlap(load_enhancers(file1), load_target(db, tissue, False)))
    print(remove)

    # The with-statement closes the file; no explicit close() is needed.
    with open(DATAPATH + outname, 'a+') as f:
        f.write("\n".join([str(x) for x in remove]) + "\n")
import sys








if __name__ == '__main__':
    # Command-line entry point:
    #   add_sequences.py TYPE inputfile outputfile [start_id] [tissue]
    # TYPE 'bed' loads positions with load_vista.load_bed; any other TYPE
    # loads them with load_vista.load_enhancers.  The loaded positions are
    # then passed to load_vista.add_seqs together with outputfile.
    argc = len(sys.argv)

    if argc < 4:
        print("USAGE: add_sequences.py TYPE inputfile outputfile [start_id] [tissue]")
        print("Genome file: %s%s, files_directory: %s" % (DATAPATH, GENOME_FILE, DATAPATH))
        sys.exit(1)

    # A tissue argument marks the loaded positions as positive examples.
    if argc == 6:
        tissue = [sys.argv[5]]
        positive = True
    else:
        tissue = []
        positive = False

    if sys.argv[1] == 'bed':
        # start_id is read unconditionally for bed input, so report its
        # absence clearly instead of failing with an IndexError.
        if argc < 5:
            sys.exit("ERROR: TYPE 'bed' requires the start_id argument")
        positions = load_vista.load_bed(sys.argv[2], positive, tissue, startid=int(sys.argv[4]))
        load_vista.add_seqs(positions, sys.argv[3])
    else:  # vista type
        positions = load_vista.load_enhancers(sys.argv[2])
        load_vista.add_seqs(positions, sys.argv[3])
        
示例#12
0
    overlaps=list(set(overlaps))
    return overlaps
 
 
 

if __name__ == "__main__":
    # Command-line entry point:
    #   overlaps.py from_file with_db tissue output
    # Computes the ids from from_file that check_overlap reports against
    # the target loaded from with_db/tissue, prints them, and appends them
    # (one per line) to DATAPATH + output.
    dist = True
    # Four positional arguments are read below, so argv needs 5 entries;
    # show the usage text instead of failing on a missing output name.
    if len(sys.argv) < 5:
        print("USAGE: overlaps.py from_file with_db tissue output")
        print("output_directory: %s" % (DATAPATH))
        sys.exit(1)

    file1 = sys.argv[1]
    db = sys.argv[2]
    tissue = sys.argv[3]
    outname = sys.argv[4]

    remove = sorted(check_overlap(load_enhancers(file1), load_target(db, tissue, False)))
    print(remove)

    # The with-statement closes the file; no explicit close() is needed.
    with open(DATAPATH+outname, 'a+') as f:
        f.write("\n".join([str(x) for x in remove])+"\n")