def output(pas_dict, scan_file, out, window, max_shift, species):
    """Emit shifted windows of scan lines around every position in ``pas_dict``.

    Each line of ``scan_file`` must hold exactly three tab-separated fields;
    the first is an id of the form ``chromosome:pos:strand`` (the other two
    fields are not used here). For every line whose integer ``pos`` is a key
    of ``pas_dict``, windows of ``2 * int(window / 2) + 1`` consecutive lines
    centred on that line shifted by ``-max_shift .. max_shift`` are passed to
    ``collpase`` together with the open output handle.

    Args:
        pas_dict: Mapping from integer position to a type value that is
            forwarded to ``collpase``.
        scan_file: Path of the tab-separated scan file to read.
        out: Path of the file to (over)write.
        window: Window width; half of it (truncated) is taken on each side.
        max_shift: Largest offset, in lines, applied to the window centre.
        species: Forwarded unchanged to ``collpase``.

    Raises:
        ValueError: If a line does not split into three tab-separated fields,
            the id does not split into three ``:``-separated parts, or the
            position is not an integer.

    NOTE(review): ``check`` and ``collpase`` are defined elsewhere in the
    file; presumably ``check`` validates that the first and last line of a
    window are ``window`` positions apart -- confirm against its definition.
    """
    extend = int(window / 2)

    # Windows are addressed by line index, so the whole file is needed in
    # memory; the context manager closes the handle even if reading fails.
    with open(scan_file, 'r') as f:
        lines = f.readlines()
    total = len(lines)

    # ``with`` guarantees the output is flushed and closed even when parsing,
    # ``check`` or ``collpase`` raises part-way through.
    with open(out, 'w') as ww:
        for i, line in enumerate(lines):
            # The three-way unpacking doubles as a format check on each line.
            pas_id, _rpm, _base = line.rstrip('\n').split('\t')
            _chromosome, pos, _strand = pas_id.split(':')
            pos = int(pos)
            if pos not in pas_dict:
                continue
            pas_type = pas_dict[pos]
            symbol = 'unknown'
            # The unshifted window must fit strictly inside the file.
            if not (i - extend > 0 and i + extend + 1 < total):
                continue
            for j in range(-max_shift, max_shift + 1):
                start = i + j - extend
                end = i + j + extend
                # Shifted windows are bounds-checked individually.
                if start > 0 and end + 1 < total:
                    if check(lines[start], lines[end], window):
                        collpase(pas_id, pas_type, symbol,
                                 lines[start:end + 1], ww, species, j)
# Example #2
# 0
def output(pas_array, scan_file, out, window, max_shift, species, prob,
           number_pas, rst):
    """Randomly pick scan lines away from known positions and emit their windows.

    Each line of ``scan_file`` must hold exactly three tab-separated fields;
    the first is an id of the form ``chromosome:pos:strand``. The function
    works in two passes:

    1. Every line whose window (plus ``max_shift`` on the upper side) fits in
       the file is kept as a candidate with probability ``prob``, unless its
       position is closer than the module-level ``Threshold`` to the
       previously accepted candidate or to any position in ``pas_array``.
    2. Candidates are shuffled and, one by one, passed through ``check`` and
       ``collpase``; the return values of ``collpase`` for the unshifted
       window are summed until the sum reaches ``number_pas``. For each
       candidate with a non-zero result, the windows shifted by
       ``-max_shift .. max_shift`` (excluding 0) are emitted as well.

    Args:
        pas_array: Iterable of integer positions that candidates must avoid.
        scan_file: Path of the tab-separated scan file to read.
        out: Path of the file to (over)write.
        window: Window width; half of it (truncated) is taken on each side.
        max_shift: Largest offset, in lines, applied to the window centre.
        species: Forwarded unchanged to ``collpase``.
        prob: Per-line probability of considering a line as a candidate.
        number_pas: Target for the summed ``collpase`` results.
        rst: Forwarded unchanged to ``collpase``.

    Raises:
        Warning: If all candidates are exhausted before ``number_pas`` is
            reached.
        ValueError: If a line or id does not have the expected three fields,
            or the position is not an integer.

    NOTE(review): ``check``, ``collpase`` and ``Threshold`` are defined
    elsewhere in the file; ``collpase`` is assumed to return 0 or 1 -- confirm.
    """
    extend = int(window / 2)

    # Windows are addressed by line index, so the whole file is needed in
    # memory; the context manager closes the handle even if reading fails.
    with open(scan_file, 'r') as f:
        lines = f.readlines()
    total = len(lines)

    # The original never closed the output handle (and skipped closing the
    # input on the Warning path); ``with`` closes it on every exit route.
    with open(out, 'w') as ww:
        pre_pos = 0
        negative_candidate = dict()
        for i, line in enumerate(lines):
            # The three-way unpacking doubles as a format check on each line.
            pas_id, _rpm, _base = line.rstrip('\n').split('\t')
            _chromosome, pos, _strand = pas_id.split(':')
            pos = int(pos)
            if not (i - extend > 0 and i + extend + 1 + max_shift < total):
                continue
            # One random draw per in-range line, as before, so a seeded
            # generator yields the same selection.
            if not (random.random() < prob):
                continue
            if abs(pos - pre_pos) < Threshold:
                continue
            if any(abs(pos - true_pos) < Threshold for true_pos in pas_array):
                continue
            pre_pos = pos
            negative_candidate[pas_id] = i

        count = 0
        items = list(negative_candidate.items())
        random.shuffle(items)
        for pas_id, i in items:
            start = i - extend
            end = i + extend
            # The selection pass only guards the upper bound with max_shift.
            # Without this check a negative index would silently wrap around
            # and compare against a line from the end of the file.
            if start - max_shift < 0:
                continue
            if not check(lines[start - max_shift], lines[end + max_shift],
                         window + 2 * max_shift):
                continue
            success = collpase(pas_id, 'unknown', 'unknown',
                               lines[start:end + 1], ww, species, 0, rst)
            count += success
            if success == 0:
                continue
            for j in range(-max_shift, max_shift + 1):
                if j == 0:
                    continue
                shifted_start = i + j - extend
                shifted_end = i + j + extend
                if shifted_start > 0 and shifted_end + 1 < total:
                    if check(lines[shifted_start], lines[shifted_end], window):
                        collpase(pas_id, 'unknown', 'unknown',
                                 lines[shifted_start:shifted_end + 1],
                                 ww, species, j, rst)
            if count >= number_pas:
                break

    # Both files are closed by now, so raising here leaks nothing.
    if count < number_pas:
        raise Warning("not engough negative candidates, please incerase the probability for selecting!")
    else:
        print("successfully randomly get same number of negative pas as ground truth")