def multithreadedJaccardBed(*bedSortedPairs):
    """Compute the Jaccard score for one pair of BED files, larger file first.

    Only ``bedSortedPairs[0]`` is read; it must be a two-item sequence of file
    paths. The files are ordered by on-disk size (the first path wins a tie)
    and passed to ``BedTool.jaccard`` in that order.

    Returns:
        A ``(jaccardScores, errorBedSortedPairs)`` tuple. On success the dict
        maps ``str((file1, file2))`` to the ``'jaccard'`` entry of the result
        and the list is empty. On failure the dict is empty and the list holds
        a single ``[file1, file2]`` entry, where each item is the ``BedTool``
        if it was created before the failure and the plain path otherwise.
    """
    jaccardScores = dict()
    errorBedSortedPairs = []
    pathA = bedSortedPairs[0][0]
    pathB = bedSortedPairs[0][1]

    # Pre-bind everything the handler reads so a failure at any step (stat,
    # BedTool construction, jaccard) can still be reported instead of raising
    # a second exception from inside the handler.
    file1, file2 = pathA, pathB
    firstIsLarger = None
    try:
        firstIsLarger = os.stat(pathA).st_size >= os.stat(pathB).st_size
        if firstIsLarger:
            print('true')
        else:
            print('yuck')
            file1, file2 = pathB, pathA
        file1 = BedTool(file1)
        file2 = BedTool(file2)
        jaccardResults = BedTool.jaccard(file1, file2)
        jaccardScores[str((file1, file2))] = jaccardResults['jaccard']
    except Exception as error:
        # The size banner is only meaningful when both sizes could be read;
        # reuse the comparison from the try block rather than calling os.stat
        # again, which would re-raise for a missing file.
        if firstIsLarger is not None:
            if firstIsLarger:
                banner, rule = 'ok!', '***'
            else:
                banner, rule = 'youch!', '-----'
            print(banner)
            print(rule)
            print(pathA)
            print(pathB)
            print(rule)
        errorBedSortedPairs.append([file1, file2])
        print('error')
        print(error)

    return jaccardScores, errorBedSortedPairs
# Example no. 2
# 0
def run_jaccard(fileA, fileB, genomefile):
    """Sort two bedtools-readable files and compute their Jaccard result.

    Both inputs are sorted with ``g=genomefile`` and the same genome file is
    passed to ``jaccard``. The base names of the inputs (the text after the
    last ``/``) are added to the result under ``"fileA"`` and ``"fileB"``.

    Returns:
        A ``(data, keylist)`` tuple: ``keylist`` is the sorted list of result
        keys and ``data`` holds the matching values converted with ``str``,
        in the same order.
    """
    sorted_a = BedTool(fileA).sort(g=genomefile)
    sorted_b = BedTool(fileB).sort(g=genomefile)

    result = sorted_a.jaccard(sorted_b, g=genomefile)
    result["fileA"] = fileA.rsplit("/", 1)[-1]
    result["fileB"] = fileB.rsplit("/", 1)[-1]

    keylist = sorted(result.keys())
    data = [str(result[name]) for name in keylist]
    return (data, keylist)
def multithreadedJaccardBed(*bedSortedPairs):
    """Compute the Jaccard score for one pair of BED files.

    Only ``bedSortedPairs[0]`` is read; it must be a two-item sequence of file
    paths, used in the order given.

    Returns:
        A ``(jaccardScores, errorBedSortedPairs)`` tuple. On success the dict
        maps ``str((file1, file2))`` to the ``'jaccard'`` entry of the result
        and the list is empty. On failure the dict is empty and the list holds
        a single ``[file1, file2]`` entry, where each item is the ``BedTool``
        if it was created before the failure and the plain path otherwise.
    """
    jaccardScores = dict()
    errorBedSortedPairs = []

    # Start from the raw paths so the handler always has something to record,
    # even when BedTool construction itself is what failed.
    file1, file2 = bedSortedPairs[0][0], bedSortedPairs[0][1]
    try:
        file1 = BedTool(file1)
        file2 = BedTool(file2)
        jaccardResults = BedTool.jaccard(file1, file2)
        jaccardScores[str((file1, file2))] = jaccardResults['jaccard']
    except Exception:
        # Best effort: the failing pair is reported to the caller, not raised.
        errorBedSortedPairs.append([file1, file2])

    return jaccardScores, errorBedSortedPairs
def sequentialJaccardBed():
    """Compute Jaccard scores one pair at a time and report the elapsed time.

    The pairs come from ``jaccardBed()``. Each pair is scored with
    ``BedTool.jaccard``; a pair that raises is recorded and skipped so the
    remaining pairs are still processed. The wall-clock duration is printed
    when the loop finishes.

    Returns:
        A ``(jaccardScores, errorBedSortedPairs)`` tuple: the list of
        ``'jaccard'`` values for the pairs that succeeded, and a list of
        ``[file1, file2]`` entries for the pairs that failed (each item is the
        ``BedTool`` if it was created before the failure, otherwise the value
        taken from the pair).
    """
    sequential_time = time.time()
    bedSortedPairs = jaccardBed()
    jaccardScores = []
    errorBedSortedPairs = []

    for pair in bedSortedPairs:
        # Re-bind per iteration so a failure is attributed to the current
        # pair, never to objects left over from a previous one.
        file1, file2 = pair[0], pair[1]
        try:
            file1 = BedTool(file1)
            file2 = BedTool(file2)
            jaccardResults = BedTool.jaccard(file1, file2)
            jaccardScores.append(jaccardResults['jaccard'])
        except Exception:
            errorBedSortedPairs.append([file1, file2])

    print('Sequentially calculating jaccard scores --- %.2f seconds ---' %
          (time.time() - sequential_time))

    # Hand the results back so the work done above is usable by callers.
    return jaccardScores, errorBedSortedPairs
# Example no. 5
# 0
# Pairwise comparison script: for every 2-combination of the files in
# `subset`, print one tab-separated row with both cell types and the Jaccard
# statistic of the two files.
cons_bedgraph = BedTool('/vol1/opt/data/hg19.100way.phyloP100way.bg.gz')

header_fields = ['#cell.type.1', 'cell.type.2', 'type', 'jaccard', 'mean.cons']
# A parenthesised single-argument print behaves identically on Python 2 and
# is valid syntax on Python 3.
print('\t'.join(header_fields))

for fname1, fname2 in combinations(subset, r=2):

    ctype1 = get_cell_type(fname1)
    ctype2 = get_cell_type(fname2)

    tool1 = BedTool(fname1)
    tool2 = BedTool(fname2)

    # jaccard statistic
    result = tool1.jaccard(tool2, f=0.5, r=False)
    stat = result['jaccard']

    # conservation measurement: intervals unique to each file (v=True),
    # mapped against column 4 of the conservation track with the mean operation
    result1 = tool1.intersect(tool2, v=True)
    result2 = tool2.intersect(tool1, v=True)

    cons1 = result1.map(cons_bedgraph, o='mean', c=4)
    cons2 = result2.map(cons_bedgraph, o='mean', c=4)

    # NOTE(review): debugging breakpoint left in the loop; `pbd` is presumably
    # a typo for `pdb` unless it is aliased elsewhere -- confirm and remove.
    pbd.set_trace()

    # NOTE(review): the header declares five columns but only three values
    # are written; cons1/cons2 are computed above and never emitted.
    fields = [ctype1, ctype2, stat]
    print('\t'.join(map(str, fields)))

# Example no. 6
# 0
# Pairwise comparison script: draw 10 files at random from `filenames`, then
# for every 2-combination print one tab-separated row with both cell types
# and the Jaccard statistic of the two files.
subset = random.sample(filenames, 10)

cons_bedgraph = BedTool('/vol1/opt/data/hg19.100way.phyloP100way.bg.gz')

header_fields = ['#cell.type.1', 'cell.type.2', 'type', 'jaccard', 'mean.cons']
# A parenthesised single-argument print behaves identically on Python 2 and
# is valid syntax on Python 3.
print('\t'.join(header_fields))

for fname1, fname2 in combinations(subset, r=2):

    ctype1 = get_cell_type(fname1)
    ctype2 = get_cell_type(fname2)

    tool1 = BedTool(fname1)
    tool2 = BedTool(fname2)

    # jaccard statistic
    result = tool1.jaccard(tool2, f=0.5, r=False)
    stat = result['jaccard']

    # conservation measurement: intervals unique to each file (v=True),
    # mapped against column 4 of the conservation track with the mean operation
    result1 = tool1.intersect(tool2, v=True)
    result2 = tool2.intersect(tool1, v=True)

    cons1 = result1.map(cons_bedgraph, o='mean', c=4)
    cons2 = result2.map(cons_bedgraph, o='mean', c=4)

    # NOTE(review): debugging breakpoint left in the loop; `pbd` is presumably
    # a typo for `pdb` unless it is aliased elsewhere -- confirm and remove.
    pbd.set_trace()

    # NOTE(review): the header declares five columns but only three values
    # are written; cons1/cons2 are computed above and never emitted.
    fields = [ctype1, ctype2, stat]
    print('\t'.join(map(str, fields)))
# Example no. 7
# 0
def score(onlyfiles):
	"""Record the Jaccard score of every file that beats the threshold.

	Each name in ``onlyfiles`` is appended to the module-level ``mypath`` and
	compared against the sorted module-level ``userfile``. When the
	``'jaccard'`` value exceeds the module-level ``jaccard`` threshold it is
	stored in the module-level ``scores`` mapping under the file name.
	"""
	# The sorted query does not depend on the loop variable, so sort it once
	# instead of re-sorting for every file.
	sortedUser = userfile.sort()
	for bed in onlyfiles:
		a = BedTool(mypath+bed)
		jacValue = BedTool.jaccard(sortedUser, a)['jaccard']
		if jacValue > jaccard:
			scores[bed] = jacValue