示例#1
0
def getProblems(data, txn=None):
    """Return the problems that are in bounds for the requested region.

    Args:
        data: dict with the keys 'ref', 'start' and 'end'. 'genome' is
            optional; when missing it is looked up with ``getGenome`` and
            stored back into ``data`` (the caller's dict is mutated).
        txn: optional transaction handle forwarded to ``getGenome`` and
            ``problems.put``.

    Returns:
        A list of dicts (``DataFrame.to_dict('records')``), one per problem
        row that is in bounds of (ref, start, end).

    Raises:
        Exception: if the problems file is missing and
            ``Hubs.generateProblems`` does not return the expected path.
    """
    if 'genome' not in data:
        data['genome'] = getGenome(data, txn=txn)

    problems = db.Problems(data['genome'])

    problemsInBounds = problems.getInBounds(data['ref'], data['start'],
                                            data['end'])

    # Fast path: the db already answered the query.
    if problemsInBounds is not None:
        return problemsInBounds.to_dict('records')

    # getInBounds returned None (presumably nothing is stored for this
    # genome yet -- confirm against db.Problems), so fall back to the
    # problems.bed file on disk, generating it first when needed.
    problemsPath = os.path.join(cfg.jbrowsePath, cfg.dataPath, 'genomes',
                                data['genome'], 'problems.bed')

    if not os.path.exists(problemsPath):
        location = Hubs.generateProblems(data['genome'], problemsPath)
        if location != problemsPath:
            # Same exception type as before, but with enough context to
            # diagnose where generation actually wrote (or that it failed).
            raise Exception(
                'Problems for genome %r were generated at %r, expected %r'
                % (data['genome'], location, problemsPath))

    problemsDf = pd.read_csv(problemsPath, sep='\t', header=None)
    problemsDf.columns = problemColumns
    # Store the freshly loaded table so later lookups can be served by the db.
    problems.put(problemsDf, txn=txn)

    # Row-wise boolean mask of the problems that are in bounds.
    problemsIsInBounds = problemsDf.apply(db.checkInBounds,
                                          axis=1,
                                          args=(data['ref'], data['start'],
                                                data['end']))

    return problemsDf[problemsIsInBounds].to_dict('records')
示例#2
0
def generateProblems(genome, path, txn=None):
    """Build the problems table for ``genome`` and write it to problems.bed.

    Downloads the ``chromInfo`` and ``gap`` tables from
    ``cfg.geneUrl + genome + '/database/'``, outer-joins them on ``chrom``,
    turns each chromosome group into problem rows via ``createProblems``
    (rows with gap coordinates) or ``createNanProblems`` (rows without),
    stores the result in ``db.Problems(genome)`` and writes it as a
    tab-separated file without header or index.

    Args:
        genome: genome name; used in the download URL and the output path.
        path: base directory; output goes to
            ``<path>/genomes/<genome>/problems.bed``.
        txn: optional transaction handle forwarded to ``db.Problems.put``.

    Returns:
        The path of the problems.bed file, or ``None`` when the genome
        directory could not be created.
    """
    genesUrl = "%s%s/database/" % (cfg.geneUrl, genome)
    genomePath = os.path.join(path, 'genomes', genome)
    outputFile = os.path.join(genomePath, 'problems.bed')

    # Nothing to do when the db already has an entry for this genome.
    if db.Problems.has_key(genome):
        return outputFile

    if not os.path.exists(genomePath):
        try:
            os.makedirs(genomePath)
        except OSError:
            # Best effort: signal failure to the caller with None.
            return None

    files = []

    for tableName in ['chromInfo', 'gap']:
        outputPath = os.path.join(genomePath, tableName + '.txt')
        fileUrl = genesUrl + tableName + '.txt.gz'
        files.append(downloadAndUnpackFile(fileUrl, outputPath))

    chromInfo = pd.read_csv(files[0], sep='\t', header=None).iloc[:, 0:2]
    chromInfo.columns = ['chrom', 'bases']

    gap = pd.read_csv(files[1], sep='\t', header=None).iloc[:, 1:4]
    gap.columns = ['chrom', 'gapStart', 'gapEnd']

    # Outer join keeps chromosomes that have no gap rows; their gap columns
    # come out as NaN.
    join = gap.merge(chromInfo, on='chrom', how='outer')

    # The previous expression `['gapStart' and 'gapEnd']` evaluated to just
    # 'gapEnd'; test both gap columns explicitly instead.
    missingGap = join[['gapStart', 'gapEnd']].isnull().any(axis=1)

    nanOutput = join[missingGap].groupby(['chrom']).apply(createNanProblems)

    nonNanOutput = join[~missingGap].groupby(['chrom']).apply(createProblems)

    frames = [nonNanOutput, nanOutput]

    output = pd.concat(frames, sort=False)

    # Removes all entries with an _, not needed because these are genome "Fixes".
    # .copy() so the column assignments below write to an independent frame
    # rather than to a slice of the concatenated one.
    output = output[~output['chrom'].str.contains('_')].copy()

    output['chromStart'] = output['chromStart'].astype(int)
    output['chromEnd'] = output['chromEnd'].astype(int)

    db.Problems(genome).put(output, txn=txn)

    output.to_csv(outputFile, sep='\t', index=False, header=False)

    return outputFile
示例#3
0
def getProblemsForChrom(genome, chrom, txn=None):
    """Return a copy of the stored problems of ``genome`` whose 'chrom' equals ``chrom``.

    ``txn`` is forwarded to ``db.Problems(genome).get``.
    """
    allProblems = db.Problems(genome).get(txn=txn)
    onChrom = allProblems['chrom'] == chrom
    # Copy so callers can modify the result without touching the stored table.
    selected = allProblems[onChrom]
    return selected.copy()