Exemplo n.º 1
0
def findPriorResults(pairedEnd, resultsFolder, projectId, maplambda=False):
    '''Looks for all result files in the results folder.

    Walks the steps listed in STEP_ORDER ('pe' when pairedEnd is truthy,
    otherwise 'se') and, for each result token declared for the step in
    STEPS, asks dxencode.find_file for a match directly inside resultsFolder
    (recurse=False).

    When maplambda is set and a result of a step whose name contains 'trim'
    is not found, the lookup is retried in the folder obtained by removing a
    trailing '/lambda' component from resultsFolder.

    Returns a dict mapping each result token that was found to the value
    returned by dxencode.find_file for it.
    '''
    steps = STEP_ORDER['pe'] if pairedEnd else STEP_ORDER['se']

    # Folder used for the 'trim' fallback.  str.rstrip('/lambda') must not be
    # used here: it strips any trailing run of the characters '/', 'l', 'a',
    # 'm', 'b', 'd', so '/runs/data/lambda' would become '/runs/dat'.
    # Remove the literal suffix instead.
    fallbackFolder = None
    if maplambda:
        trimmed = resultsFolder.rstrip('/')
        if trimmed.endswith('/lambda'):
            fallbackFolder = trimmed[:-len('/lambda')]

    priors = {}
    for step in steps:
        results = STEPS[step]['results']
        for fileToken in results:
            fid = dxencode.find_file(resultsFolder + results[fileToken],
                                     project=projectId,
                                     recurse=False)
            if fid is None and fallbackFolder is not None and 'trim' in step:
                ## giant kludge: trim results may live one level above the
                ## lambda folder
                fid = dxencode.find_file(fallbackFolder + results[fileToken],
                                         project=projectId,
                                         recurse=False)
            if fid is not None:
                priors[fileToken] = fid
    return priors
Exemplo n.º 2
0
def checkRunsPreviouslyLaunched(resultsFolder,projectId):
    '''Checks for currently running jobs and will exit if found.

    Looks up RUNS_LAUNCHED_FILE in resultsFolder (all matches), reads each
    line as "<analysis-id> [free text]" and queries the state of every id
    that starts with 'analysis-'.  If any such analysis is in a state other
    than 'done', 'failed' or 'terminated', a message is printed and the
    process exits via sys.exit(1).  Otherwise one status line per prior run
    is printed.
    '''
    launchFilePath = resultsFolder + '/' + RUNS_LAUNCHED_FILE
    launchFids = dxencode.find_file(launchFilePath,projectId,multiple=True)
    if launchFids is None:
        print("  No prior jobs launched.")
        return

    # NOTE: Appending to the one file, but just in case handle multiple files.
    for fid in launchFids:
        with dxpy.open_dxfile(fid) as fd:
            for line in fd:
                runId = line.split(None,1)
                # A blank or whitespace-only line splits to an empty list;
                # skip it instead of failing on runId[0].
                if not runId or not runId[0].startswith('analysis-'):
                    continue
                state = dxpy.DXAnalysis(dxid=runId[0]).describe()['state']

                # "[id] " plus the optional "(note) " taken from the rest of
                # the line (trailing whitespace/newline removed).
                label = "["+runId[0]+"] "
                if len(runId) > 1:
                    label += "("+runId[1].rstrip()+") "

                # states I have seen: in_progress, terminated, done, failed
                if state not in ("done", "failed", "terminated"):
                    print("Exiting: Can't launch because prior run "+label+
                          "has not finished (currently '"+state+"').")
                    sys.exit(1)
                print("  Prior run "+label+"is '"+state+"'.")
Exemplo n.º 3
0
def checkRunsPreviouslyLaunched(resultsFolder, projectId):
    '''Checks for currently running jobs and will exit if found.

    Every RUNS_LAUNCHED_FILE found in resultsFolder is read line by line.
    The first whitespace-separated field of a line is treated as a run id
    when it starts with 'analysis-'; anything after it is an optional note.
    The state of each such analysis is fetched, and if it is not one of
    'done', 'failed' or 'terminated' the function prints why and calls
    sys.exit(1).  Finished runs are reported with one printed line each.
    '''
    finishedStates = ("done", "failed", "terminated")
    launchFilePath = resultsFolder + '/' + RUNS_LAUNCHED_FILE
    launchFids = dxencode.find_file(launchFilePath, projectId, multiple=True)
    if launchFids is None:
        print("  No prior jobs launched.")
        return

    # NOTE: Appending to the one file, but just in case handle multiple files.
    for fid in launchFids:
        with dxpy.open_dxfile(fid) as fd:
            for line in fd:
                fields = line.split(None, 1)
                if not fields:
                    # Empty/whitespace-only line: nothing to index into.
                    continue
                analysisId = fields[0]
                if not analysisId.startswith('analysis-'):
                    continue

                state = dxpy.DXAnalysis(dxid=analysisId).describe()['state']

                note = ""
                if len(fields) > 1:
                    # Drop trailing whitespace so the note stays on one line.
                    note = "(" + fields[1].rstrip() + ") "

                # states I have seen: in_progress, terminated, done, failed
                if state in finishedStates:
                    print("  Prior run [" + analysisId + "] " + note +
                          "is '" + state + "'.")
                else:
                    print("Exiting: Can't launch because prior run [" +
                          analysisId + "] " + note +
                          "has not finished (currently '" + state + "').")
                    sys.exit(1)
def parse_map_report(folder, project):
    '''Collects labelled metrics from the Bismark mapping report in folder.

    The report is located with dxencode.find_file using the pattern
    "/*_bismark_map_report.txt" (non-recursive) in the project identified by
    project.get_id().  Each line is tested against one pattern per entry of
    the module-level `labels`, of the form "<label>:<whitespace><value>",
    anchored at the start of the line.

    Returns a dict mapping the matched label text to its stripped value.
    If the report cannot be read, an error is printed and whatever was
    collected so far (possibly nothing) is returned.
    '''
    mapreport = "/*_bismark_map_report.txt"
    report_link = dxencode.find_file(folder + mapreport,
                                     project.get_id(),
                                     recurse=False)

    # Raw string so that \s is a regex escape, not a string escape.
    patterns = [re.compile(r"(%s):\s+(.+)" % lab) for lab in labels]

    metrics = {}
    try:
        with dxpy.open_dxfile(report_link) as rfd:
            for line in rfd:
                for pattern in patterns:
                    m = pattern.match(line)
                    if m:
                        metrics[m.group(1)] = m.group(2).strip()
    except Exception as e:
        # Best effort: report the problem and fall through to the return.
        print("ERROR: Could not read Bismark mapping report in %s (%s) \n%s" % (
            folder, report_link, e))

    # The dict was previously built and then dropped; hand it to the caller.
    return metrics
def get_fastqc(accession, project):
    '''Reads the 'Total Sequences' count from the FastQC data file.

    Looks up "<accession>_summary.txt" and "<accession>_data.txt" with
    dxencode.find_file in the project identified by project.get_id(), then
    scans the data file for a line starting with "Total Sequences".

    Returns a dict.  On success 'Total Sequences' holds the matched digits as
    a string (the key is absent if no line matched).  If the file cannot be
    read, an error is printed and 'Total Sequences' is set to -999.999.
    '''
    summary_fn = accession+"_summary.txt"
    report_fn = accession+"_data.txt"

    summary_link = dxencode.find_file(summary_fn, project.get_id())
    report_link = dxencode.find_file(report_fn, project.get_id())

    # Compiled once up front; raw string so \s and \d are regex escapes.
    total = re.compile(r'Total Sequences\s+(\d+)')

    metrics = {}
    try:
        with dxpy.open_dxfile(report_link) as rfd:
            for line in rfd:
                m = total.match(line)
                if m:
                    metrics['Total Sequences'] = m.group(1)
    except Exception as e:
        # NOTE(review): the message names the summary file although the file
        # being read is the data report (report_fn / report_link) -- confirm
        # which one should be reported before changing the text.
        print("ERROR: Could not read FastQC summary: %s (%s) \n%s" % (summary_fn, summary_link, e))
        metrics['Total Sequences'] = -999.999

    # The dict was previously built and then dropped; hand it to the caller.
    return metrics
Exemplo n.º 6
0
def findReferenceFiles(refs, priors,refLoc,extras):
    '''Locates every requested reference file and records it in priors.

    For each name in refs the file name is taken from
    GENOME_REFERENCES[name][extras['genome']][extras['gender']] and searched
    for under "<refLoc>/<genome>/" in REF_PROJECT_DEFAULT.  The value returned
    by dxencode.find_file is stored in priors[name]; priors is modified in
    place and nothing is returned.  The process exits with an error message
    as soon as one reference cannot be found.
    '''
    #TODO move to module?  Have to determine dx file structure.
    genomeFolder = refLoc + '/' + extras['genome']
    for refName in refs:
        perGender = GENOME_REFERENCES[refName][extras['genome']]
        refPath = genomeFolder + '/' + perGender[extras['gender']]
        refFid = dxencode.find_file(refPath, REF_PROJECT_DEFAULT)
        if refFid is None:
            sys.exit("ERROR: Unable to locate DNA Methylation ref file: '" + refPath + "'")
        priors[refName] = refFid
Exemplo n.º 7
0
def findPriorResults(pairedEnd,resultsFolder,projectId, maplambda=False):
    '''Looks for all result files in the results folder.

    The steps come from STEP_ORDER['pe'] when pairedEnd is truthy, else from
    STEP_ORDER['se'].  For each result token of each step (per STEPS) the
    file pattern is appended to resultsFolder and looked up non-recursively
    with dxencode.find_file.

    With maplambda set, a missing result of any step whose name contains
    'trim' is looked up a second time in resultsFolder with its trailing
    '/lambda' component removed.

    Returns a dict of result token -> find_file result, containing only the
    tokens that were found.
    '''
    if pairedEnd:
        steps = STEP_ORDER['pe']
    else:
        steps = STEP_ORDER['se']

    # Parent of the lambda folder, or None when there is no fallback to try.
    # Note: str.rstrip('/lambda') is NOT a suffix removal -- it strips any
    # trailing characters from the set {'/', 'l', 'a', 'm', 'b', 'd'} and so
    # damages parent names such as '.../data' -- hence the explicit slicing.
    lambdaSuffix = '/lambda'
    parentFolder = None
    if maplambda:
        normalized = resultsFolder.rstrip('/')
        if normalized.endswith(lambdaSuffix):
            parentFolder = normalized[:-len(lambdaSuffix)]

    priors = {}
    for step in steps:
        useFallback = parentFolder is not None and 'trim' in step
        for fileToken, pattern in STEPS[step]['results'].items():
            fid = dxencode.find_file(resultsFolder + pattern,project=projectId, recurse=False)
            if fid is None and useFallback:
                ## giant kludge
                fid = dxencode.find_file(parentFolder + pattern,project=projectId, recurse=False)
            if fid is not None:
                priors[fileToken] = fid
    return priors
Exemplo n.º 8
0
def findReferenceFiles(refs, priors, refLoc, extras):
    '''Fills priors with the reference files selected by genome and gender.

    Each entry of refs is resolved through GENOME_REFERENCES using
    extras['genome'] and extras['gender'], looked up below
    "<refLoc>/<genome>" in REF_PROJECT_DEFAULT via dxencode.find_file, and
    stored under its own name in priors (mutated in place; no return value).
    A reference that cannot be found terminates the process through sys.exit
    with an error message naming the missing path.
    '''
    #TODO move to module?  Have to determine dx file structure.
    refLoc = refLoc + '/' + extras['genome']
    for ref in refs:
        fileName = GENOME_REFERENCES[ref][extras['genome']][extras['gender']]
        dxfile = refLoc + '/' + fileName
        found = dxencode.find_file(dxfile, REF_PROJECT_DEFAULT)
        if found is not None:
            priors[ref] = found
            continue
        sys.exit("ERROR: Unable to locate DNA Methylation ref file: '" +
                 dxfile + "'")
Exemplo n.º 9
0
def logThisRun(runId,resultsFolder,projectId):
    '''Adds a runId to the runsLaunched file in resultsFolder.

    A new RUNS_LAUNCHED_FILE is created in resultsFolder whose first line is
    "<runId> started:<current time>", followed by the lines of the existing
    launch file (if dxencode.find_file locates one).  The existing file is
    removed from the project only after the new file has been closed.
    '''
    # NOTE: DX manual lies?!  Append not possible?!  Then write new/delete old
    launchFilePath = resultsFolder + '/' + RUNS_LAUNCHED_FILE
    oldFid = dxencode.find_file(launchFilePath,projectId)
    newFh = dxpy.new_dxfile('a',project=projectId,folder=resultsFolder,name=RUNS_LAUNCHED_FILE)
    newFh.write(runId+' started:'+str(datetime.now())+'\n')
    if oldFid is not None:
        with dxpy.open_dxfile(oldFid) as oldFh:
            for oldRunId in oldFh:
                newFh.write(oldRunId+'\n')

    # Close the replacement before deleting the old log: if copying or
    # closing fails, the previous run history is still in the project.
    newFh.close()
    if oldFid is not None:
        proj = dxpy.DXProject(projectId)
        proj.remove_objects([oldFid])
Exemplo n.º 10
0
def logThisRun(runId, resultsFolder, projectId):
    '''Adds a runId to the runsLaunched file in resultsFolder.

    Writes a fresh RUNS_LAUNCHED_FILE into resultsFolder: first the line
    "<runId> started:<current time>", then every line of the launch file
    that dxencode.find_file currently finds there (if any).  Once the new
    file is closed, the previous file object is removed from the project.
    '''
    # NOTE: DX manual lies?!  Append not possible?!  Then write new/delete old
    launchFilePath = resultsFolder + '/' + RUNS_LAUNCHED_FILE
    oldFid = dxencode.find_file(launchFilePath, projectId)
    hasOldLog = oldFid is not None

    newFh = dxpy.new_dxfile('a',
                            project=projectId,
                            folder=resultsFolder,
                            name=RUNS_LAUNCHED_FILE)
    newFh.write(runId + ' started:' + str(datetime.now()) + '\n')
    if hasOldLog:
        with dxpy.open_dxfile(oldFid) as oldFh:
            for oldRunId in oldFh:
                newFh.write(oldRunId + '\n')
    newFh.close()

    # Deleting the old log is the last step on purpose: an error while
    # copying or closing above must not leave the project without any log.
    if hasOldLog:
        dxpy.DXProject(projectId).remove_objects([oldFid])
Exemplo n.º 11
0
def parse_map_report(folder, project):
    '''Scans the Bismark mapping report in folder for labelled metrics.

    The report is located non-recursively with dxencode.find_file using the
    pattern "/*_bismark_map_report.txt".  Every line is matched against one
    "<label>:<whitespace><value>" pattern per entry of the module-level
    `labels`; matches are stored in a local dict as label -> stripped value.
    A failure while reading the report is printed rather than raised.
    '''
    report_link = dxencode.find_file(folder + "/*_bismark_map_report.txt", project.get_id(), recurse=False)

    # One compiled pattern per label, keyed by the label itself.
    res = dict((lab, re.compile(r"(%s):\s+(.+)" % lab)) for lab in labels)
    metrics = {}

    try:
        with dxpy.open_dxfile(report_link) as rfd:
            for line in rfd:
                for metric in res.values():
                    found = metric.match(line)
                    if not found:
                        continue
                    metrics[found.group(1)] = found.group(2).strip()

    except Exception as e:
        print("ERROR: Could not read Bismark mapping report in %s (%s) \n%s" % (folder, report_link, e))