Example #1
def mapReads(in_fastq, ref_fasta, out_dir, experiment):
    '''use mapPacBio.sh from bbmap to identify reference sequences matched by one or more PacBio reads with no substitutions (indels allowed)'''

    # mapPacBio path (first part gets path to folder running script)
    bbmap_pacbio = (os.path.dirname(
        os.path.realpath(__file__))) + '/bbmap_37_28/mapPacBio.sh'

    # get sample name from input file
    # need to strip off .gz and .fastq extensions sequentially

    sample_name = os.path.splitext(
        os.path.splitext(os.path.basename(in_fastq))[0])[0]
    print('Sample name: ' + sample_name)

    # create output genotyping folder if it doesn't exist
    sample_dir = utils.createOutputFolder(out_dir + '/genotyping/' +
                                          sample_name)

    # create bbmap command
    cmd = [
        bbmap_pacbio, 'in=' + in_fastq, 'ref=' + ref_fasta,
        'covstats=' + sample_dir + '/' + sample_name + '.covstats.tmp.txt',
        'outm=' + sample_dir + '/' + sample_name + '.mapped.bam',
        'outu=' + sample_dir + '/' + sample_name + '.unmapped.fastq.gz',
        'statsfile=' + sample_dir + '/' + sample_name + '.mapping_stats.txt',
        'subfilter=0', 'nzo=t', 'ambiguous=all', 'maxlen=1500', 'minid=0.9',
        'maxindel=10', 'minratio=0.8', 'twocolumn=t', 'ow=t'
    ]

    # print bbmap command
    status.printStatus(' '.join(cmd))

    # call bbmap
    # stats output is saved to file; it could optionally be suppressed from stderr via os.devnull:
    # FNULL = open(os.devnull, 'w')
    subprocess.call(cmd)
    # FNULL.close()

    # add descriptors to covstats output
    with open(sample_dir + '/' + sample_name + '.covstats.tmp.txt', 'r') as f:
        with open(sample_dir + '/' + sample_name + '.covstats.txt', 'w') as g:
            for idx, line in enumerate(f):
                # print header in first line, otherwise value of sample_name
                if idx == 0:
                    g.write('sample_name' + '\t' + line.rstrip('\n') + '\t' +
                            'ref_fasta\tanalysis_path\texperiment\n')
                else:
                    g.write(sample_name + '\t' + line.rstrip('\n') + '\t' +
                            ref_fasta + '\t' + out_dir + '\t' + experiment +
                            '\n')

    # remove temporary covstats.tmp.txt file after covstats.txt with sample ID prepared
    if os.path.exists(sample_dir + '/' + sample_name + '.covstats.tmp.txt'):
        os.remove(sample_dir + '/' + sample_name + '.covstats.tmp.txt')

    # copy reference file to output folder
    copyfile(ref_fasta, out_dir + '/genotyping/' + os.path.basename(ref_fasta))

    # return covstats file
    return sample_dir + '/' + sample_name + '.covstats.txt'
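
A minimal usage sketch (the paths and experiment ID below are hypothetical, not from the source):

# hypothetical invocation; returns the path to the annotated covstats file
covstats_path = mapReads(in_fastq='/data/run/sample01.fastq.gz',
                         ref_fasta='/data/ref/ipd-mhc.fasta',
                         out_dir='/data/run/output',
                         experiment='19070')
# covstats_path -> '/data/run/output/genotyping/sample01/sample01.covstats.txt'
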
Example #2
def mapReadsFolder(fastq_folder, ref_fasta, out_dir, experiment):
    '''map FASTQ reads to reference for all files in folder and make pivottable from results'''

    # create list to store covstats paths
    covstats = []

    # collect the FASTQ files that will be processed
    # (filtering first keeps the progress counter accurate when the folder
    # contains non-FASTQ files)
    fastq_files = [filename for filename in os.listdir(fastq_folder)
                   if filename.endswith(".fastq.gz")]
    fastq_count = len(fastq_files)

    # run mapReads on each FASTQ file in the folder
    for idx, filename in enumerate(fastq_files):
        # run mapReads for each file
        # return covstats file path - add to covstats list
        status.printStatus('Genotyping FASTQ file ' + str(idx + 1) + ' of ' +
                           str(fastq_count))
        covstats.append(
            mapReads(fastq_folder + '/' + filename, ref_fasta, out_dir,
                     experiment))

    status.printStatus('Make pivot table from: ' + ', '.join(covstats))

    # create pivottable
    pivotTable(covstats, out_dir)
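
A hedged example of driving a whole run folder (paths are hypothetical):

# genotypes every .fastq.gz in the folder, then summarizes all covstats
# files into a single pivot table under out_dir
mapReadsFolder('/data/run/fastq', '/data/ref/ipd-mhc.fasta',
               '/data/run/output', '19070')
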
Example #3
def makeFastq(ccs_bam):
    '''use smrtlink bam2fastq to produce a gzip-compressed FASTQ file from a CCS bam'''

    # path to smrtlink bam2fastq
    smrtlink_bam2fastq_path = '/slipstream/oc/pacbio/smrtlink_v6/smrtcmds/bin/bam2fastq'

    # create fastq output file name
    ccs_basename = os.path.splitext(os.path.basename(ccs_bam))[0]
    fastq_output = os.path.dirname(ccs_bam) + '/' + ccs_basename
    print(fastq_output)

    # call bam2fastq
    cmd = [
        smrtlink_bam2fastq_path,
        ccs_bam,
        '-o',
        fastq_output,
    ]

    status.printStatus('bam2fastq command: ' + ' '.join(cmd))
    status.printStatus('bam2fastq processing of ' + ccs_bam + ' started')
    subprocess.call(cmd)
    status.printStatus('bam2fastq processing of ' + ccs_bam + ' completed')
    status.printStatus('gzip compressed FASTQ file saved to ' + fastq_output +
                       '.fastq.gz')

    # return path to output fastq file
    return fastq_output + '.fastq.gz'
Example #4
def importLabkey(df):
    '''import tabular genotypes into https://dholk.primate.wisc.edu/list/dho/gs/grid.view?listId=1630'''

    # make list of records from tabular dataframe
    labkey_data = df.to_dict('records')

    # add to labkey
    x = labkeyInteract.LabkeyInsertRows()
    x.serverContext('/dho/gs/')
    x.labkey_schema = 'lists'
    x.labkey_table = 'pacbio_genotypes'
    x.insertRows(x.labkey_schema, x.labkey_table, labkey_data)

    # log
    status.printStatus(
        str(len(labkey_data)) + ' sample genotypes added to dholk')
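
To illustrate the records that insertRows receives, a small sketch of pandas to_dict('records') (the column names and values are made up):

import pandas as pd

# each dataframe row becomes one dict, i.e., one row in the LabKey list
df = pd.DataFrame({'sample_name': ['s1', 's2'], 'genotype': ['A1*001', 'B*017']})
df.to_dict('records')
# -> [{'sample_name': 's1', 'genotype': 'A1*001'},
#     {'sample_name': 's2', 'genotype': 'B*017'}]
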
Example #5
def makeCcs(subreads,
            out_dir,
            minPredictedAccuracy='0.9',
            minLength='1000',
            maxLength='1500'):
    '''use smrtlink ccs to produce consensus sequence'''

    # path to smrtlink ccs
    smrtlink_ccs_path = '/slipstream/SMRT4/SMRT/smrtcmds/bin/ccs'

    # check that subreads file exists
    if not os.path.exists(subreads):
        status.printStatus(
            'Error: Specified subread file does not exist. Check your file path and try again.'
        )
        return

    # filename of input file
    subreads_basename = os.path.splitext(os.path.basename(subreads))[0]
    print(subreads_basename)

    # create output directory if it doesn't exist
    utils.createOutputFolder(out_dir)

    # call ccs
    cmd = [
        smrtlink_ccs_path, '--minPredictedAccuracy', minPredictedAccuracy,
        '--minLength', minLength, '--maxLength', maxLength, subreads,
        out_dir + '/' + subreads_basename + '.ccs.bam'
    ]

    status.printStatus('CCS command: ' + ' '.join(cmd))
    status.printStatus('CCS processing of ' + subreads + ' started')
    subprocess.call(cmd)
    status.printStatus('CCS processing of ' + subreads + ' completed')
    status.printStatus('Output CCS file saved to ' + out_dir + '/' +
                       subreads_basename + '.ccs.bam')

    # create fastq file
    fastq_path = makeFastq(out_dir + '/' + subreads_basename + '.ccs.bam')
    return fastq_path
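
An end-to-end sketch (the subreads path is hypothetical): makeCcs writes the CCS BAM, hands it to makeFastq, and returns the gzip-compressed FASTQ path.

# hypothetical invocation; the CCS parameters are strings because they are
# passed straight through to the ccs command line
fastq = makeCcs('/data/run/m54178.subreads.bam', '/data/run/ccs',
                minPredictedAccuracy='0.999')
# fastq -> '/data/run/ccs/m54178.subreads.ccs.fastq.gz'
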
Example #6
def extractSequenceNames(gzip_fastq):
    '''convert FASTQ to FASTA and then extract sequence names to new file'''

    # path to reformat.sh, update as needed
    bbmap_reformat_sh = '/slipstream/oc/jrcanalysisdata/mhcAnalysis/bbmap/reformat.sh'

    # create temporary whitelist sequence path
    whitelist_sequences = gzip_fastq + '.whitelist.tmp.txt'
    print(whitelist_sequences)

    # create reformat.sh command to convert fastq to fasta
    cmd = [
        bbmap_reformat_sh, 'in=' + gzip_fastq,
        'out=' + whitelist_sequences + '.tmp.fasta'
    ]

    # print bbmap command
    status.printStatus(' '.join(cmd))

    # call reformat.sh
    subprocess.call(cmd)

    # need to remove trailing /ccs from FASTA file
    # use code from https://stackoverflow.com/questions/17140886/how-to-search-and-replace-text-in-a-file-using-python

    with fileinput.FileInput(whitelist_sequences + '.tmp.fasta',
                             inplace=True) as file:
        for line in file:
            print(line.replace('/ccs', ''), end='')

    # extract sequence names to new file
    with open(whitelist_sequences, 'w') as the_file:
        for seq_record in SeqIO.parse(whitelist_sequences + '.tmp.fasta',
                                      "fasta"):
            the_file.write(seq_record.id + '\n')

    # return path to fasta_output
    return whitelist_sequences
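
Why the trailing /ccs is stripped, in miniature (the read name is illustrative):

# PacBio CCS read names end in '/ccs'; the whitelist needs the bare name
name = 'm54178_170519_124037/4391566/ccs'
print(name.replace('/ccs', ''))  # -> m54178_170519_124037/4391566
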
Example #7
def getSamples(pacbio_id):
    '''retrieve sample information from genotyping Samples table'''

    # get runId corresponding to pacbio_id
    runId = getRunId(pacbio_id)

    # get samples from specified PacBio run
    pacbio_samples = labkeyInteract.LabkeySelectRows()
    pacbio_samples.serverContext('dho/pacbio')
    pacbio_samples.set_filters('run_id', runId)
    result = pacbio_samples.selectRows(labkey_schema='genotyping', labkey_table='Samples')

    # log count of samples in pacbio_id
    status.printStatus(str(result['rowCount']) + ' samples detected in ' + pacbio_id)
    status.printStatus('Barcode configuration')
    # log information on each sample
    print('OC_ID\tForward Barcode\tReverse Barcode')

    samples = {} # initialize samples dictionary

    for i in result['rows']:
        # use oc_id if it exists, otherwise use animal_id to identify sample name
        if i['oc_animal_id'] is None:
            sample_name = i['animal_id']
        else:
            sample_name = i['oc_animal_id']

        # run normalizeBarcodes to create PacBio standard identifiers
        renamed_barcodes = normalizeBarcodes(i)

        # print samples
        print(sample_name + '\t' + renamed_barcodes[0] + '\t' + renamed_barcodes[1])

        # create dictionary with sample name and barcodes
        samples[sample_name] = [renamed_barcodes[0], renamed_barcodes[1]]

    return samples
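
The returned dictionary maps each sample name to its [forward, reverse] barcode pair, roughly as follows (sample names and barcode IDs are made up):

# samples = getSamples('PacBio48')
# samples -> {'OC123': ['bc1001', 'bc1002'],
#             'OC124': ['bc1003', 'bc1004']}
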
Example #8
def getRunId(pacbio_id):
    '''inherit pacbio_id (e.g., PacBio48) from parent function and retrieve run identifier'''

    # get Pacbio run ID for specified run
    # necessary because Samples table stores run_id as foreign key lookup to runs table
    # debug modification by JRC 09202018

    pacbio_run_id = labkeyInteract.LabkeySelectRows()
    pacbio_run_id.serverContext('dho/pacbio')
    pacbio_run_id.set_filters('pacbio_id', pacbio_id)
    result = pacbio_run_id.selectRows(labkey_schema='lists', labkey_table='runs')

    # debug modification by JRC 09202018
    print('result is')
    print(result)
    time.sleep(5)
    # extract run number from result
    runNumber = result['rows'][0]['run_num']

    # log whether pacbio_id corresponding to run_id is found
    if runNumber != '':
        status.printStatus(pacbio_id + ' found in dholk.primate.wisc.edu')

    return runNumber
Example #9
def makeFastq(ccs_bam):
    '''use smrtlink bam2fastq to produce a gzip-compressed FASTQ file from a CCS bam'''

    # path to smrtlink bam2fastq
    smrtlink_bam2fastq_path = '/slipstream/SMRT4/SMRT/smrtcmds/bin/bam2fastq'

    # create fastq output file name
    ccs_basename = os.path.splitext(os.path.basename(ccs_bam))[0]
    fastq_output = os.path.dirname(ccs_bam) + '/' + ccs_basename

    # call bam2fastq
    cmd = [
        smrtlink_bam2fastq_path,
        ccs_bam,
        '-o',
        fastq_output,
    ]

    status.printStatus('bam2fastq command: ' + ' '.join(cmd))
    status.printStatus('bam2fastq processing of ' + ccs_bam + ' started')
    subprocess.call(cmd)
    status.printStatus('bam2fastq processing of ' + ccs_bam + ' completed')
    status.printStatus('gzip compressed FASTQ file saved to ' + fastq_output +
                       '.fastq.gz')

    # return path to output fastq file
    return fastq_output + '.fastq.gz'


# if __name__ == '__main__':  # if run directly from the command line
#     # command line parameters
#     import argparse
#     parser = argparse.ArgumentParser()
#     parser.add_argument("out_dir", help='Folder that will store all output files')
#     parser.add_argument("--subreads", required=True,
#                         help='Path to file of PacBio subreads. Will be converted to CCS file.')
#     parser.add_argument("--ccsMinAccuracy", required=False,
#                         help='Set minPredicted accuracy (from 0-1) for retaining CCS reads. Default=0.9. Recommend 0.999 for de novo allele discovery.')
#     parser.add_argument("--ccsMinLength", required=False,
#                         help='Set minLength in bp for retaining CCS reads. Default=1000. Set to minimum expected amplicon size.')
#     parser.add_argument("--ccsMaxLength", required=False,
#                         help='Set maxLength in bp for retaining CCS reads. Default=1500. Set to maximum expected amplicon size.')
#     args = parser.parse_args()
#
#     # make output folder if it doesn't exist
#     utils.createOutputFolder(args.out_dir)
#
#     # configure logging to file
#     logging.basicConfig(filename=args.out_dir + '/log.txt', filemode='w', level=logging.DEBUG,
#                         format='%(asctime)s %(message)s', datefmt='%Y-%m-%d %I:%M:%S')
#
#     # run with command line parameters
#     d = {}
#     d['subreads'] = args.subreads
#     d['out_dir'] = args.out_dir
#     if args.ccsMinAccuracy is not None: d['minPredictedAccuracy'] = args.ccsMinAccuracy
#     if args.ccsMinLength is not None: d['minLength'] = args.ccsMinLength
#     if args.ccsMaxLength is not None: d['maxLength'] = args.ccsMaxLength
#
#     # log command line
#     status.printStatus('Command line statement: ' + ' '.join(sys.argv))
#
#     # run makeCcs function
#     makeCcs(**d)

# # test invocation
#     d = {}
#     d['subreads'] = '/slipstream/pacbio/pacbio_raw/pacbio48/3_C01/m54178_170519_124037.subreads.bam'
#     d['out_dir'] = '/slipstream/shared_data/19070/pacbio48-default-minQuality/11'
#     d['minPredictedAccuracy'] = '0.9'
#     d['minLength'] = '1000'
#     d['maxLength'] = '1500'
#
#     # log command line
#     status.printStatus('CCS parameters: ' + str(d))
#
#     # run makeCcs function
#     makeCcs(**d)

# test bam2fastq
# makeFastq('/slipstream/shared_data/19070/pacbio48//20170604082124/ccs//m54178_170519_124037.subreads.ccs.bam')
Example #10
    env["TEXTTEST_TMP"] = os.path.join(options.rootDir, env["FILEPREFIX"]+"texttesttmp")
    env["TEXTTEST_HOME"] = os.path.join(options.rootDir, options.testsDir)
    if "SUMO_HOME" not in env:
        env["SUMO_HOME"] = os.path.join(os.path.dirname(__file__), '..', '..')
    shutil.rmtree(env["TEXTTEST_TMP"], True)
    if not os.path.exists(env["SUMO_REPORT"]):
        os.makedirs(env["SUMO_REPORT"])
    for name in ["dfrouter", "duarouter", "jtrrouter", "netconvert", "netgenerate", "od2trips", "sumo", "polyconvert", "sumo-gui", "activitygen"]:
        binary = os.path.join(options.rootDir, options.binDir, name + programSuffix + ".exe")
        if name == "sumo-gui":
            if os.path.exists(binary):
                env["GUISIM_BINARY"] = binary
        elif os.path.exists(binary):
            env[name.upper()+"_BINARY"] = binary
    log = open(testLog, 'w')
    # provide more information than just the date:
    nameopt = " -name %sr%s" % (date.today().strftime("%d%b%y"), svnrev)
    if options.sumoExe == "meso":
        runInternalTests.runInternal(programSuffix, "-b "+env["FILEPREFIX"]+nameopt, log)
    else:
        subprocess.call("texttest.py -b "+env["FILEPREFIX"]+nameopt, stdout=log, stderr=subprocess.STDOUT, shell=True)
    subprocess.call("texttest.py -a sumo.gui -b "+env["FILEPREFIX"]+nameopt, stdout=log, stderr=subprocess.STDOUT, shell=True)
    subprocess.call("texttest.py -b "+env["FILEPREFIX"]+" -coll", stdout=log, stderr=subprocess.STDOUT, shell=True)
    ago = datetime.datetime.now() - datetime.timedelta(50)
    subprocess.call('texttest.py -s "batch.ArchiveRepository session='+env["FILEPREFIX"]+' before=%s"' % ago.strftime("%d%b%Y"),
                    stdout=log, stderr=subprocess.STDOUT, shell=True)
    log.close()
    log = open(statusLog, 'w')
    status.printStatus(makeLog, makeAllLog, env["TEXTTEST_TMP"], env["SMTP_SERVER"], log)
    log.close()
Example #11
def main(options, platform="x64"):
    env["FILEPREFIX"] = options.msvc_version + options.suffix + platform
    prefix = os.path.join(options.remoteDir, env["FILEPREFIX"])
    makeLog = prefix + "Release.log"
    makeAllLog = prefix + "Debug.log"
    testLog = prefix + "Test.log"
    testDebugLog = prefix + "DebugTest.log"
    statusLog = prefix + "status.log"
    log_handler = status.set_rotating_log(makeLog)

    status.killall(("", "D"), BINARIES)
    toClean = []
    for ext in ("*.exe", "*.ilk", "*.pdb", "*.py", "*.pyd", "*.dll", "*.lib", "*.exp", "*.jar", "*.manifest", "*.fmu"):
        toClean += glob.glob(os.path.join(SUMO_HOME, "bin", ext))
    toClean += glob.glob(os.path.join(SUMO_HOME, "tools", "lib*", "*lib*"))
    toClean += glob.glob(os.path.join(SUMO_HOME, "share", "*", "*"))
    for f in toClean:
        try:
            os.remove(f)
        except Exception:
            pass
    for d in (glob.glob(os.path.join(SUMO_HOME, "bin", "osgPlugins*")) +
              glob.glob(os.path.join(SUMO_HOME, "tools", "*.egg-info"))):
        shutil.rmtree(d, ignore_errors=True)
    for d in glob.glob(os.path.join(SUMO_HOME, "docs", "*")):
        if os.path.basename(d) in ('examples', 'javadoc', 'man', 'pydoc', 'tutorial', 'userdoc'):
            shutil.rmtree(d, ignore_errors=True)

    status.printLog("Running %s build using python %s." % (options.msvc_version, sys.version))
    gitrev = repositoryUpdate(options)
    generator = "Visual Studio " + ("12 2013" if options.msvc_version == "msvc12" else "16 2019")
    buildDir = generateCMake(generator, platform, options.suffix == "extra", options.python)
    ret = status.log_subprocess(["cmake", "--build", ".", "--config", "Release"], cwd=buildDir)
    status.log_subprocess(["cmake", "--build", ".", "--config", "Release", "--target", "lisum"], cwd=buildDir)
    status.log_subprocess(["cmake", "--build", ".", "--config", "Release", "--target", "userdoc", "examples"],
                          cwd=buildDir)
    status.log_subprocess(["cmake", "--install", "."], cwd=buildDir)
    plat = platform.lower().replace("x", "win")
    if options.msvc_version != "msvc16":
        plat += options.msvc_version
    for d in glob.glob(os.path.join(buildDir, "sumo-*")):
        if os.path.isdir(d):
            installDir = d
    installBase = os.path.basename(installDir)
    binaryZip = os.path.join(buildDir, "sumo-%s%s-%s" % (plat, options.suffix, installBase[5:]))
    if ret == 0:
        try:
            for f in (glob.glob(os.path.join(SUMO_HOME, "*.md")) +
                      [os.path.join(SUMO_HOME, n) for n in ("AUTHORS", "ChangeLog", "LICENSE")]):
                shutil.copy(f, installDir)
            if options.suffix == "extra":
                shutil.copy(os.path.join(SUMO_HOME, "build", "wix", "gpl-2.0.txt"), os.path.join(installDir, "LICENSE"))
            for f in glob.glob(os.path.join(SUMO_HOME, "bin", "*.jar")):
                shutil.copy(f, os.path.join(installDir, "bin"))
            shutil.copytree(os.path.join(SUMO_HOME, "docs"), os.path.join(installDir, "docs"),
                            ignore=shutil.ignore_patterns('web'))
            shutil.copy(os.path.join(buildDir, "src", "version.h"), os.path.join(installDir, "include"))
            status.printLog("Creating sumo.zip.")
            shutil.make_archive(binaryZip, 'zip', buildDir, installBase)
            shutil.copy(binaryZip + ".zip", options.remoteDir)
            status.printLog("Creating sumo.msi.")
            if options.suffix == "extra":
                wix.buildMSI(binaryZip + ".zip", binaryZip + ".msi",
                             license=os.path.join(SUMO_HOME, "build", "wix", "gpl-2.0.rtf"))
            else:
                wix.buildMSI(binaryZip + ".zip", binaryZip + ".msi")
            shutil.copy(binaryZip + ".msi", options.remoteDir)
        except Exception as ziperr:
            status.printLog("Warning: Could not zip to %s.zip (%s)!" % (binaryZip, ziperr))

    gameZip = os.path.join(buildDir, "sumo-game-%s%s-%s.zip" % (plat, options.suffix, installBase[5:]))
    status.printLog("Creating sumo-game.zip.")
    try:
        status.log_subprocess(["cmake", "--build", ".", "--target", "game"], cwd=buildDir)
        shutil.move(os.path.join(buildDir, "sumo-game.zip"), gameZip)
        shutil.copy(gameZip, options.remoteDir)
    except Exception as e:
        status.printLog("Warning: Could not create nightly sumo-game.zip! (%s)" % e)

    debug_handler = status.set_rotating_log(makeAllLog, log_handler)
    ret = status.log_subprocess(["cmake", "--build", ".", "--config", "Debug"], cwd=buildDir)
    if ret == 0:
        debugZip = os.path.join(buildDir, "sumo-%s%sDebug-%s.zip" % (plat, options.suffix, installBase[5:]))
        status.printLog("Creating sumoDebug.zip.")
        try:
            with zipfile.ZipFile(debugZip, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for ext in ("*D.exe", "*.dll", "*D.pdb"):
                    for f in glob.glob(os.path.join(SUMO_HOME, "bin", ext)):
                        zipf.write(f, os.path.join(installBase, "bin", os.path.basename(f)))
            shutil.copy(debugZip, options.remoteDir)
        except IOError as ziperr:
            status.printLog("Warning: Could not zip to %s (%s)!" % (debugZip, ziperr))

    log_handler = status.set_rotating_log(testLog, debug_handler)
    status.printLog("Running tests.")
    runTests(options, env, gitrev)
    with open(statusLog, 'w') as log:
        status.printStatus(makeLog, makeAllLog, env["SMTP_SERVER"], log, testLog=testLog)
    if not options.x64only:
        debug_handler = status.set_rotating_log(testDebugLog, log_handler)
        status.printLog("Running debug tests.")
        runTests(options, env, gitrev, "D")
        with open(prefix + "Dstatus.log", 'w') as log:
            status.printStatus(makeAllLog, testDebugLog, env["SMTP_SERVER"], log, testLog=testDebugLog)
Example #12
    # provide more information than just the date:
    nameopt = " -name %sr%s" % (date.today().strftime("%d%b%y"), svnrev)
    if options.sumoExe == "meso":
        runInternalTests.runInternal(programSuffix,
                                     "-b " + env["FILEPREFIX"] + nameopt, log)
    else:
        subprocess.call("texttest.py -b " + env["FILEPREFIX"] + nameopt,
                        stdout=log,
                        stderr=subprocess.STDOUT,
                        shell=True)
    subprocess.call("texttest.py -a sumo.gui -b " + env["FILEPREFIX"] +
                    nameopt,
                    stdout=log,
                    stderr=subprocess.STDOUT,
                    shell=True)
    subprocess.call("texttest.py -b " + env["FILEPREFIX"] + " -coll",
                    stdout=log,
                    stderr=subprocess.STDOUT,
                    shell=True)
    ago = datetime.datetime.now() - datetime.timedelta(50)
    subprocess.call('texttest.py -s "batch.ArchiveRepository session=' +
                    env["FILEPREFIX"] + ' before=%s"' % ago.strftime("%d%b%Y"),
                    stdout=log,
                    stderr=subprocess.STDOUT,
                    shell=True)
    log.close()
    log = open(statusLog, 'w')
    status.printStatus(makeLog, makeAllLog, env["TEXTTEST_TMP"],
                       env["SMTP_SERVER"], log)
    log.close()
Example #13
#!/usr/bin/env python

import json, argparse, httpd, status


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Check status script')
    parser.add_argument('--json', const='json', default='print', dest='output_format', action='store_const', help='return the status as json')
    parser.add_argument('--www', const=True, default=False, dest='start_httpd', action='store_const', help='start a local webserver reporting the status')
    args = parser.parse_args()

    if args.start_httpd:
        print('Starting webserver...')
        httpd.LocalStatusHttpd()
    else:
        if args.output_format == 'json':
            print(status.getServicesStatus())
        else:
            data = json.loads(status.getServicesStatus())
            for svc in data:
                print("{} ({}): {}".format(svc['service'], svc['port'], status.printStatus(svc['status'])))
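
Example invocations, assuming the script is saved as check_status.py (the filename is hypothetical):

# python check_status.py          # human-readable line per service
# python check_status.py --json   # raw JSON from status.getServicesStatus()
# python check_status.py --www    # start the local status webserver
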
Example #14
def runLongAmpliconAnalysis(subreadsetXML,
                            whitelistSequences,
                            outputPrefix,
                            minLength='1000',
                            maxLength='1500',
                            maxReads='20000',
                            maxClusteringReads='5000'):
    '''run SMRT Link v6 long amplicon analysis'''

    # runs LAA to generate amplicon sequences from PacBio Sequel data
    # subreadsetXML can be from a single dataset, or merged datasets where new XML files are created using dataset create
    # whitelistSequences is a file listing the read names that LAA will analyze, typically reads from a single sample
    # defaults are set for typical MHC class I genotyping and should be adjusted depending on target
    # note: LAA default minLength=3000 will cause most of our analyses to fail so minLength should almost always be set
    # increasing maxClusteringReads will allow more alleles to be detected at the expense of speed:
    # LAA default of 500 clustering reads runs each sample in ~2 minutes, MHC class I default of 10000 takes ~30 minutes
    # but detects more alleles. Setting even higher values like 100,000 clustering reads causes runtimes of several hours.
    # maxReads can be set very high to ensure that all reads are used to accurately define clusters. This doesn't significantly
    # impact runtime.

    # use outputPrefix to specify the folder and prefix for output files
    # e.g., '/slipstream/shared_data/19364/09/'
    # e.g., '/slipstream/shared_data/19364/09/BM115.'

    # path to SMRT Link v6.0 LAA
    laa_path = '/slipstream/oc/pacbio/smrtlink_v6/smrtcmds/bin/laa'

    # create output folder if it doesn't exist
    utils.createOutputFolder(os.path.dirname(outputPrefix))

    # create laa command
    laa_cmd = [
        laa_path, '--whitelist=' + whitelistSequences,
        '--logFile=' + outputPrefix + '.log.txt',
        '--resultFile=' + outputPrefix + '.amplicon_analysis.fastq',
        '--junkFile=' + outputPrefix +
        '.amplicon_analysis_chimeras_noise.fastq',
        '--reportFile=' + outputPrefix + '.amplicon_analysis_summary.csv',
        '--inputReportFile=' + outputPrefix + '.amplicon_analysis_input.csv',
        '--subreadsReportPrefix=' + outputPrefix +
        '.amplicon_analysis_subreads', subreadsetXML
    ]
    print(laa_cmd)
    # alternative command including the length and read-count filters:
    #    laa_cmd = [laa_path,
    #               '--minLength=' + minLength,
    #               '--maxLength=' + maxLength,
    #               '--maxReads=' + maxReads,
    #               '--maxClusteringReads=' + maxClusteringReads,
    #               '--whitelist=' + whitelistSequences,
    #               '--logFile=' + outputPrefix + '.log.txt',
    #               '--resultFile=' + outputPrefix + '.amplicon_analysis.fastq',
    #               '--junkFile=' + outputPrefix + '.amplicon_analysis_chimeras_noise.fastq',
    #               '--reportFile=' + outputPrefix + '.amplicon_analysis_summary.csv',
    #               '--inputReportFile=' + outputPrefix + '.amplicon_analysis_input.csv',
    #               '--subreadsReportPrefix=' + outputPrefix + '.amplicon_analysis_subreads',
    #               subreadsetXML]

    # print laa command
    status.printStatus(' '.join(laa_cmd))

    # call laa
    subprocess.call(laa_cmd)

    # return path to LAA fastq output (matches --resultFile above)
    return outputPrefix + '.amplicon_analysis.fastq'
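
A hedged invocation sketch (paths are hypothetical). Note that the length and read-count options are commented out of laa_cmd above, so laa runs with its built-in defaults:

fastq = runLongAmpliconAnalysis(
    subreadsetXML='/data/run/merged.subreadset.xml',
    whitelistSequences='/data/run/BM115.whitelist.tmp.txt',
    outputPrefix='/data/run/laa/BM115')
# fastq -> '/data/run/laa/BM115.amplicon_analysis.fastq'
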
Example #15
def parseBarcodes(samples, input_ccs_fastq, out_dir):
    '''parse barcodes from gzip-compressed FASTQ of PacBio CCS reads'''

    # create output directory if it doesn't exist
    utils.createOutputFolder(out_dir)

    # create PacBio barcode dictionary to lookup against
    pacbioLookup = pacbioBarcodeDict()

    # create dictionary of sample IDs and barcode sequences
    searchDict = {}
    for seq_name, barcode_seqs in samples.items():
        searchDict[seq_name] = [pacbioLookup[barcode_seqs[0]], pacbioLookup[barcode_seqs[1]]]

    # open gzip-compressed FASTQ
    with gzip.open(input_ccs_fastq, "rt") as handle:

        # make dictionary to hold barcode-split seq records
        perBarcodeDict = {}

        # initialize dictionary with names of each sample
        for j in searchDict:
            perBarcodeDict[j] = []

        # log every 1000 sequences processed
        log_every_n = 1000

        # iterate through generator containing FASTQ sequences
        for idx, i in enumerate(SeqIO.parse(handle, "fastq")):

            # print status message every 1000 sequences processed
            if (idx % log_every_n) == 0:
                status.printStatus(str(idx) + ' FASTQ reads demultiplexed')

            # for each sequence, look for the presence of barcodes at the start and end
            for j in searchDict:
                # redo to use re.search to find barcodes not at very end of sequence
                # if i.seq.startswith(searchDict[j][0]) and i.seq.endswith(searchDict[j][1]):

                # regular expression to find barcodes in forward orientation
                prog = re.compile(searchDict[j][0] + '.*' + searchDict[j][1])

                # test if regular expression is found in sequence
                # need to cast i.seq to string to use re.search

                if prog.search(str(i.seq)):
                    # store matching read in perBarcodeDict - kept in memory
                    perBarcodeDict[j].append(i)

                # handle inserts in the opposite orientation
                # create Biopython sequence object containing barcode sequences
                forward_seq = Seq(searchDict[j][0])
                reverse_seq = Seq(searchDict[j][1])

                # reverse complement
                forward_seq_rc = forward_seq.reverse_complement()
                reverse_seq_rc = reverse_seq.reverse_complement()

                # find FASTQ sequences matching reverse complemented barcodes
                # if i.seq.startswith(forward_seq_rc) and i.seq.endswith(reverse_seq_rc):

                # because of the SMRTBell orientation, second barcode gets listed first in reverse complement orientation
                prog = re.compile(str(reverse_seq_rc) + '.*' + str(forward_seq_rc))

                # need to cast i.seq to string to use re.search
                if prog.search(str(i.seq)):
                    # store matches in dictionary
                    perBarcodeDict[j].append(i)

        # write output files containing reads matching each barcode
        for i in perBarcodeDict:
            count = SeqIO.write(perBarcodeDict[i], out_dir + '/' + i + '.fastq', 'fastq')

            # compress fastq file and remove uncompressed version

            with open(out_dir + '/' + i + '.fastq', 'rb') as f_in:
                with gzip.open(out_dir + '/' + i + '.fastq.gz', 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)

            os.remove(out_dir + '/' + i + '.fastq') # remove uncompressed

            # log
            status.printStatus(str(count) + ' barcoded reads saved from sample ' + i)
            status.printStatus('gzip-compressed demultiplexed FASTQ file saved to ' + out_dir + '/' + i + '.fastq.gz')
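
To illustrate the two orientations the parser checks, a small sketch with Biopython (the barcode sequences are made up):

import re
from Bio.Seq import Seq

fwd, rev = Seq('AAACCC'), Seq('GGGTAT')  # hypothetical barcode pair
read = 'AAACCC' + 'NNNN' + 'GGGTAT'      # forward-orientation read

# forward pattern: forward barcode ... reverse barcode
assert re.search(str(fwd) + '.*' + str(rev), read)

# a flipped read is the reverse complement of the whole molecule, so the
# reverse barcode's reverse complement appears first
flipped = str(Seq(read).reverse_complement())
assert re.search(str(rev.reverse_complement()) + '.*' + str(fwd.reverse_complement()), flipped)
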
Example #16
        print("Warning: Could not create nightly sumo-game.zip! (%s)" % e, file=log)
    log.close()
    with open(makeAllLog, 'a') as log:
        subprocess.call(["cmake", "--build", ".", "--config", "Debug"],
                        cwd=buildDir, stdout=log, stderr=subprocess.STDOUT)
    if sumoAllZip:
        try:
            debugZip = sumoAllZip.replace("-all-", "Debug-%s-" % env["FILEPREFIX"])
            zipf = zipfile.ZipFile(debugZip, 'w', zipfile.ZIP_DEFLATED)
            debugDllPath = os.path.join(options.rootDir, "..", "debugDll")
            if platform == "x64":
                debugDllPath += "64"
            for dllPath in (os.path.join(options.rootDir, dllDir), debugDllPath):
                for f in glob.glob(os.path.join(dllPath, "*.dll")) + glob.glob(os.path.join(dllPath, "*", "*.dll")):
                    zipf.write(f, os.path.join(binDir, f[len(dllPath) + 1:]))
            for f in (glob.glob(os.path.join(options.rootDir, options.binDir, "*D.exe")) +
                      glob.glob(os.path.join(options.rootDir, options.binDir, "*D.pdb"))):
                zipf.write(f, os.path.join(binDir, os.path.basename(f)))
            zipf.close()
        except IOError as ziperr:
            (errno, strerror) = ziperr.args
            print("Warning: Could not zip to %s!" % binaryZip, file=log)
            print("I/O error(%s): %s" % (errno, strerror), file=log)
    runTests(options, env, gitrev, options.extended_tests and platform == "x64")
    with open(statusLog, 'w') as log:
        status.printStatus(makeLog, makeAllLog, env["SMTP_SERVER"], log)
    if options.extended_tests:
        runTests(options, env, gitrev, True, "D")
        with open(prefix + "Dstatus.log", 'w') as log:
            status.printStatus(makeAllLog, makeAllLog, env["SMTP_SERVER"], log)
Example #17
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("out_dir",
                        help='Folder that will store all output files')
    parser.add_argument(
        "fastq_folder",
        help='Path to folder containing FASTQ files to genotype')
    parser.add_argument(
        "ref_fasta", help='Path to reference FASTA file to map reads against')
    parser.add_argument("experiment", help='Experiment number')
    args = parser.parse_args()

    # make output folder if it doesn't exist
    utils.createOutputFolder(args.out_dir)

    # configure logging to file
    logging.basicConfig(filename=args.out_dir + '/log.txt',
                        filemode='w',
                        level=logging.DEBUG,
                        format='%(asctime)s %(message)s',
                        datefmt='%Y-%m-%d %I:%M:%S')

    # log command line
    status.printStatus('Command line statement: ' + ' '.join(sys.argv))

    # map reads and summarize results
    mapReadsFolder(args.fastq_folder, args.ref_fasta, args.out_dir,
                   args.experiment)

    # example invocation
    # anaconda3/bin/python /slipstream/shared_data/pycharm/dhogal/19070/genotyping.py /slipstream/shared_data/19070/pacbio48-default-minQuality/16/ /slipstream/shared_data/19070/pacbio48-default-minQuality/12/fastq/ /slipstream/shared_data/19070/pacbio48-default-minQuality/ipd-mhc-20170523.fasta
Example #18
                for f in glob.glob(os.path.join(dllPath, "*.dll")) + glob.glob(
                        os.path.join(dllPath, "*", "*.dll")):
                    zipf.write(f, os.path.join(binDir, f[len(dllPath) + 1:]))
            buildDir = os.path.dirname(
                os.path.join(options.rootDir, options.project))
            for f in glob.glob(
                    os.path.join(options.rootDir, options.binDir, "*D.exe")):
                exe = os.path.basename(f)
                pdb = exe[:-3] + "pdb"
                zipf.write(f, os.path.join(binDir, exe))
                if platform == "x64":
                    pdbPath = os.path.join(buildDir, exe[:-5], "x64", "Debug",
                                           pdb)
                else:
                    pdbPath = os.path.join(buildDir, exe[:-5], "Debug", pdb)
                if os.path.exists(pdbPath):
                    zipf.write(pdbPath, os.path.join(binDir, pdb))
            zipf.close()
        except IOError as ziperr:
            (errno, strerror) = ziperr.args
            print("Warning: Could not zip to %s!" % binaryZip, file=log)
            print("I/O error(%s): %s" % (errno, strerror), file=log)
    runTests(options, env, gitrev)
    log = open(statusLog, 'w')
    status.printStatus(makeLog, makeAllLog, env["SMTP_SERVER"], log)
    log.close()
    runTests(options, env, gitrev, "D")
    log = open(prefix + "Dstatus.log", 'w')
    status.printStatus(makeAllLog, makeAllLog, env["SMTP_SERVER"], log)
    log.close()
Example #19
def main():

    runLog.logger.info("Starting GCO.py")
    egpg = easygopigo3.EasyGoPiGo3(
        use_mutex=True)  # Create an instance of the EasyGoPiGo3 class
    # Adjust GOPIGO3 CONSTANTS to my bot   default EasyGoPiGo3.WHEEL_DIAMETER = 66.5 mm
    myconfig.setParameters(egpg)
    ds = myDistSensor.init(egpg)
    tp = tiltpan.TiltPan(egpg)

    tp.tiltpan_center()

    dist_list_mm = []
    at_angle_list = []
    scan360speed = 150
    safe_distance = 20.32  # cm  8 inches wheels to wall/object
    ds_to_wheels = 7  # cm    distance sensor is 2.75 inches in front of wheels
    try:
        #  spin360 taking distance measurement
        print("\n360 degree scan  at speed={}".format(scan360speed))
        dist_list_mm, at_angle_list = scan360.spin_and_scan(
            egpg, ds, tp, 360,
            speed=scan360speed)  # spin taking distance readings
        range_list_cm = [dist / 10 for dist in dist_list_mm]
        printmaps.view360(
            range_list_cm,
            at_angle_list)  # print view (all r positive, theta 0=left)
        print("Readings:{}".format(len(at_angle_list)))

        sleep(3)

        #  spin to face closest object
        dist_to_target, scan_angle_to_target = closest_obj(
            range_list_cm, at_angle_list)
        angle_to_target = scan_angle_to_target - 90  # adjust for 0=left
        print("\nClosest object is {:.1f} cm at {:.0f} degrees".format(
            dist_to_target, angle_to_target))

        sleep(3)

        print("\nTurning {:.0f} at {} dps to face closest object".format(
            angle_to_target, egpg.get_speed()))
        egpg.turn_degrees(angle_to_target)

        sleep(3)

        #  travel to point where wheels are 10 inches from object (will back up if too close)
        dist_to_guard_spot = dist_to_target + ds_to_wheels - safe_distance
        print("\nMoving {:.0f} cm to guard spot".format(dist_to_guard_spot))
        egpg.drive_cm(dist_to_guard_spot)

        sleep(3)

        #  perform a 160 degree scan with obj in the center
        #  spin 180 to face away from object
        print("\nTurning 180 to guard direction")
        egpg.turn_degrees(180)

        sleep(3)

        #  loop
        #  perform a quick 160 degree scan
        #  if something gets closer, wag head and announce "I saw that."
        while True:
            dist_l, angl_l = servoscan.ds_map(ds, tp, num_of_readings=72)
            printmaps.view180(dist_l,
                              angl_l,
                              grid_width=80,
                              units="cm",
                              ignore_over=230)
            #  turn  distance sensor (eyes) to face closest object
            dist_to_closest, scan_angle_to_closest = closest_obj(
                dist_l, angl_l)
            angle_to_closest = scan_angle_to_closest  # - 90   # adjust for 0=left
            print("\nClosest object is {:.1f} cm at {:.0f} degrees".format(
                dist_to_closest, angle_to_closest))
            print("\nPointing {:.0f} to face closest object".format(
                angle_to_closest))
            tp.pan(angle_to_closest)
            sleep(2)
            status.printStatus(egpg, ds)
            sleep(30)
            tp.tiltpan_center()


            # status.batterySafetyCheck()

    except KeyboardInterrupt:  # exit cleanly when interrupted by Ctrl+C on the keyboard
        egpg.stop()  # stop motors
        runLog.logger.info("Exiting GCO.py")
        print("Ctrl-C detected - Finishing up")
    egpg.stop()