def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    # write out a temporary file with our query/dependencies
    query = tempfile.NamedTemporaryFile(delete=False)
    name = query.name
    queryFile = "pig-" + str(time.time()) + ".q"
    query.write("fs -copyFromLocal %s %s; cmd = load '%s'; dump cmd;" % (queryFile, queryFile, queryFile))
    query.close()

    # read it back in as base64 encoded binary
    query = open(name, "rb")
    contents = base64.b64encode(query.read())
    print contents
    query.close()
    os.unlink(name)

    payload = (
        """
    {  
        "jobInfo":
        {
            "jobName": "PIG-JOB-TEST",
            "description": "This is a test", 
            "userName" : "genietest", 
            "groupName" : "hadoop", 
            "jobType": "pig", 
            "configuration": "prod",
            "schedule": "adHoc",
            "cmdArgs": "-f """
        + queryFile
        + '''",
            "attachments": {
                "data": "'''
        + contents
        + '''",
                "name": "'''
        + queryFile
        + """"
            }
        }
    }
    """
    )
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
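Hand-splicing a base64 blob into a JSON string, as above, is fragile: any character that needs escaping in the encoded data or file name corrupts the payload, and the mixed quote styles are hard to audit. Below is a minimal alternative sketch that builds the same payload with the standard-library json module; the field names are copied from the example above, and the Python 2 register of the surrounding snippets is assumed.

import base64
import json
import os
import tempfile
import time

def buildPigAttachmentPayload():
    # Write the query to a temporary file, as in the example above.
    queryFile = "pig-" + str(time.time()) + ".q"
    tmp = tempfile.NamedTemporaryFile(delete=False)
    tmp.write("fs -copyFromLocal %s %s; cmd = load '%s'; dump cmd;"
              % (queryFile, queryFile, queryFile))
    tmp.close()

    # Read it back as binary and base64-encode it for the attachment.
    with open(tmp.name, "rb") as f:
        contents = base64.b64encode(f.read())
    os.unlink(tmp.name)

    # json.dumps handles all quoting and escaping, so the payload stays
    # well-formed regardless of what the encoded contents contain.
    return json.dumps({
        "jobInfo": {
            "jobName": "PIG-JOB-TEST",
            "description": "This is a test",
            "userName": "genietest",
            "groupName": "hadoop",
            "jobType": "pig",
            "configuration": "prod",
            "schedule": "adHoc",
            "cmdArgs": "-f " + queryFile,
            "attachments": {"data": contents, "name": queryFile}
        }
    })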
Example #2
def testXmlSubmitjob():
    print "Running testXmlSubmitjob"
    payload = '''
    <request>
      <jobInfo>
        <jobID>''' + jobID + '''</jobID>
        <jobName>HADOOP-FS-CLIENT-TEST</jobName>
        <userName>genietest</userName>
        <groupName>hadoop</groupName>
        <userAgent>laptop</userAgent>
        <jobType>hadoop</jobType>
        <schedule>adHoc</schedule>
        <cmdArgs>fs -ls /</cmdArgs>
      </jobInfo>
    </request>
    '''
    print payload
    return jobs.submitJob(serviceUrl, payload, 'application/xml')
Example #3
def testXmlSubmitjob():
    print "Running testXmlSubmitjob"
    payload = '''
    <request>
      <jobInfo>
        <jobID>''' + jobID + '''</jobID>
        <jobName>HADOOP-FS-CLIENT-TEST</jobName>
        <userName>genietest</userName>
        <groupName>hadoop</groupName>
        <userAgent>laptop</userAgent>
        <jobType>hadoop</jobType>
        <configuration>prod</configuration>
        <schedule>adHoc</schedule>
        <cmdArgs>fs -ls /</cmdArgs>
      </jobInfo>
    </request>
    '''
    print payload
    return jobs.submitJob(serviceUrl, payload, 'application/xml')
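Every example in this collection delegates to jobs.submitJob(serviceUrl, payload) with an optional content type. The real client is not shown here; purely as a hypothetical sketch, a Python 2 helper with that signature could POST the payload as below. Error handling, auth, and response parsing are omitted, and the actual jobs module may differ.

import urllib2

def submitJob(serviceUrl, payload, contentType="application/json"):
    # Hypothetical: POST the payload with the given content type and
    # return the raw response body.
    request = urllib2.Request(serviceUrl, payload,
                              {"Content-Type": contentType})
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        response.close()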
Example #4
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    payload = '''
    {
        "jobInfo":
        {
            "jobName": "HADOOP-JOB-TEST", 
            "userName" : "genietest", 
            "groupName" : "hadoop", 
            "userAgent" : "laptop",
            "jobType": "hadoop", 
            "schedule": "adHoc",
            "cmdArgs":"jar hadoop-examples.jar sleep -m 1 -mt 1", 
            "fileDependencies":"''' + GENIE_TEST_PREFIX + '''/hadoop-examples.jar"
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
Example #5
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    payload = '''
    {
        "jobInfo":
        {
            "jobName": "HADOOP-JOB-TEST", 
            "userName" : "blahgenieamsharma", 
            "groupName" : "hadoop", 
            "userAgent" : "laptop",
            "jobType": "hadoop", 
            "schedule": "ADHOC",
            "clusterName": "h2query",
            "cmdArgs":"jar hadoop-examples.jar pi 50 10", 
            "fileDependencies":"''' + GENIE_TEST_PREFIX + '''/hadoop-examples.jar"
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
Example #6
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    payload = '''
    {
        "jobInfo":
        {
            "jobName": "HADOOP-JOB-TEST", 
            "userName" : "genietest", 
            "groupName" : "hadoop", 
            "userAgent" : "laptop",
            "jobType": "hadoop", 
            "schedule": "adHoc",
            "clusterName": "h2query",
            "cmdArgs":"jar hadoop-examples.jar sleep -m 1 -mt 1", 
            "fileDependencies":"''' + GENIE_TEST_PREFIX + '''/hadoop-examples.jar"
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
Example #7
def hiveSubmitJob():
    print "Running hiveSubmitJob"
    payload = '''
    {  
        "jobInfo":
        {
            "jobName": "HIVE-KILL-TEST", 
            "userName" : "genietest", 
            "groupName" : "hadoop", 
            "userAgent" : "laptop",
            "jobType": "hive", 
            "configuration": "prod",
            "schedule": "adHoc",
            "cmdArgs": "-f hive.q",
            "fileDependencies":"''' + GENIE_TEST_PREFIX + '''/hive.q"
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)          
Example #8
def testXmlSubmitjob():
    print "Running testXmlSubmitjob"
    payload = (
        """
    <request>
      <jobInfo>
        <jobID>"""
        + jobID
        + """</jobID>
        <jobName>HADOOP-FS-CLIENT-TEST</jobName>
        <userName>genietest</userName>
        <groupName>hadoop</groupName>
        <userAgent>laptop</userAgent>
        <jobType>hadoop</jobType>
        <schedule>adHoc</schedule>
        <cmdArgs>fs -ls /</cmdArgs>
      </jobInfo>
    </request>
    """
    )
    print payload
    return jobs.submitJob(serviceUrl, payload, "application/xml")
Example #9
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    payload = '''
    {
        "jobInfo":
        {
            "jobName": "PIG-CLIENT-NO-UUID-JSON", 
            "userName" : "genietest", 
            "groupName" : "DSE", 
            "userAgent" : "laptop",
            "jobType": "pig", 
            "configuration": "prod", 
            "schedule": "adHoc",
            "cmdArgs":"-f pig.q",
            "pigVersion": "0.11",
            "fileDependencies":"''' + GENIE_TEST_PREFIX + '''/pig.q"
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
Example #10
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    payload = '''
    {
        "jobInfo":
        {
            "jobName": "PIG-CLIENT-NO-UUID-JSON", 
            "userName" : "genietest", 
            "groupName" : "DSE", 
            "userAgent" : "laptop",
            "jobType": "pig", 
            "configuration": "prod", 
            "schedule": "adHoc",
            "cmdArgs":"-f pig.q",
            "pigVersion": "0.11",
            "fileDependencies":"''' + GENIE_TEST_PREFIX + '''/pig.q"
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
Example #11
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    payload = '''
    {
        "jobInfo":
        {
            "jobName": "PIG-JOB-TEST", 
            "userName" : "amsharma", 
            "groupName" : "hadoop", 
            "userAgent" : "laptop",
            "jobType": "pig", 
            "configuration": "prod", 
            "schedule": "adHoc",
            "clusterName": "h2query",
            "cmdArgs":" -f pig2.q", 
            "fileDependencies":"''' + GENIE_TEST_PREFIX + '''/pig2.q"
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
Example #12
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    payload = '''
    {  
        "jobInfo":
        {
            "jobName": "HIVE-JOB-TEST",
            "description": "This is a test", 
            "userName" : "amsharma", 
            "groupName" : "hadoop", 
            "jobType": "hive", 
            "configuration": "prod",
            "schedule": "ADHOC",
            "clusterName": "h24query",
            "cmdArgs": "-f hive.q",
            "fileDependencies":"''' + GENIE_TEST_PREFIX + '''/hive.q"
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
Example #13
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    payload = '''
    {  
        "jobInfo":
        {
            "jobName": "HIVE-JOB-TEST",
            "description": "This is a test", 
            "userName" : "genietest", 
            "groupName" : "hadoop", 
            "jobType": "hive", 
            "configuration": "prod",
            "schedule": "adHoc",
            "clusterName": "h2query",
            "cmdArgs": "-f hive.q",
            "fileDependencies":"''' + GENIE_TEST_PREFIX + '''/hive.q"
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
Example #14
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    payload = '''
    {
        "jobInfo":
        {
            "jobName": "PIG-JOB-TEST", 
            "userName" : "amsharma", 
            "groupName" : "hadoop", 
            "userAgent" : "laptop",
            "jobType": "pig", 
            "configuration": "prod", 
            "schedule": "adHoc",
            "clusterName": "h2query",
            "cmdArgs":" -f pig2.q", 
            "fileDependencies":"''' + GENIE_TEST_PREFIX + '''/pig2.q"
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
Example #15
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    # write out a temporary file with our query/dependencies
    query = tempfile.NamedTemporaryFile(delete=False)
    name = query.name
    queryFile = 'pig-' + str(time.time()) + '.q'
    query.write("fs -copyFromLocal %s %s; cmd = load '%s'; dump cmd;" % (queryFile, queryFile, queryFile))
    query.close()
    
    # read it back in as base64 encoded binary
    query = open(name, "rb")
    contents = base64.b64encode(query.read())
    print contents
    query.close()
    os.unlink(name)
    
    payload = '''
    {  
        "jobInfo":
        {
            "jobName": "PIG-JOB-TEST",
            "description": "This is a test", 
            "userName" : "genietest", 
            "groupName" : "hadoop", 
            "jobType": "pig", 
            "configuration": "prod",
            "schedule": "adHoc",
            "cmdArgs": "-f ''' + queryFile + '''",
            "attachments": {
                "data": "''' + contents + '''",
                "name": "''' + queryFile + '''"
            }
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
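The temp-file round trip above exists only to obtain bytes to encode; when the query text is already in memory, the same attachment data can be produced in one step (Python 2, where str is a byte string):

import base64

queryText = "fs -copyFromLocal a.q a.q; cmd = load 'a.q'; dump cmd;"
# Equivalent to writing queryText to a file, reading it back in binary
# mode, and encoding the result.
contents = base64.b64encode(queryText)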
Example #16
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    payload = '''
    {  
        "jobInfo":
        {
            "jobName": "HIVE-VERSION-TEST",
            "description": "This is a test", 
            "userName" : "genietest", 
            "groupName" : "hadoop", 
            "userAgent" : "laptop",
            "jobType": "hive", 
            "configuration": "prod",
            "schedule": "adHoc",
            "hiveVersion": "0.8.1.7",
            "cmdArgs": "-f hive.q",
            "fileDependencies":"''' + GENIE_TEST_PREFIX + '''/hive.q"
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
Example #17
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    # write out a temporary file with our query/dependencies
    query = tempfile.NamedTemporaryFile(delete=False)
    name = query.name
    query.write("show tables;")
    query.close()

    # read it back in as base64 encoded binary
    query = open(name, "rb")
    contents = base64.b64encode(query.read())
    print contents
    query.close()
    os.unlink(name)

    payload = '''
    {  
        "jobInfo":
        {
            "jobName": "HIVE-JOB-TEST",
            "description": "This is a test", 
            "userName" : "genietest", 
            "groupName" : "hadoop", 
            "jobType": "hive", 
            "configuration": "prod",
            "schedule": "adHoc",
            "cmdArgs": "-f hive.q",
            "attachments": {
                "data": "''' + contents + '''",
                "name": "hive.q"
            }
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
Example #18
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    # write out a temporary file with our query/dependencies
    query = tempfile.NamedTemporaryFile(delete=False)
    name = query.name
    query.write("show tables;")
    query.close()
    
    # read it back in as base64 encoded binary
    query = open(name, "rb")
    contents = base64.b64encode(query.read())
    print contents
    query.close()
    os.unlink(name)
    
    payload = '''
    {  
        "jobInfo":
        {
            "jobName": "HIVE-JOB-TEST",
            "description": "This is a test", 
            "userName" : "genietest", 
            "groupName" : "hadoop", 
            "jobType": "hive", 
            "configuration": "prod",
            "schedule": "adHoc",
            "cmdArgs": "-f hive.q",
            "attachments": {
                "data": "''' + contents + '''",
                "name": "hive.q"
            }
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
Example #19
def testJsonSubmitjob():
    print "Running testJsonSubmitjob "
    payload = '''
    {  
        "jobInfo":
        {
            "jobName": "HIVE-VERSION-TEST",
            "description": "This is a test", 
            "userName" : "genietest", 
            "groupName" : "hadoop", 
            "userAgent" : "laptop",
            "jobType": "hive", 
            "configuration": "prod",
            "schedule": "adHoc",
            "hiveVersion": "0.8.1.7",
            "cmdArgs": "-f hive.q",
            "disableLogArchival": "true",
            "fileDependencies":"''' + GENIE_TEST_PREFIX + '''/hive.q"
        }
    }
    '''
    print payload
    print "\n"
    return jobs.submitJob(serviceUrl, payload)
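Since every payload in these examples is assembled by string concatenation, a cheap safeguard is to parse the JSON locally before submitting: json.loads raises ValueError on a malformed payload, so mistakes surface before the request is sent. A small wrapper sketch (checkedSubmit is a made-up name; jobs and serviceUrl are the same assumed helpers as above):

import json

def checkedSubmit(serviceUrl, payload):
    # Fail fast on malformed JSON instead of sending it to the service.
    json.loads(payload)
    return jobs.submitJob(serviceUrl, payload)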
Example #20
def customizeGenomePipeline(args):
    """Run the genome customization pipeline."""
    logs, scriptDir = jobs.baseDirs(args.logs, args.prefix,
                                    os.path.realpath(__file__))

    intermediateOutDir = os.path.join(args.intermediate, args.prefix)
    pathlib.Path(intermediateOutDir).mkdir(parents=True, exist_ok=True)
    intermediateOut = os.path.join(intermediateOutDir, args.prefix)

    resultsOutDir = os.path.join(args.results, args.prefix)
    pathlib.Path(resultsOutDir).mkdir(parents=True, exist_ok=True)
    resultsOut = os.path.join(resultsOutDir, args.new)

    variantsOutDir = os.path.join(resultsOutDir, "variants.sbatch")
    pathlib.Path(variantsOutDir).mkdir(parents=True, exist_ok=True)
    variantsOut = os.path.join(variantsOutDir, args.prefix)

    statsOutDir = os.path.join(resultsOutDir, "stats.sbatch")
    pathlib.Path(statsOutDir).mkdir(parents=True, exist_ok=True)
    statsOut = os.path.join(statsOutDir, args.prefix)

    prevJob = 0
    if args.bwa:
        cmd = (f"sbatch " +
               f"--output={intermediateOut}_bwa.sam " +
               f"--error={logs}_bwa_align_err.log " +
               f"--job-name={args.prefix}_bwa_align" +
               os.path.join(scriptDir, "bwa_mem.sbatch") +
               f" {args.genome} {args.reads1} {args.reads2}")
        prevJob = jobs.submitJob(cmd)
    prevJob = jobs.genericJob(prevJob, args.sort_bam, "sort_bam",
                              logs, scriptDir, args.prefix,
                              intermediateOut)
    if prevJob and args.flagstat:
        cmd = (f"sbatch --dependency=afterany:{prevJob}" +
               f"--output={statsOut}_alignment_metrics.txt " +
               f"--error={logs}_flagstats_err.log " +
               f"--job-name={args.prefix}_flagstat" +
               os.path.join(scriptDir, "flagstat.sbatch") +
               intermediateOut)
        prevJob = jobs.submitJob(cmd)
    elif args.flagstat:
        cmd = (f"sbatch " +
               f"--output={statsOut}_alignment_metrics.txt " +
               f"--error={logs}_flagstats_err.log " +
               f"--job-name={args.prefix}_flagstat" +
               os.path.join(scriptDir, "flagstat.sbatch") +
               intermediateOut)
        prevJob = jobs.submitJob(cmd)
    prevJob = jobs.genericJob(prevJob, args.mark_duplicates,
                              "mark_duplicates.sbatch", logs, scriptDir, args.prefix,
                              intermediateOut)
    prevJob = jobs.genericJob(prevJob, args.base_recalibrate,
                              "base_recalibrator.sbatch", logs, scriptDir,
                              args.prefix, intermediateOut, args.genome,
                              args.vcf)
    prevJob = jobs.genericJob(prevJob, args.caller_haplotype,
                              "haplotype_caller.sbatch", logs, scriptDir,
                              args.prefix, variantsOut, args.genome)
    prevJob = jobs.genericJob(prevJob, args.select_snps, "select_snps.sbatch",
                              logs, scriptDir, args.prefix, variantsOut,
                              args.genome)
    prevJob = jobs.genericJob(prevJob, args.select_indels, "select_indels.sbatch",
                              logs, scriptDir, args.prefix, variantsOut,
                              args.genome)
    prevJob = jobs.genericJob(prevJob, args.filter_snps, "filter_snps.sbatch",
                              logs, scriptDir, args.prefix, variantsOut,
                              args.genome)
    prevJob = jobs.genericJob(prevJob, args.alternate_ref_make,
                              "make_alternate_ref.sbatch", logs, scriptDir,
                              args.prefix, args.genome, variantsOut,
                              resultsOut)

    subprocess.run("squeue -u maxh")
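jobs.submitJob and jobs.genericJob come from a separate module that is not part of this snippet. As a point of reference only, a minimal hypothetical submitJob for this SLURM pipeline would run the assembled sbatch command line and parse the job ID from sbatch's "Submitted batch job <id>" output (Python 3.7+, matching the f-strings above; the name and the return-0-on-failure convention are assumptions, not the actual jobs module):

import shlex
import subprocess

def submitJob(cmd):
    # Hypothetical sketch: run an sbatch command line and return the
    # numeric job ID, or 0 if submission failed (prevJob starts at 0
    # in the pipeline above).
    result = subprocess.run(shlex.split(cmd), capture_output=True,
                            text=True)
    if result.returncode == 0 and result.stdout.startswith(
            "Submitted batch job"):
        return int(result.stdout.split()[-1])
    return 0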