Пример #1
0
def update_db_state(sample, status_tag, message=None):
    """Report a sample's status through the remote ``genome_api_for_gaea.pl``.

    The Perl script is executed on host 192.168.60.11 over ssh; the command
    is logged with ``printtime`` before it runs.

    Args:
        sample: value passed to the script as ``-sample_no``.
        status_tag: value passed to the script as ``-status``.
        message: optional text passed as ``-message``; omitted when falsy.
    """
    # ``shlex.quote`` on Python 3, ``pipes.quote`` on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    db_update = "/hwfssz1/ST_HEALTH/WGS/F16ZQSB1SY2582/personalgenome/lib/genome_api_for_gaea.pl"
    remote_args = [
        '/hwfssz1/ST_MCHRI/CLINIC/SOFTWARES/bin/perl', db_update,
        '-sample_no', '{}'.format(sample),
        '-status', '{}'.format(status_tag),
    ]
    if message:
        remote_args.extend(['-message', '{}'.format(message)])

    # ssh hands its command to the remote shell as one string, so each word
    # is quoted; otherwise a message containing spaces is split and only its
    # first word reaches -message.
    remote_cmd = ' '.join(quote(arg) for arg in remote_args)
    printtime('INFO: ssh 192.168.60.11 {}'.format(remote_cmd))
    # An argv list avoids a second round of word splitting by a local shell.
    subprocess.call(['ssh', '192.168.60.11', remote_cmd])
Пример #2
0
def update_db_state(sample, status_tag, message=None):
    """Report a sample's status through the remote ``genome_api_for_gaea.pl``.

    The Perl script is executed on host 192.168.60.11 over ssh; the command
    is logged with ``printtime`` before it runs.

    Args:
        sample: value passed to the script as ``-sample_no``.
        status_tag: value passed to the script as ``-status``.
        message: optional text passed as ``-message``; omitted when falsy.
    """
    # ``shlex.quote`` on Python 3, ``pipes.quote`` on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    db_update = "/hwfssz1/ST_HEALTH/WGS/F16ZQSB1SY2582/personalgenome/lib/genome_api_for_gaea.pl"
    remote_args = [
        '/hwfssz1/ST_MCHRI/CLINIC/SOFTWARES/bin/perl', db_update,
        '-sample_no', '{}'.format(sample),
        '-status', '{}'.format(status_tag),
    ]
    if message:
        remote_args.extend(['-message', '{}'.format(message)])

    # ssh hands its command to the remote shell as one string, so each word
    # is quoted; otherwise a message containing spaces is split and only its
    # first word reaches -message.
    remote_cmd = ' '.join(quote(arg) for arg in remote_args)
    printtime('INFO: ssh 192.168.60.11 {}'.format(remote_cmd))
    # An argv list avoids a second round of word splitting by a local shell.
    subprocess.call(['ssh', '192.168.60.11', remote_cmd])
Пример #3
0
def main():
    """Re-run scheduled jobs described by a state file and a rerun file.

    Parses ``--state`` and ``--rerun``, loads the state through
    ``ParseConfig``, sets ``exclusive_task`` to ``'False'`` on the ``init``
    section and, when present, on ``bamSort``, ``bamSort_M`` and
    ``bamindex``, then hands the state to ``Scheduler``.  Once
    ``sched.start()`` returns, a ``success`` marker file is written into
    ``state.stateDir``.

    Returns:
        0 on completion, 1 when either input path does not exist.
    """
    program_name = os.path.basename(sys.argv[0])
    program_license = '''{0}
      Created by huangzhibo on {1}.
      Last updated on {2}.
      Copyright 2017 BGI bigData. All rights reserved.
    USAGE'''.format(" v".join([program_name, __version__]), str(__date__),
                    str(__updated__))

    parser = ArgumentParser(description=program_license,
                            formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-s",
                        "--state",
                        dest="state",
                        help="state file,[default: %(default)s]",
                        required=True)
    parser.add_argument("-r",
                        "--rerun",
                        dest="rerun",
                        help="rerun file,[default: %(default)s]",
                        required=True)

    # With no arguments at all, show the usage instead of argparse's error.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Process arguments
    args = parser.parse_args()
    if not os.path.exists(args.state):
        printtime('ERROR: (--state: %s) - No such file or directory' %
                  args.state)
        return 1
    if not os.path.exists(args.rerun):
        # Report the option and path that actually failed the check.
        printtime('ERROR: (--rerun: %s) - No such file or directory' %
                  args.rerun)
        return 1

    state = ParseConfig(args.state).parseState()
    if 'bamSort' in state:
        state.bamSort.exclusive_task = 'False'
    if 'bamSort_M' in state:
        state.bamSort_M.exclusive_task = 'False'
    state.init.exclusive_task = 'False'
    if 'bamindex' in state:
        state.bamindex.exclusive_task = 'False'

    state.logger = Logger(os.path.join(state.scriptsDir, 'log'), '1',
                          'job_scheduler', False).getlog()
    sched = Scheduler(state)
    sched.parse_rerun(args.rerun)
    sched.start()

    # Marker file written after the scheduler has returned.
    with open(os.path.join(state.stateDir, 'success'), 'w') as f:
        f.write('done!')

    return 0
Пример #4
0
def main():
    """Wait for the per-region VCF parts, then merge them.

    Reads one bed path per line from ``--bedlist`` and reduces each to its
    basename without extension.  ``check_part_vcf`` is polled up to 30 times,
    5 seconds apart, until it reports the parts in ``--part_vcf_dir`` as
    ready.  When ready, ``merge_vcf`` writes ``--output`` and, if
    ``--gvcf_out`` was given, the gVCF output as well.

    Returns:
        0 after the polling/merge phase (also when the parts never became
        ready), 1 when ``--bedlist`` or ``--part_vcf_dir`` does not exist.
    """
    program_name = os.path.basename(sys.argv[0])
    program_license = '''{0}
      Created by huangzhibo on {1}.
      Last updated on {2}.
      Copyright 2017 BGI bigData. All rights reserved.
    USAGE'''.format(" v".join([program_name, __version__]), str(__date__), str(__updated__))

    parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-b", "--bedlist", help="sample_name,[default: %(default)s]", required=True)
    parser.add_argument("-o", "--output", help="output file path,[default: %(default)s]", required=True)
    parser.add_argument("-g", "--gvcf_out", help="gvcf output file path,[default: %(default)s]", )
    parser.add_argument("-p", "--part_vcf_dir", help="part_vcf_dir,[default: %(default)s]", required=True)

    # With no arguments at all, show the usage instead of argparse's error.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Process arguments
    args = parser.parse_args()
    if not os.path.exists(args.bedlist):
        # This parser has no --state option; report the path that was checked.
        printtime('ERROR: (--bedlist: %s) - No such file or directory' % args.bedlist)
        return 1

    if not os.path.exists(args.part_vcf_dir):
        printtime('ERROR: (--part_vcf_dir: %s) - No such file or directory' % args.part_vcf_dir)
        return 1

    bed_prefix = []
    with open(args.bedlist, 'r') as beds:
        for bed in beds:
            # Strip the line terminator so extension-less names do not keep a
            # trailing newline, and skip blank lines.
            bed = bed.strip()
            if bed:
                bed_prefix.append(os.path.splitext(os.path.basename(bed))[0])

    # Poll for the part files: at most 30 checks, 5 seconds apart.
    status = False
    for _ in range(30):
        status = check_part_vcf(bed_prefix, args.part_vcf_dir)
        if status:
            break
        time.sleep(5)

    if status:
        print("part_vcf_dir is good!")
        merge_vcf(bed_prefix, args.part_vcf_dir, args.output)
        if args.gvcf_out:
            merge_vcf(bed_prefix, args.part_vcf_dir, args.gvcf_out, True)
    else:
        # NOTE(review): the exit status stays 0 here, as before; confirm
        # whether callers should see a failure instead.
        print("part_vcf_dir is bad!")

    return 0
Пример #5
0
def check_out_sdn(p, failFile, is_at_TH=False):
    """Extract the job id from the output of a qsub/sbatch submission.

    Every stdout line is logged and matched against the submission message:
    ``Submitted batch job <id>`` when ``is_at_TH`` is true, otherwise
    ``Your job <id> ("<name>") has been submitted``.  Each stdout line that
    does not match is reported through ``writefail``.  stderr lines are
    logged with an ``ERROR:`` prefix.

    Args:
        p: the ``subprocess.Popen`` of the submit command, created with
            stdout and stderr pipes.
        failFile: path handed to ``writefail`` for non-matching lines.
        is_at_TH: selects the sbatch message format instead of the qsub one.

    Returns:
        The id from the last matching stdout line, or ``''`` if none matched.
    """
    if is_at_TH:
        pattern = r'^Submitted batch job (\d+)$'
    else:
        pattern = r'^Your job (\d+) \("(.*?)"\) has been submitted$'

    # communicate() drains both pipes together (reading them one after the
    # other can block when the unread pipe fills) and reaps the child.
    out, err = p.communicate()

    JobId = ''
    # splitlines() removes only the line terminator, so an unterminated last
    # line keeps its final character.
    for line in (out or '').splitlines():
        printtime(line)
        jobInfo = re.match(pattern, line)
        if jobInfo:
            JobId = jobInfo.group(1)
        else:
            writefail("err happened when submit (qsub/sbatch). ", failFile)
    for line in (err or '').splitlines():
        printtime('ERROR: %s' % line)
    return JobId
Пример #6
0
def merge_vcf(bed_prefix, part_vcf_dir, out, gvcf=False):
    """Concatenate per-region VCF parts into one file with ``bcftools concat``.

    Writes the list of part files (``<part_vcf_dir>/<prefix><suffix>``, one
    per line) to ``part_vcf.list`` -- or ``part_gvcf.list`` when ``gvcf`` is
    true -- inside ``part_vcf_dir`` and passes that list to bcftools.

    Args:
        bed_prefix: iterable of part-file name prefixes.
        part_vcf_dir: directory holding the parts; the list file goes here too.
        out: path of the merged output (bcftools ``-o``).
        gvcf: use ``gvcf_suffix`` instead of ``vcf_suffix`` for the part names.
    """
    if gvcf:
        suffix, list_name = gvcf_suffix, 'part_gvcf.list'
    else:
        suffix, list_name = vcf_suffix, 'part_vcf.list'
    part_vcf_list = os.path.join(part_vcf_dir, list_name)

    with open(part_vcf_list, 'w') as wf:
        for p in bed_prefix:
            wf.write(os.path.join(part_vcf_dir, p + suffix) + '\n')

    # An argv list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    cmd = ['/hwfssz1/BIGDATA_COMPUTING/software/bin/bcftools', 'concat',
           '--threads', '24', '-O', 'z', '-a', '-f', part_vcf_list, '-o', out]
    printtime('INFO: {}'.format(' '.join(cmd)))
    returncode = subprocess.call(cmd)
    if returncode != 0:
        # Surface a failed merge in the log instead of dropping the status.
        printtime('ERROR: bcftools concat exited with status {}'.format(returncode))
Пример #7
0
def merge_vcf(bed_prefix, part_vcf_dir, out, gvcf=False):
    """Concatenate per-region VCF parts into one file with ``bcftools concat``.

    Writes the list of part files (``<part_vcf_dir>/<prefix><suffix>``, one
    per line) to ``part_vcf.list`` -- or ``part_gvcf.list`` when ``gvcf`` is
    true -- inside ``part_vcf_dir`` and passes that list to bcftools.

    Args:
        bed_prefix: iterable of part-file name prefixes.
        part_vcf_dir: directory holding the parts; the list file goes here too.
        out: path of the merged output (bcftools ``-o``).
        gvcf: use ``gvcf_suffix`` instead of ``vcf_suffix`` for the part names.
    """
    if gvcf:
        suffix, list_name = gvcf_suffix, 'part_gvcf.list'
    else:
        suffix, list_name = vcf_suffix, 'part_vcf.list'
    part_vcf_list = os.path.join(part_vcf_dir, list_name)

    with open(part_vcf_list, 'w') as wf:
        for p in bed_prefix:
            wf.write(os.path.join(part_vcf_dir, p + suffix) + '\n')

    # An argv list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    cmd = ['/hwfssz1/BIGDATA_COMPUTING/software/bin/bcftools', 'concat',
           '--threads', '24', '-O', 'z', '-a', '-f', part_vcf_list, '-o', out]
    printtime('INFO: {}'.format(' '.join(cmd)))
    returncode = subprocess.call(cmd)
    if returncode != 0:
        # Surface a failed merge in the log instead of dropping the status.
        printtime('ERROR: bcftools concat exited with status {}'.format(returncode))
Пример #8
0
def main():
    """Re-run scheduled jobs described by a state file and a rerun file.

    Parses ``--state`` and ``--rerun``, loads the state through
    ``ParseConfig``, sets ``exclusive_task`` to ``'False'`` on the ``init``
    section and, when present, on ``bamSort``, ``bamSort_M`` and
    ``bamindex``, then hands the state to ``Scheduler``.  Once
    ``sched.start()`` returns, a ``success`` marker file is written into
    ``state.stateDir``.

    Returns:
        0 on completion, 1 when either input path does not exist.
    """
    program_name = os.path.basename(sys.argv[0])
    program_license = '''{0}
      Created by huangzhibo on {1}.
      Last updated on {2}.
      Copyright 2017 BGI bigData. All rights reserved.
    USAGE'''.format(" v".join([program_name, __version__]), str(__date__), str(__updated__))

    parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-s", "--state", dest="state", help="state file,[default: %(default)s]", required=True)
    parser.add_argument("-r", "--rerun", dest="rerun", help="rerun file,[default: %(default)s]", required=True)

    # With no arguments at all, show the usage instead of argparse's error.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Process arguments
    args = parser.parse_args()
    if not os.path.exists(args.state):
        printtime('ERROR: (--state: %s) - No such file or directory' % args.state)
        return 1
    if not os.path.exists(args.rerun):
        # Report the option and path that actually failed the check.
        printtime('ERROR: (--rerun: %s) - No such file or directory' % args.rerun)
        return 1

    state = ParseConfig(args.state).parseState()
    if 'bamSort' in state:
        state.bamSort.exclusive_task = 'False'
    if 'bamSort_M' in state:
        state.bamSort_M.exclusive_task = 'False'
    state.init.exclusive_task = 'False'
    if 'bamindex' in state:
        state.bamindex.exclusive_task = 'False'

    state.logger = Logger(os.path.join(state.scriptsDir, 'log'), '1', 'job_scheduler', False).getlog()
    sched = Scheduler(state)
    sched.parse_rerun(args.rerun)
    sched.start()

    # Marker file written after the scheduler has returned.
    with open(os.path.join(state.stateDir, 'success'), 'w') as f:
        f.write('done!')

    return 0
Пример #9
0
def main():
    """Check a sample's step results and record the resulting status.

    With ``--step`` (comma-separated), each step is verified through
    ``result_check``; the first failing step records an ``error`` status
    and ends the run with exit code 1.  If every step passes the status is
    ``done``.  Without ``--step`` the status is ``running``.  The status is
    always stored with ``update_local_state`` and, when ``--db_state`` is
    set, also pushed with ``update_db_state``.

    Returns:
        0 normally, 1 when the state file is missing or a step has no results.
    """
    prog = os.path.basename(sys.argv[0])
    banner = '''{0}
      Created by huangzhibo on {1}.
      Last updated on {2}.
      Copyright 2017 BGI bigData. All rights reserved.
    USAGE'''.format(" v".join([prog, __version__]), str(__date__), str(__updated__))

    parser = ArgumentParser(description=banner, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-s", "--state", dest="state",
                        help="state file,[default: %(default)s]", required=True)
    parser.add_argument("-n", "--sample_name", dest="sample_name",
                        help="sample_name,[default: %(default)s]", required=True)
    parser.add_argument("-t", "--step", dest="step",
                        help="step,[default: %(default)s]")
    parser.add_argument("-d", "--db_state", action="store_true",
                        help="update db state,[default: %(default)s]")

    # Bare invocation: show usage and stop.
    if len(sys.argv) == 1:
        parser.print_help()
        exit(1)

    args = parser.parse_args()
    if not os.path.exists(args.state):
        printtime('ERROR: (--state: %s) - No such file or directory' % args.state)
        return 1

    state = ParseConfig(args.state).parseState()
    state_dir = os.path.join(state.stateDir, 'sample_state')
    if not os.path.exists(state_dir):
        os.mkdir(state_dir)

    if not args.step:
        status_tag = 'running'
    else:
        status_tag = 'done'
        for step in args.step.split(','):
            if result_check(args.sample_name, state, step):
                printtime('INFO:  (step: %s) - completed' % step)
                continue

            # First step without results: record the error and stop.
            printtime('ERROR: (step: %s) - No such file or directory' % step)
            reason = 'No results for step: {}'.format(step)
            update_local_state(args.sample_name, state_dir, 'error', reason)
            if args.db_state:
                update_db_state(args.sample_name, 'error', reason)
            return 1

    print(status_tag)
    update_local_state(args.sample_name, state_dir, status_tag)
    if args.db_state:
        update_db_state(args.sample_name, status_tag)
    return 0
Пример #10
0
def run(args, state):
    """Run the listed steps' scripts for one sample, in order, on this host.

    Steps whose platform is ``'S'`` are skipped.  A step is not started when
    one of its dependencies is recorded as not complete.  Each started script
    runs through ``sh`` and is judged by ``check_log``; the ``alignment``
    step is treated as complete regardless of that result.  For a completed
    step the remaining stdout is copied to ``<script>.o``.

    Args:
        args: parsed options providing ``sampleName`` and the comma-separated
            ``jobs`` list.
        state: pipeline state providing ``analysisDict``, ``analysisList``,
            ``results`` and ``scriptsDir``.

    Returns:
        True unless a started step failed.  Steps skipped because a
        dependency failed do not change the result.
    """
    analysisDict = state.analysisDict
    sampleName = args.sampleName
    logger = Logger(os.path.join(state.scriptsDir, 'log'), '1',
                    'gaeaJobMonitor', False).getlog()
    isComplete = bundle()

    all_done = True

    jobList = args.jobs.split(',')

    # Drop a leading 'init' step that has no script.
    if jobList[0] == 'init' and not state.results['init'].get('script'):
        jobList = jobList[1:]

    for num, step in enumerate(jobList):
        if analysisDict[step].platform == 'S':
            continue

        # Step number used in log messages; shifted by one when the analysis
        # list does not start with 'init'.
        n = state.analysisList.index(step)
        if state.analysisList[0] != 'init':
            n += 1

        script = state.results[step]['script'][sampleName]
        if num > 0:
            for depStep in analysisDict[step].depend:
                if not isComplete[depStep]:
                    isComplete[step] = False
                    break
        if step in isComplete and isComplete[step] == False:
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            continue

        printtime('step: %s start...' % step)
        p = subprocess.Popen('sh %s' % script,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        isComplete[step] = check_log(p, script, sampleName, n, step)
        if isComplete[step] or step == 'alignment':
            if step == 'alignment':
                isComplete[step] = True
            printtime("step: %s complete" % step)
            logger.info('%s - step %d: %s complete' % (sampleName, n, step))
            # 'with' closes the .o file; rstrip removes only the newline so
            # an unterminated last line is not truncated.
            with open(script + '.o', 'w') as out_fh:
                for line in p.stdout.readlines():
                    out_fh.write(line.rstrip('\n') + '\n')
            p.wait()
        else:
            all_done = False
            printtime("%s failed" % step)
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            if p.returncode is None:
                p.kill()
                # Reap the killed child so it does not linger as a zombie.
                p.wait()

    return all_done
Пример #11
0
def run(args,state):
    """Run the listed steps' scripts for one sample, in order, on this host.

    Steps whose platform is ``'S'`` are skipped.  A step is not started when
    one of its dependencies is recorded as not complete.  Each started script
    runs through ``sh`` and is judged by ``check_log``; the ``alignment``
    step is treated as complete regardless of that result.  For a completed
    step the remaining stdout is copied to ``<script>.o``.

    Args:
        args: parsed options providing ``sampleName`` and the comma-separated
            ``jobs`` list.
        state: pipeline state providing ``analysisDict``, ``analysisList``,
            ``results`` and ``scriptsDir``.

    Returns:
        True unless a started step failed.  Steps skipped because a
        dependency failed do not change the result.
    """
    analysisDict = state.analysisDict
    sampleName = args.sampleName
    logger = Logger(os.path.join(state.scriptsDir,'log'),'1','gaeaJobMonitor',False).getlog()
    isComplete = bundle()

    all_done = True

    jobList = args.jobs.split(',')

    # Drop a leading 'init' step that has no script.
    if jobList[0] == 'init' and not state.results['init'].get('script'):
        jobList = jobList[1:]

    for num, step in enumerate(jobList):
        if analysisDict[step].platform == 'S':
            continue

        # Step number used in log messages; shifted by one when the analysis
        # list does not start with 'init'.
        n = state.analysisList.index(step)
        if state.analysisList[0] != 'init':
            n += 1

        script = state.results[step]['script'][sampleName]
        if num > 0:
            for depStep in analysisDict[step].depend:
                if not isComplete[depStep]:
                    isComplete[step] = False
                    break
        if step in isComplete and isComplete[step] == False:
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            continue

        printtime('step: %s start...' % step)
        p = subprocess.Popen('sh %s' % script, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        isComplete[step] = check_log(p, script, sampleName, n, step)
        if isComplete[step] or step == 'alignment':
            if step == 'alignment':
                isComplete[step] = True
            printtime("step: %s complete" % step)
            logger.info('%s - step %d: %s complete' % (sampleName, n, step))
            # 'with' closes the .o file; rstrip removes only the newline so
            # an unterminated last line is not truncated.
            with open(script + '.o', 'w') as out_fh:
                for line in p.stdout.readlines():
                    out_fh.write(line.rstrip('\n') + '\n')
            p.wait()
        else:
            all_done = False
            printtime("%s failed" % step)
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            if p.returncode is None:
                p.kill()
                # Reap the killed child so it does not linger as a zombie.
                p.wait()

    return all_done
Пример #12
0
def main(argv=None):  # IGNORE:C0111
    '''Command line options.

    Loads the state file, resets the ``failed``/``success`` markers, rotates
    the previous log into ``<logfile>.backup``, and calls ``multi_run`` for
    the first rerun entry.  ``state.results`` is then dumped to
    ``results.json``.  With ``-t submit`` a ``check_complete.sh`` job is
    queued through qsub (held on the last job ids when they are numeric);
    with ``-t local`` the log is scanned for lines containing ``fail`` and a
    ``success`` marker is written when none is found.

    Returns:
        0 on success or keyboard interrupt, 2 when the state file is missing
        or any other exception is raised.
    '''

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
    program_shortdesc = __import__('__main__').__doc__.split("\n")[1]
    program_license = '''%s

  Created by huangzhibo on %s.
  Copyright 2016 BGI_bigData. All rights reserved.

USAGE
''' % (program_shortdesc, str(__date__))

    # Bound before the try block so the error handler can tell whether the
    # state file was parsed before the failure.
    state = None
    try:
        # Setup argument parser
        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
        parser.add_argument("-s", "--state", dest="state", help="state file,[default: %(default)s]",required=True)
        parser.add_argument("-r", "--rerun", dest="rerun", default='all',help="rerun file,[default: %(default)s]")
        parser.add_argument("-t", "--type", dest="type", choices=['write','local','submit'], type=str, default="write", help="1.write: just write run scripts; 2.local: run tasks on one local node; 3:submit: submit tasks to SGE [default: %(default)s]")
        parser.add_argument("-q", "--queue", dest="queue", help="the queue of the job. [default: %(default)s]")
        parser.add_argument("-p", "--partition", dest="partition",  help="the job partition. [default: %(default)s]")
        parser.add_argument('-V', '--version', action='version', version=program_version_message)

        # Process arguments
        args = parser.parse_args()
        if not os.path.exists(args.state):
            printtime('ERROR: (--state: %s) - No such file or directory' % args.state)
            return 2
        state = ParseConfig(args.state).parseState()

        state.failFile = os.path.join(state.stateDir,"failed")
        state.successFile = os.path.join(state.stateDir,"success")
        deleteFile(state.failFile)
        deleteFile(state.successFile)

        # Keep the previous log: first run renames it, later runs append it
        # to the existing backup and remove it.
        if os.path.exists(state.logfile):
            if not os.path.exists('%s.backup' % state.logfile):
                os.rename(state.logfile,'%s.backup' % state.logfile)
            else:
                subprocess.call("cat %s >>%s.backup" % (state.logfile,state.logfile),shell=True)
                deleteFile(state.logfile)

        # 'all', an empty value, or a rerun file that yields nothing all fall
        # back to the full rerun derived from the state.
        rerunInfo = None
        if args.rerun and args.rerun != 'all':
            rerunInfo = parseRerun(args.rerun)
        if not rerunInfo:
            rerunInfo = parseRerun(state,False)

        if args.type == 'submit' and state.hadoop.is_at_TH:
            args.type = 'local'

        state.hasSDNstep = False
        for l in rerunInfo:
            for s in l[1].split(','):
                if state.analysisDict[s].platform == 'S':
                    state.hasSDNstep = True

        if state.hasSDNstep:
            if args.type == 'local' and not state.hadoop.is_at_TH:
                writefail("Has standalone step, please submit tasks to SGE. (-t submit)",state.failFile)

            if not state.hadoop.is_at_TH:
                if not args.queue: # or not args.partition:
                    writefail("Has standalone step, please set parameters: -q -P ",state.failFile)

        lastJobId = multi_run(args,state,rerunInfo[0])

        with open('%s/results.json' % state.stateDir, 'w') as results_fh:
            json.dump(state.results, results_fh, indent=4)

        if args.type == 'submit':
            script = os.path.join(state.gaeaScriptsDir,"check_complete.sh")
            writeCheckShell(script,state,state.failFile,state.successFile)
            sh_err = '%s.e' % script
            sh_out = '%s.o' % script

            hold_jid = ''
            printtime("check end (%s) " %lastJobId)
            if re.match(r"\d+", lastJobId):
                hold_jid = lastJobId

            # One qsub line: optional hold, then either the user's queue
            # (and partition, if given) or the fixed gaea.q/hadoop pair.
            cmd = ['qsub','-cwd','-l','vf=0.5g']
            if hold_jid:
                cmd += ['-hold_jid',hold_jid]
            if state.hasSDNstep:
                cmd += ['-q',args.queue]
                if args.partition:
                    cmd += ['-P',args.partition]
            else:
                cmd += ['-q','gaea.q','-P','hadoop']
            cmd += ['-e',sh_err,'-o',sh_out, script]

            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # communicate() drains both pipes together and waits for qsub.
            out, err = p.communicate()
            for line in (out or '').splitlines():
                printtime(line)
                jobInfo = re.match(r'^Your job (\d+) \("(.*?)"\) has been submitted$', line)
                if not jobInfo:
                    writefail("err happened when qsub. ", state.failFile)
            for line in (err or '').splitlines():
                printtime('ERROR: (check_complete.sh) %s' % line)
        elif args.type == 'local':
            failed = False
            # Only open the log after confirming it exists, so a missing log
            # is reported as "no start" instead of raising.
            if os.path.exists(state.logfile):
                with open(state.logfile,'r') as logFile:
                    for line in logFile:
                        if re.match('.*fail.*', line):
                            writefail("%s "% line, state.failFile)
                            failed = True
            else:
                writefail("no start", state.failFile)
                failed = True
            if not failed:
                with open(os.path.join(state.stateDir,'success'), 'w') as success_fh:
                    success_fh.write('success\n')

        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
    except Exception as e:
        # The marker can only be written once the state (and its stateDir)
        # is known.
        if state is not None:
            with open(os.path.join(state.stateDir,'failed'), 'w') as f:
                f.write("Error in submit jobs!")

        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        return 2
Пример #13
0
def _dep_job_ids(state, step, dep_steps, sample_name):
    """Collect the job ids that `step`'s script for `sample_name` waits on."""
    ids = []
    for ds in dep_steps:
        if state.results[ds].multiscript:
            ids.append(state.results[ds]['jobId'][state.option.multiSampleName])
        elif state.results[step].multiscript:
            for sample in state.results[ds].jobId:
                ids.append(state.results[ds].jobId[sample])
        else:
            ids.append(state.results[ds]['jobId'][sample_name])
    return ids


def _qsub_cmd(vf, queue, partition, sh_err, sh_out, script, hold_jid=''):
    """Build a qsub argv; -hold_jid and -P are included only when set."""
    cmd = ['qsub', '-cwd', '-l', vf]
    if hold_jid:
        cmd += ['-hold_jid', hold_jid]
    cmd += ['-q', queue]
    if partition:
        cmd += ['-P', partition]
    cmd += ['-e', sh_err, '-o', sh_out, script]
    return cmd


def _sge_mem_request(state, step):
    """Return the `vf=` resource string for `step`, defaulting to 5g."""
    if state[step].get('mem'):
        return 'vf=%s' % state[step].mem
    printtime("Standalone Step %s: No set mem info for SGE. Default:'5G'. " % step)
    return 'vf=5g'


def multi_run(args,state,rerunInfo):
    """Write the gaea shell scripts for one rerun entry and run/submit them.

    ``rerunInfo`` is a ``(sample name, comma-separated steps)`` pair.  The
    steps are grouped by ``getJobList``; one ``gaea[_<n>].sh`` is written per
    group.  With ``args.type == 'local'`` the script is executed with ``sh``
    and the group's non-``'H'`` steps are submitted with sbatch (when
    ``state.hadoop.is_at_TH``) or qsub.  With ``args.type == 'submit'`` the
    gaea script itself is queued with qsub, followed by the non-``'H'``
    steps, each held on the job ids of its dependencies.  Job ids are stored
    under ``state.results[step]['jobId']``.

    Returns:
        Comma-joined job ids of the most recently submitted step (or of the
        gaea job when no later step replaced them); ``''`` if nothing was
        submitted.
    """
    failFile = state.failFile
    analysisDict = state.analysisDict

    lastJobId = []

    sampleName = rerunInfo[0]
    jobList = getJobList(analysisDict,rerunInfo[1].split(','))

    for n,job in enumerate(jobList):
        if len(jobList) > 1:
            shellName = 'gaea_%d' % n
        else:
            shellName = 'gaea'

        gaeaShell = os.path.join(state.gaeaScriptsDir,sampleName,'%s.sh'%shellName)
        writeGaeaShell(gaeaShell,state,job,sampleName)

        if args.type == 'local':
            p = subprocess.Popen('sh %s' % gaeaShell, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            check_out(p, gaeaShell)
            p.wait()
            exit_status = p.returncode
            if exit_status != 0:
                writefail('(gaea.sh) step failed with status %d' % exit_status, failFile)

            for step in job.split(','):
                if analysisDict[step].platform == 'H':
                    continue

                lastJobId = []
                scriptsDict = state.results[step]['script']
                jobIdDict = bundle()

                # Only dependencies with platform 'S' contribute job ids.
                depStep = [deps for deps in analysisDict[step].depend
                           if analysisDict[deps].platform == 'S']

                for sample_name in scriptsDict:
                    script = scriptsDict[sample_name]
                    sh_err = '%s.e' % script
                    sh_out = '%s.o' % script
                    dep_ids = _dep_job_ids(state, step, depStep, sample_name)

                    if state.hadoop.is_at_TH:
                        cmd = ['sbatch','-p',args.partition]
                        if dep_ids:
                            cmd += ['-d','afterok' + ''.join(':%s' % j for j in dep_ids)]
                        cmd += ['-e',sh_err,'-o',sh_out, script]
                        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                        jobIdDict[sample_name] = check_out_sdn(p, failFile,True)
                    else:
                        hold_jid = ''.join('%s,' % j for j in dep_ids)
                        # Same 5g default as the submit path, so the memory
                        # request is always defined.
                        vf = _sge_mem_request(state, step)
                        cmd = _qsub_cmd(vf, args.queue, args.partition, sh_err, sh_out, script, hold_jid)
                        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                        jobIdDict[sample_name] = check_out_sdn(p, failFile)
                    lastJobId.append(jobIdDict[sample_name])
                state.results[step]['jobId'] = jobIdDict

        elif args.type == 'submit':
            sh_err = '%s.e' % gaeaShell
            sh_out = '%s.o' % gaeaShell
            gaea_queue = 'gaea.q'
            gaea_partition = 'hadoop'
            if state.hadoop.cluster != 'cluster35':
                gaea_queue = args.queue
                gaea_partition = args.partition

            # From the second group on, hold the gaea job on the 'S'
            # dependencies of the group's first step.
            hold_jid = ''
            if n > 0:
                for ds in analysisDict[job.split(',')[0]].depend:
                    if analysisDict[ds].platform == 'S':
                        # Separate loop name: the outer sampleName is still
                        # needed for the next group's script path.
                        for dep_sample in state.results[ds].jobId:
                            hold_jid =  hold_jid + '%s,' % state.results[ds].jobId[dep_sample]
            cmd = _qsub_cmd('vf=2g', gaea_queue, gaea_partition, sh_err, sh_out, gaeaShell, hold_jid)

            p = subprocess.Popen(cmd,stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # communicate() drains both pipes together and waits for qsub.
            out, err = p.communicate()
            for line in (err or '').splitlines():
                printtime('ERROR: (%s) %s' % (shellName,line))
            for line in (out or '').splitlines():
                printtime(line)
                jobInfo = re.match(r'^Your job (\d+) \("(.*?)"\) has been submitted$', line)
                if jobInfo:
                    state.results.gaeaJobId = jobInfo.group(1)
                    lastJobId.append(jobInfo.group(1))
                else:
                    writefail("err happened when qsub. (%s) " % shellName, failFile)
                    exit(1)

            for step in job.split(','):
                if analysisDict[step].platform == 'H':
                    continue
                jobIdDict = bundle()
                scriptsDict = state.results[step]['script']

                depStep = []
                depHDP = False
                for deps in analysisDict[step].depend:
                    if analysisDict[deps].platform == 'S':
                        depStep.append(deps)
                    else:
                        depHDP = True

                lastJobId = []
                for sample_name in scriptsDict:
                    script = scriptsDict[sample_name]
                    sh_err = '%s.e' % script
                    sh_out = '%s.o' % script

                    hold_jid = ''.join('%s,' % j for j in _dep_job_ids(state, step, depStep, sample_name))
                    # A dependency that is not platform 'S' means the step
                    # must also wait for the gaea job submitted above.
                    if depHDP:
                        hold_jid = hold_jid + '%s,' % state.results.gaeaJobId

                    vf = _sge_mem_request(state, step)
                    cmd = _qsub_cmd(vf, args.queue, args.partition, sh_err, sh_out, script, hold_jid)
                    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    jobIdDict[sample_name] = check_out_sdn(p, failFile)
                    lastJobId.append(jobIdDict[sample_name])

                state.results[step]['jobId'] = jobIdDict

    allLastJobId = ','.join(lastJobId)
    return allLastJobId
Пример #14
0
            if state.analysisList[0] == 'init':
                state.analysisList = state.analysisList[1:]
            jobs = ','.join(state.analysisList)
            rerun_fh.write('%s\t%s\n' % (state.option.multiSampleName, jobs))

    json.dump(state, open('%s/state.json' % usercfg.stateDir, 'w'), indent=4)
    sys.stdout.flush()
    sys.stderr.flush()

    #write run.sh
    runShell = os.path.join(usercfg.scriptsDir, 'run.sh')
    writeRunShell(runShell, state)
    os.chmod(
        runShell, stat.S_IRWXU + stat.S_IRGRP + stat.S_IXGRP + stat.S_IROTH +
        stat.S_IXOTH)
    printtime("\nPlease run scripts/run.sh to submit tasks.")
    subprocess.call("sh %s write" % runShell, shell=True)


def main(argv=None):  # IGNORE:C0111
    '''Command line options.'''

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version,
Пример #15
0
def result_check(data, size_threshold=4096):
    """Tell whether the result file *data* looks complete.

    A file counts as complete when it exists and its size in bytes is at
    least *size_threshold*.  Otherwise an error line is logged through
    ``printtime`` and ``False`` is returned.
    """
    if os.path.exists(data) and os.path.getsize(data) >= size_threshold:
        return True

    printtime('ERROR: (data: %s) - Is incomplete!!!' % data)
    return False
Пример #16
0
def main():
    """Check a sample's step results and record the resulting state.

    Command-line options:
      -s/--state        state file (required)
      -n/--sample_name  sample name (required)
      -t/--step         comma-separated list of steps to verify (optional)
      -d/--db_state     also push the state through ``update_db_state``

    When ``--step`` is given, every listed step is verified with
    ``result_check``; the first failing step records an ``error`` state and
    stops.  If all steps pass the state is ``done``.  Without ``--step`` the
    state is recorded as ``running``.

    Returns 0 on success, 1 when the state file is missing or a step has no
    results.  Exits with status 1 after printing help when called with no
    arguments.
    """
    program_name = os.path.basename(sys.argv[0])
    program_license = '''{0}
      Created by huangzhibo on {1}.
      Last updated on {2}.
      Copyright 2017 BGI bigData. All rights reserved.
    USAGE'''.format(" v".join([program_name, __version__]), str(__date__),
                    str(__updated__))

    parser = ArgumentParser(description=program_license,
                            formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-s",
                        "--state",
                        dest="state",
                        help="state file,[default: %(default)s]",
                        required=True)
    parser.add_argument("-n",
                        "--sample_name",
                        dest="sample_name",
                        help="sample_name,[default: %(default)s]",
                        required=True)
    parser.add_argument("-t",
                        "--step",
                        dest="step",
                        help="step,[default: %(default)s]")
    parser.add_argument("-d",
                        "--db_state",
                        action="store_true",
                        help="update db state,[default: %(default)s]")

    if len(sys.argv) == 1:
        parser.print_help()
        # sys.exit instead of the interactive-only ``exit`` helper from site.
        sys.exit(1)

    # Process arguments
    args = parser.parse_args()
    if not os.path.exists(args.state):
        printtime('ERROR: (--state: %s) - No such file or directory' %
                  args.state)
        return 1

    state = ParseConfig(args.state).parseState()
    state_dir = os.path.join(state.stateDir, 'sample_state')
    if not os.path.exists(state_dir):
        os.mkdir(state_dir)

    if args.step:
        status_tag = 'done'
        for step in args.step.split(','):
            if result_check(args.sample_name, state, step):
                printtime('INFO:  (step: %s) - completed' % step)
                continue

            # First failing step: record the error locally (and in the DB
            # when requested), then stop without checking later steps.
            printtime('ERROR: (step: %s) - No such file or directory' %
                      step)
            message = 'No results for step: {}'.format(step)
            update_local_state(args.sample_name, state_dir, 'error', message)
            if args.db_state:
                update_db_state(args.sample_name, 'error', message)
            return 1
    else:
        # No steps to verify: the sample is only being marked as started.
        status_tag = 'running'

    # Function-call form prints the same text on Python 2 and Python 3.
    print(status_tag)
    update_local_state(args.sample_name, state_dir, status_tag)
    if args.db_state:
        update_db_state(args.sample_name, status_tag)
    return 0
Пример #17
0
def main():
    """Wait for all per-bed part VCFs to appear and optionally link them.

    Command-line options:
      -b/--bedlist       file listing one bed path per line (required)
      -o/--outdir        directory to create for the part VCF links; it must
                         not exist yet
      -s/--suffix        part VCF file suffix (default ``.hc.vcf.gz``)
      -p/--part_vcf_dir  directory holding the part VCFs (required)
      -i/--index_check   also require a ``.tbi`` or ``.idx`` index

    The expected file names are the bed basenames (extension dropped) plus
    the suffix.  ``check_part_vcf`` is polled up to 30 times, 5 seconds
    apart, until it succeeds.

    Returns 0 when the part VCFs are complete, 1 when an input path is
    missing, and -1 when the output directory already exists or the part
    VCFs never became complete.  Exits with status 1 after printing help
    when called with no arguments.
    """
    program_name = os.path.basename(sys.argv[0])
    program_license = '''{0}
      Created by huangzhibo on {1}.
      Last updated on {2}.
      Copyright 2017 BGI bigData. All rights reserved.
    USAGE'''.format(" v".join([program_name, __version__]), str(__date__), str(__updated__))

    parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-b", "--bedlist", help="the bed.list to run HC streaming [required]", required=True)
    parser.add_argument("-o", "--outdir",
                        help="outdir of part_vcf's symbolic link [%(default)s]", )
    parser.add_argument("-s", "--suffix", help="part_vcf file suffix [%(default)s]", default='.hc.vcf.gz', )
    parser.add_argument("-p", "--part_vcf_dir", help="the tmpfir of part_vcf [required]", required=True)
    parser.add_argument("-i", "--index_check", action="store_true", default=False,
                        help="check vcf index [%(default)s]", )

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Process arguments
    args = parser.parse_args()
    # Report the path that is actually missing; this parser has no --state
    # option, so formatting args.state would raise AttributeError.
    if not os.path.exists(args.bedlist):
        printtime('ERROR: (--bedlist: %s) - No such file or directory' % args.bedlist)
        return 1

    if not os.path.exists(args.part_vcf_dir):
        printtime('ERROR: (--part_vcf_dir: %s) - No such file or directory' % args.part_vcf_dir)
        return 1

    vcf_basenames = []
    with open(args.bedlist, 'r') as beds:
        for bed in beds:
            # Strip the newline so extension-less entries stay clean, and
            # skip blank lines that would otherwise yield a bare suffix.
            bed = bed.strip()
            if not bed:
                continue
            stem = os.path.splitext(os.path.basename(bed))[0]
            vcf_basenames.append('{}{}'.format(stem, args.suffix))

    status = False
    for _ in range(30):
        status = check_part_vcf(vcf_basenames, args.part_vcf_dir)
        if status:
            if args.index_check:
                # Accept either index flavour: try .tbi first, then .idx.
                status = check_part_vcf_index(vcf_basenames, args.part_vcf_dir, '.tbi')
                if not status:
                    status = check_part_vcf_index(vcf_basenames, args.part_vcf_dir, '.idx')
            break
        time.sleep(5)

    link_vcf_dir = ''
    if args.outdir:
        link_vcf_dir = os.path.abspath(args.outdir)
        if os.path.exists(link_vcf_dir):
            printtime('ERROR: (--outdir: %s) - This directory is already exists! Please remove it and check again!' % link_vcf_dir)
            return -1
        os.makedirs(link_vcf_dir)

    if not status:
        print("part_vcf_dir is bad!")
        return -1

    print("part_vcf_dir is good!")
    if link_vcf_dir:
        link_vcf(vcf_basenames, args.part_vcf_dir, link_vcf_dir)
    return 0
Пример #18
0
        else:
            if state.analysisList[0] == 'init' and state.analysisList[1] == 'filter':
                state.analysisList = state.analysisList[1:]
            jobs = ','.join(state.analysisList)
            rerun_fh.write('%s\t%s\n' % (state.option.multiSampleName,jobs))
    
    json.dump(state, open('%s/state.json' % usercfg.stateDir, 'w'),indent=4)
    sys.stdout.flush()
    sys.stderr.flush()
    
#     json.dump(state, open('%s/state.json' % usercfg.stateDir, 'w'),indent=4)
    #write run.sh
    runShell = os.path.join(usercfg.scriptsDir,'run.sh')
    writeRunShell(runShell,state)
    os.chmod(runShell, stat.S_IRWXU+stat.S_IRGRP+stat.S_IXGRP+stat.S_IROTH+stat.S_IXOTH)
    printtime("\nPlease run scripts/run.sh to submit tasks.")
    subprocess.call("sh %s write" % runShell,shell=True)
    
    runShellTest = os.path.join(usercfg.scriptsDir,'test.sh')
    with open(runShellTest, 'w') as f:
        state_file = os.path.join(state.stateDir,'state.json')
        rerun_list_file = os.path.join(state.stateDir,'rerun.list')
        f.write("#!/bin/sh\n")
        f.write("source {}/bin/activate\n".format(state.GAEA_HOME))
        f.write("job_scheduler.py -s {} -r {}\n".format(state_file, rerun_list_file))
    os.chmod(runShell, stat.S_IRWXU+stat.S_IRGRP+stat.S_IXGRP+stat.S_IROTH+stat.S_IXOTH)

def main(argv=None): # IGNORE:C0111
    '''Command line options.'''
    
    if argv is None:
Пример #19
0
def main():
    """Wait for all per-bed part VCFs to appear, then merge them.

    Command-line options:
      -b/--bedlist       file listing one bed path per line (required)
      -o/--output        output file path (required)
      -g/--gvcf_out      gvcf output file path (optional)
      -p/--part_vcf_dir  directory holding the part VCFs (required)

    The bed basenames (extension dropped) are passed to ``check_part_vcf``,
    which is polled up to 30 times, 5 seconds apart, until it succeeds.  On
    success ``merge_vcf`` writes ``--output`` and, when requested,
    ``--gvcf_out``.

    Returns 0 after a successful merge and 1 when an input path is missing
    or the part VCFs never became complete.  Exits with status 1 after
    printing help when called with no arguments.
    """
    program_name = os.path.basename(sys.argv[0])
    program_license = '''{0}
      Created by huangzhibo on {1}.
      Last updated on {2}.
      Copyright 2017 BGI bigData. All rights reserved.
    USAGE'''.format(" v".join([program_name, __version__]), str(__date__),
                    str(__updated__))

    parser = ArgumentParser(description=program_license,
                            formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-b",
                        "--bedlist",
                        help="bed list file,[default: %(default)s]",
                        required=True)
    parser.add_argument("-o",
                        "--output",
                        help="output file path,[default: %(default)s]",
                        required=True)
    parser.add_argument(
        "-g",
        "--gvcf_out",
        help="gvcf output file path,[default: %(default)s]",
    )
    parser.add_argument("-p",
                        "--part_vcf_dir",
                        help="part_vcf_dir,[default: %(default)s]",
                        required=True)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Process arguments
    args = parser.parse_args()
    # Report the path that is actually missing; this parser has no --state
    # option, so formatting args.state would raise AttributeError.
    if not os.path.exists(args.bedlist):
        printtime('ERROR: (--bedlist: %s) - No such file or directory' %
                  args.bedlist)
        return 1

    if not os.path.exists(args.part_vcf_dir):
        printtime('ERROR: (--part_vcf_dir: %s) - No such file or directory' %
                  args.part_vcf_dir)
        return 1

    bed_prefix = []
    with open(args.bedlist, 'r') as beds:
        for bed in beds:
            # Strip the newline so extension-less entries stay clean, and
            # skip blank lines that would otherwise yield an empty prefix.
            bed = bed.strip()
            if not bed:
                continue
            bed_prefix.append(os.path.splitext(os.path.basename(bed))[0])

    status = False
    for _ in range(30):
        status = check_part_vcf(bed_prefix, args.part_vcf_dir)
        if status:
            break
        time.sleep(5)

    if not status:
        # Nothing was merged, so report failure instead of returning 0.
        print("part_vcf_dir is bad!")
        return 1

    print("part_vcf_dir is good!")
    merge_vcf(bed_prefix, args.part_vcf_dir, args.output)
    if args.gvcf_out:
        # NOTE(review): the trailing True presumably selects gVCF merging;
        # confirm against merge_vcf's signature.
        merge_vcf(bed_prefix, args.part_vcf_dir, args.gvcf_out, True)
    return 0