Example No. 1
def index(request):#{{{
    path_tmp = "%s/static/tmp"%(SITE_ROOT)
    path_md5 = "%s/static/md5"%(SITE_ROOT)
    if not os.path.exists(path_result):
        os.mkdir(path_result, 0o755)
    if not os.path.exists(path_tmp):
        os.mkdir(path_tmp, 0o755)
    if not os.path.exists(path_md5):
        os.mkdir(path_md5, 0o755)
    base_www_url_file = "%s/static/log/base_www_url.txt"%(SITE_ROOT)
    if not os.path.exists(base_www_url_file):
        base_www_url = "http://" + request.META['HTTP_HOST']
        myfunc.WriteFile(base_www_url, base_www_url_file, "w", True)

    # read the local config file if it exists
    configfile = "%s/config/config.json"%(SITE_ROOT)
    config = {}
    if os.path.exists(configfile):
        text = myfunc.ReadFile(configfile)
        config = json.loads(text)

    if rootname_progname in config:
        g_params.update(config[rootname_progname])
        g_params['MAXSIZE_UPLOAD_FILE_IN_BYTE'] = g_params['MAXSIZE_UPLOAD_FILE_IN_MB'] * 1024*1024

    return submit_seq(request)
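
All of these views lean on two small I/O helpers from myfunc whose implementation is not shown on this page. Only the call signatures are taken from the surrounding code (content-first WriteFile with an optional flush flag, a ReadFile that returns the whole file as one string); the bodies below are a minimal sketch, not the original implementation:

def ReadFile(infile, mode="r"):  # hypothetical stand-in for myfunc.ReadFile
    """Return the whole file content as a string, or "" if unreadable."""
    try:
        with open(infile, mode) as fp:
            return fp.read()
    except OSError:
        return ""

def WriteFile(content, outfile, mode="w", isFlush=False):  # hypothetical stand-in
    """Write content to outfile; flush before closing when isFlush is set."""
    with open(outfile, mode) as fp:
        fp.write(content)
        if isFlush:
            fp.flush()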
def download(request):  #{{{
    info = {}
    webcom.set_basic_config(request, info, g_params)

    for key in ["db_prodres", "db_subcons"]:
        zipfile = os.path.join(path_static, "download", f"{key}.zip")
        md5file = f"{zipfile}.md5"
        if os.path.exists(zipfile):
            filesize = os.path.getsize(os.path.realpath(zipfile))
            filesize_humanreadable = myfunc.Size_byte2human(filesize)
            info[f"size_{key}"] = filesize_humanreadable
            info[f"zipfile_{key}"] = os.path.basename(zipfile)
        else:
            info[f"zipfile_{key}"] = ""
            info[f"size_{key}"] = ""
        if os.path.exists(md5file):
            try:
                md5_key = myfunc.ReadFile(md5file).strip().split()[0]
                info[f"md5_key_{key}"] = md5_key
            except (OSError, IndexError):
                info[f"md5_key_{key}"] = ""
        else:
            info[f"md5_key_{key}"] = ""

    info['jobcounter'] = webcom.GetJobCounter(info)
    return render(request, 'pred/download.html', info)
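
download() expects each archive to ship with a .md5 companion file whose first whitespace-separated token is the digest, i.e. the md5sum output convention. A hedged sketch of how such companions could be produced (the helper name is illustrative, not from the original code):

import hashlib
import os

def write_md5_companion(zipfile_path):  # illustrative helper, not in the source
    """Write <zipfile>.md5 in md5sum format: "<digest>  <filename>"."""
    h = hashlib.md5()
    with open(zipfile_path, "rb") as fp:
        for chunk in iter(lambda: fp.read(1 << 20), b""):
            h.update(chunk)
    with open(zipfile_path + ".md5", "w") as out:
        out.write("%s  %s\n" % (h.hexdigest(), os.path.basename(zipfile_path)))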
Example No. 3
def checkjob(ctx, jobid=""):#{{{
    rstdir = "%s/%s"%(path_result, jobid)
    soap_req = ctx.transport.req
    hostname = soap_req.META['HTTP_HOST']
    result_url = "http://" + hostname + "/static/" + "result/%s/%s.zip"%(jobid, jobid)
    status = "None"
    url = ""
    errinfo = ""
    if not os.path.exists(rstdir):
        status = "None"
        errinfo = "Error! jobid %s does not exist."%(jobid)
    else:
        starttagfile = "%s/%s"%(rstdir, "runjob.start")
        finishtagfile = "%s/%s"%(rstdir, "runjob.finish")
        failtagfile = "%s/%s"%(rstdir, "runjob.failed")
        errfile = "%s/%s"%(rstdir, "runjob.err")
        if os.path.exists(failtagfile):
            status = "Failed"
            errinfo = ""
            if os.path.exists(errfile):
                errinfo = myfunc.ReadFile(errfile)
        elif os.path.exists(finishtagfile):
            status = "Finished"
            url = result_url
            errinfo = ""
        elif os.path.exists(starttagfile):
            status = "Running"
        else:
            status = "Wait"
    for s in [status, url, errinfo]:
        yield s
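
checkjob() derives the job state purely from tag files in the result directory, and the order of the checks matters: failure is tested before success, so a job that both started and failed reports Failed. The same precedence, factored into a standalone helper (a sketch for illustration, not part of the original module):

import os

def job_status_from_tags(rstdir):  # illustrative refactoring of the logic above
    """Map tag files to a status string: Failed > Finished > Running > Wait."""
    if not os.path.exists(rstdir):
        return "None"
    if os.path.exists(os.path.join(rstdir, "runjob.failed")):
        return "Failed"
    if os.path.exists(os.path.join(rstdir, "runjob.finish")):
        return "Finished"
    if os.path.exists(os.path.join(rstdir, "runjob.start")):
        return "Running"
    return "Wait"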
Example No. 4
def get_results_eachseq(request, jobid="1", seqindex="1"):  #{{{
    resultdict = {}
    webcom.set_basic_config(request, resultdict, g_params)

    rstdir = "%s/%s" % (path_result, jobid)
    outpathname = jobid

    jobinfofile = "%s/jobinfo" % (rstdir)
    jobinfo = myfunc.ReadFile(jobinfofile).strip()
    jobinfolist = jobinfo.split("\t")
    if len(jobinfolist) >= 8:
        submit_date_str = jobinfolist[0]
        numseq = int(jobinfolist[3])
        jobname = jobinfolist[5]
        email = jobinfolist[6]
        method_submission = jobinfolist[7]
    else:
        submit_date_str = ""
        numseq = 1
        jobname = ""
        email = ""
        method_submission = "web"

    status = ""

    resultdict['jobid'] = jobid
    resultdict['subdirname'] = seqindex
    resultdict['jobname'] = jobname
    resultdict['outpathname'] = os.path.basename(outpathname)
    resultdict['BASEURL'] = g_params['BASEURL']
    resultdict['status'] = status
    resultdict['numseq'] = numseq
    base_www_url = "http://" + request.META['HTTP_HOST']

    resultfile = "%s/%s/%s/%s" % (rstdir, outpathname, seqindex,
                                  "query.result.txt")
    htmlfigure_file = "%s/%s/%s/plot/%s" % (rstdir, outpathname, seqindex,
                                            "query_0.html")
    if os.path.exists(htmlfigure_file):
        resultdict['htmlfigure'] = "%s/%s/%s/%s/plot/%s" % (
            "result", jobid, jobid, seqindex,
            os.path.basename(htmlfigure_file))
    else:
        resultdict['htmlfigure'] = ""

    if os.path.exists(rstdir):
        resultdict['isResultFolderExist'] = True
    else:
        resultdict['isResultFolderExist'] = False

    if os.path.exists(resultfile):
        resultdict['resultfile'] = os.path.basename(resultfile)
    else:
        resultdict['resultfile'] = ""

    resultdict['jobcounter'] = webcom.GetJobCounter(resultdict)
    return render(request, 'pred/get_results_eachseq.html', resultdict)
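
The jobinfo record parsed here reappears in Examples 5, 7 and 9: a single tab-separated line where field 0 is the submission date, field 3 the number of sequences, field 5 the job name, field 6 the email, and field 7 the submission method. The repeated if/else could be factored into one helper; a sketch under those field-position assumptions:

def parse_jobinfo(jobinfo_line):  # illustrative helper, not in the source
    """Return (submit_date_str, numseq, jobname, email, method_submission)
    with the same defaults the views fall back to."""
    strs = jobinfo_line.split("\t")
    if len(strs) >= 8:
        return (strs[0], int(strs[3]), strs[5], strs[6], strs[7])
    return ("", 1, "", "", "web")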
Example No. 5
def get_results(request, jobid="1"):#{{{
    resultdict = {}
    webcom.set_basic_config(request, resultdict, g_params)

    rstdir = "%s/%s"%(path_result, jobid)
    outpathname = jobid
    resultfile = "%s/%s/%s/%s"%(rstdir, jobid, outpathname, "query.result.txt")
    tarball = "%s/%s.tar.gz"%(rstdir, outpathname)
    zipfile = "%s/%s.zip"%(rstdir, outpathname)
    starttagfile = "%s/%s"%(rstdir, "runjob.start")
    finishtagfile = "%s/%s"%(rstdir, "runjob.finish")
    failtagfile = "%s/%s"%(rstdir, "runjob.failed")
    errfile = "%s/%s"%(rstdir, "runjob.err")
    query_seqfile = "%s/%s"%(rstdir, "query.fa")
    raw_query_seqfile = "%s/%s"%(rstdir, "query.raw.fa")
    seqid_index_mapfile = "%s/%s/%s"%(rstdir, jobid, "seqid_index_map.txt")
    finished_seq_file = "%s/%s/finished_seqs.txt"%(rstdir, jobid)
    statfile = "%s/%s/stat.txt"%(rstdir, jobid)
    method_submission = "web"

    jobinfofile = "%s/jobinfo"%(rstdir)
    jobinfo = myfunc.ReadFile(jobinfofile).strip()
    jobinfolist = jobinfo.split("\t")
    if len(jobinfolist) >= 8:
        submit_date_str = jobinfolist[0]
        numseq = int(jobinfolist[3])
        jobname = jobinfolist[5]
        email = jobinfolist[6]
        method_submission = jobinfolist[7]
    else:
        submit_date_str = ""
        numseq = 1
        jobname = ""
        email = ""
        method_submission = "web"

    isValidSubmitDate = True
    try:
        submit_date = webcom.datetime_str_to_time(submit_date_str)
    except ValueError:
        isValidSubmitDate = False
    current_time = datetime.now(timezone(TZ))

    resultdict['isResultFolderExist'] = True
    resultdict['errinfo'] = ""
    if os.path.exists(errfile):
        resultdict['errinfo'] = myfunc.ReadFile(errfile)

    status = ""
    queuetime = ""
    runtime = ""
    if not os.path.exists(rstdir):
        resultdict['isResultFolderExist'] = False
        resultdict['isFinished'] = False
        resultdict['isFailed'] = True
        resultdict['isStarted'] = False
    elif os.path.exists(failtagfile):
        resultdict['isFinished'] = False
        resultdict['isFailed'] = True
        resultdict['isStarted'] = True
        status = "Failed"
        start_date_str = ""
        if os.path.exists(starttagfile):
            start_date_str = myfunc.ReadFile(starttagfile).strip()
        isValidStartDate = True
        isValidFailedDate = True
        try:
            start_date = webcom.datetime_str_to_time(start_date_str)
        except ValueError:
            isValidStartDate = False
        failed_date_str = myfunc.ReadFile(failtagfile).strip()
        try:
            failed_date = webcom.datetime_str_to_time(failed_date_str)
        except ValueError:
            isValidFailedDate = False
        if isValidSubmitDate and isValidStartDate:
            queuetime = myfunc.date_diff(submit_date, start_date)
        if isValidStartDate and isValidFailedDate:
            runtime = myfunc.date_diff(start_date, failed_date)
    else:
        resultdict['isFailed'] = False
        if os.path.exists(finishtagfile):
            resultdict['isFinished'] = True
            resultdict['isStarted'] = True
            status = "Finished"
            isValidStartDate = True
            isValidFinishDate = True
            if os.path.exists(starttagfile):
                start_date_str = myfunc.ReadFile(starttagfile).strip()
            else:
                start_date_str = ""
            try:
                start_date = webcom.datetime_str_to_time(start_date_str)
            except ValueError:
                isValidStartDate = False
            finish_date_str = myfunc.ReadFile(finishtagfile).strip()
            try:
                finish_date = webcom.datetime_str_to_time(finish_date_str)
            except ValueError:
                isValidFinishDate = False
            if isValidSubmitDate and isValidStartDate:
                queuetime = myfunc.date_diff(submit_date, start_date)
            if isValidStartDate and isValidFinishDate:
                runtime = myfunc.date_diff(start_date, finish_date)
        else:
            resultdict['isFinished'] = False
            if os.path.exists(starttagfile):
                isValidStartDate = True
                start_date_str = myfunc.ReadFile(starttagfile).strip()
                try:
                    start_date = webcom.datetime_str_to_time(start_date_str)
                except ValueError:
                    isValidStartDate = False
                resultdict['isStarted'] = True
                status = "Running"
                if isValidSubmitDate and isValidStartDate:
                    queuetime = myfunc.date_diff(submit_date, start_date)
                if isValidStartDate:
                    runtime = myfunc.date_diff(start_date, current_time)
            else:
                resultdict['isStarted'] = False
                status = "Wait"
                if isValidSubmitDate:
                    queuetime = myfunc.date_diff(submit_date, current_time)

    color_status = webcom.SetColorStatus(status)

    file_seq_warning = "%s/%s/%s/%s"%(SITE_ROOT, "static/result", jobid, "query.warn.txt")
    seqwarninfo = ""
    if os.path.exists(file_seq_warning):
        seqwarninfo = myfunc.ReadFile(file_seq_warning)
        seqwarninfo = seqwarninfo.strip()

    resultdict['file_seq_warning'] = os.path.basename(file_seq_warning)
    resultdict['seqwarninfo'] = seqwarninfo
    resultdict['jobid'] = jobid
    resultdict['subdirname'] = "seq_0"
    resultdict['jobname'] = jobname
    resultdict['outpathname'] = os.path.basename(outpathname)
    resultdict['resultfile'] = os.path.basename(resultfile)
    resultdict['tarball'] = os.path.basename(tarball)
    resultdict['zipfile'] = os.path.basename(zipfile)
    resultdict['submit_date'] = submit_date_str
    resultdict['queuetime'] = queuetime
    resultdict['runtime'] = runtime
    resultdict['BASEURL'] = g_params['BASEURL']
    resultdict['status'] = status
    resultdict['color_status'] = color_status
    resultdict['numseq'] = numseq
    resultdict['query_seqfile'] = os.path.basename(query_seqfile)
    resultdict['raw_query_seqfile'] = os.path.basename(raw_query_seqfile)
    base_www_url = "http://" + request.META['HTTP_HOST']
    # note that here one must add http:// in front of the url
    resultdict['url_result'] = "%s/pred/result/%s"%(base_www_url, jobid)

    sum_run_time = 0.0
    average_run_time = float(g_params['AVERAGE_RUNTIME_PER_SEQ_IN_SEC'])  # default average_run_time
    num_finished = 0
    cntnewrun = 0
    cntcached = 0
    newrun_table_list = [] # this is used for calculating the remaining time
    # get seqid_index_map
    if os.path.exists(finished_seq_file):
        resultdict['index_table_header'] = ["No.", "ID", "Length", 
                "Variant", "Prediction", "Prediction score", "Severity", "Severity score", "RunTime(s)", "Source"]
        index_table_content_list = []
        indexmap_content = myfunc.ReadFile(finished_seq_file).split("\n")
        cnt = 0
        for line in indexmap_content:
            strs = line.split("\t")
            if len(strs)>=7:
                subfolder = strs[0]
                length_str = strs[1]
                source = strs[4]
                try:
                    runtime_in_sec_str = "%.1f"%(float(strs[5]))
                    if source == "newrun":
                        sum_run_time += float(strs[5])
                        cntnewrun += 1
                    elif source == "cached":
                        cntcached += 1
                except ValueError:
                    runtime_in_sec_str = ""
                desp = strs[6]

                # Attempt to read the output prediction file in the subfolder
                output_pred_file = "%s/%s/%s/output_predictions"%(rstdir, jobid, subfolder)
                resultdict['output_pred_file'] = None
                if os.path.exists(output_pred_file):
                    resultdict['output_pred_file'] = os.path.basename(output_pred_file)
                    prediction_lines = myfunc.ReadFile(output_pred_file).split("\n")
                    for pred_line in prediction_lines:
                        if pred_line == "":
                            continue
                        # split once instead of re-splitting for every field
                        (pred_identifier, pred_variant, pred_class, pred_score,
                                pred_severity, pred_sev_score) = pred_line.split('\t')[:6]
                        rank = "%d"%(cnt+1)
                        if cnt < g_params['MAX_ROWS_TO_SHOW_IN_TABLE']:
                            content_line = [rank, pred_identifier, length_str, pred_variant, pred_class,
                                    pred_score, pred_severity, pred_sev_score, runtime_in_sec_str, source]
                            index_table_content_list.append(content_line)
                            cnt += 1

                # See if the entropy csv exists in the result directory
                resultdict['entropy_data_file'] = None
                output_entropy_csv = "%s/%s/%s/entropy_data.csv"%(rstdir, jobid, subfolder)
                if os.path.exists(output_entropy_csv):
                    resultdict['entropy_data_file'] = os.path.basename(output_entropy_csv)

                    if source == "newrun":
                        newrun_table_list.append([rank, subfolder])

        if cntnewrun > 0:
            average_run_time = sum_run_time / float(cntnewrun)

        resultdict['index_table_content_list'] = index_table_content_list
        resultdict['indexfiletype'] = "finishedfile"
        resultdict['num_finished'] = cnt
        num_finished = cnt
        resultdict['percent_finished'] = "%.1f"%(float(cnt)/numseq*100)
    else:
        resultdict['index_table_header'] = []
        resultdict['index_table_content_list'] = []
        resultdict['indexfiletype'] = "finishedfile"
        resultdict['num_finished'] = 0
        resultdict['percent_finished'] = "%.1f"%(0.0)

    num_remain = numseq - num_finished
    time_remain_in_sec = num_remain * average_run_time # set default value

    resultdict['num_row_result_table'] = len(resultdict['index_table_content_list'])


    # calculate the remaining time based on the average_runtime of the last x
    # number of newrun sequences

    avg_newrun_time = webcom.GetAverageNewRunTime(finished_seq_file, window=10)

    if cntnewrun > 0 and avg_newrun_time >= 0:
        time_remain_in_sec = int(avg_newrun_time*num_remain+0.5)

    time_remain = myfunc.second_to_human(time_remain_in_sec)
    resultdict['time_remain'] = time_remain
    qdinittagfile = "%s/runjob.qdinit"%(rstdir)

    if os.path.exists(rstdir):
        resultdict['isResultFolderExist'] = True
    else:
        resultdict['isResultFolderExist'] = False

    if numseq <= 1:
        if method_submission == "web":
            resultdict['refresh_interval'] = 2
        else:
            resultdict['refresh_interval'] = 5
    else:
        #resultdict['refresh_interval'] = numseq * 2
        if os.path.exists(qdinittagfile):
            addtime = int(math.sqrt(max(0,min(num_remain, num_finished))))+1
            resultdict['refresh_interval'] = average_run_time + addtime
        else:
            resultdict['refresh_interval'] = 2

    # get stat info
    if os.path.exists(statfile):#{{{
        content = myfunc.ReadFile(statfile)
        lines = content.split("\n")
        for line in lines:
            strs = line.split()
            if len(strs) >= 2:
                resultdict[strs[0]] = strs[1]
                percent = "%.1f"%(int(strs[1])/float(numseq)*100)
                newkey = strs[0].replace('num_', 'per_')
                resultdict[newkey] = percent
#}}}
    resultdict['MAX_ROWS_TO_SHOW_IN_TABLE'] = g_params['MAX_ROWS_TO_SHOW_IN_TABLE']
    resultdict['jobcounter'] = webcom.GetJobCounter(resultdict)
    return render(request, 'pred/get_results.html', resultdict)
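
The remaining-time estimate above multiplies the number of unfinished sequences by the average runtime of the last ten newly computed ("newrun") sequences, obtained from webcom.GetAverageNewRunTime. Its implementation is not shown here; the following is a sketch matching how its return value is used (seconds, negative when no newrun entries exist), assuming the same finished_seqs.txt layout as this example (source in column 4, runtime in column 5) and module-level imports like the views above:

import os

def GetAverageNewRunTime(finished_seq_file, window=10):  # hypothetical stand-in
    """Average runtime in seconds of the last `window` newrun entries,
    or -1.0 when there is nothing to average."""
    if not os.path.exists(finished_seq_file):
        return -1.0
    runtimes = []
    for line in myfunc.ReadFile(finished_seq_file).split("\n"):
        strs = line.split("\t")
        if len(strs) >= 6 and strs[4] == "newrun":
            try:
                runtimes.append(float(strs[5]))
            except ValueError:
                pass
    tail = runtimes[-window:]
    return sum(tail) / len(tail) if tail else -1.0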
Example No. 6
def main(g_params):#{{{
    submitjoblogfile = "%s/submitted_seq.log"%(path_log)
    runjoblogfile = "%s/runjob_log.log"%(path_log)
    finishedjoblogfile = "%s/finished_job.log"%(path_log)

    if not os.path.exists(path_cache):
        os.mkdir(path_cache)

    loop = 0
    while True:
        if os.path.exists("%s/CACHE_CLEANING_IN_PROGRESS"%(path_result)):
            # pause while cache cleaning is in progress; sleep so the loop
            # does not busy-wait at full CPU
            time.sleep(g_params['SLEEP_INTERVAL'])
            continue
        # load the config file if it exists
        configfile = "%s/config/config.json"%(basedir)
        config = {}
        if os.path.exists(configfile):
            text = myfunc.ReadFile(configfile)
            config = json.loads(text)

        if rootname_progname in config:
            g_params.update(config[rootname_progname])

        if os.path.exists(black_iplist_file):
            g_params['blackiplist'] = myfunc.ReadIDList(black_iplist_file)

        os.environ['TZ'] = g_params['TZ']
        time.tzset()

        avail_computenode = webcom.ReadComputeNode(computenodefile) # return value is a dict
        g_params['vip_user_list'] = myfunc.ReadIDList2(vip_email_file,  col=0)
        num_avail_node = len(avail_computenode)

        webcom.loginfo("loop %d"%(loop), gen_logfile)

        isOldRstdirDeleted = False
        if loop % g_params['STATUS_UPDATE_FREQUENCY'][0] == g_params['STATUS_UPDATE_FREQUENCY'][1]:
            qdcom.RunStatistics_basic(webserver_root, gen_logfile, gen_errfile)
            isOldRstdirDeleted = webcom.DeleteOldResult(path_result, path_log,
                    gen_logfile, MAX_KEEP_DAYS=g_params['MAX_KEEP_DAYS'])
            webcom.CleanServerFile(path_static, gen_logfile, gen_errfile)

        if 'DEBUG_ARCHIVE' in g_params and g_params['DEBUG_ARCHIVE']:
            webcom.loginfo("Run ArchiveLogFile, path_log=%s, threshold_logfilesize=%d"%(path_log, threshold_logfilesize), gen_logfile)
        webcom.ArchiveLogFile(path_log, threshold_logfilesize=threshold_logfilesize) 

        qdcom.CreateRunJoblog(loop, isOldRstdirDeleted, g_params)

        # Get number of jobs submitted to the remote server based on the
        # runjoblogfile
        runjobidlist = myfunc.ReadIDList2(runjoblogfile,0)
        remotequeueDict = {}
        for node in avail_computenode:
            remotequeueDict[node] = []
        for jobid in runjobidlist:
            rstdir = "%s/%s"%(path_result, jobid)
            remotequeue_idx_file = "%s/remotequeue_seqindex.txt"%(rstdir)
            if os.path.exists(remotequeue_idx_file):
                content = myfunc.ReadFile(remotequeue_idx_file)
                lines = content.split('\n')
                for line in lines:
                    strs = line.split('\t')
                    if len(strs)>=5:
                        node = strs[1]
                        remotejobid = strs[2]
                        if node in remotequeueDict:
                            remotequeueDict[node].append(remotejobid)

        cntSubmitJobDict = {}  # format of cntSubmitJobDict: {'node_ip': [INT, INT, STR]}
        for node in avail_computenode:
            queue_method = avail_computenode[node]['queue_method']
            # len() is never negative, so num_queue_job can be used directly
            num_queue_job = len(remotequeueDict[node])
            cntSubmitJobDict[node] = [num_queue_job,
                    g_params['MAX_SUBMIT_JOB_PER_NODE'], queue_method]

        # entries in runjoblogfile include jobs queued or running
        hdl = myfunc.ReadLineByBlock(runjoblogfile)
        if not hdl.failure:
            lines = hdl.readlines()
            while lines is not None:
                for line in lines:
                    strs = line.split("\t")
                    if len(strs) >= 11:
                        jobid = strs[0]
                        email = strs[4]
                        try:
                            numseq = int(strs[5])
                        except ValueError:
                            numseq = 1
                        try:
                            numseq_this_user = int(strs[10])
                        except ValueError:
                            numseq_this_user = 1
                        rstdir = "%s/%s"%(path_result, jobid)
                        finishtagfile = "%s/%s"%(rstdir, "runjob.finish")
                        status = strs[1]
                        webcom.loginfo("CompNodeStatus: %s"%(str(cntSubmitJobDict)), gen_logfile)

                        runjob_lockfile = "%s/%s/%s"%(path_result, jobid, "runjob.lock")
                        if os.path.exists(runjob_lockfile):
                            msg = "runjob_lockfile %s exists, ignoring job %s" %(runjob_lockfile, jobid)
                            webcom.loginfo(msg, gen_logfile)
                            continue

                        #if IsHaveAvailNode(cntSubmitJobDict):
                        if not g_params['DEBUG_NO_SUBMIT']:
                            qdcom.SubmitJob(jobid, cntSubmitJobDict, numseq_this_user, g_params)
                        qdcom.GetResult(jobid, g_params) # the start tagfile is written when got the first result
                        qdcom.CheckIfJobFinished(jobid, numseq, email, g_params)

                lines = hdl.readlines()
            hdl.close()

        myfunc.WriteFile("sleep for %d seconds\n"%(g_params['SLEEP_INTERVAL']), gen_logfile, "a", True)
        time.sleep(g_params['SLEEP_INTERVAL'])
        loop += 1

    return 0
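
main() reads the run-job log through myfunc.ReadLineByBlock, an iterator-like handle with a .failure flag, a readlines() that returns successive blocks of lines and finally None, and a close(). A minimal stand-in consistent with that usage (block size and error handling are assumptions):

class ReadLineByBlock(object):  # hypothetical stand-in for myfunc.ReadLineByBlock
    """Read a text file in blocks of lines so large logs are never held
    in memory at once; readlines() returns None at end of file."""
    def __init__(self, infile, block_size=10000):
        self.failure = False
        self._block_size = block_size
        try:
            self._fp = open(infile)
        except OSError:
            self._fp = None
            self.failure = True

    def readlines(self):
        if self._fp is None:
            return None
        lines = []
        for _ in range(self._block_size):
            line = self._fp.readline()
            if not line:
                break
            lines.append(line.rstrip("\n"))
        return lines if lines else None

    def close(self):
        if self._fp is not None:
            self._fp.close()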
Example No. 7
def get_results(request, jobid="1"):  #{{{
    resultdict = {}
    webcom.set_basic_config(request, resultdict, g_params)

    #img1 = "%s/%s/%s/%s"%(SITE_ROOT, "result", jobid, "PconsC2.s400.jpg")
    #url_img1 =  serve(request, os.path.basename(img1), os.path.dirname(img1))
    rstdir = "%s/%s" % (path_result, jobid)
    outpathname = jobid
    resultfile = "%s/%s/%s/%s" % (rstdir, jobid, outpathname,
                                  "query.result.txt")
    tarball = "%s/%s.tar.gz" % (rstdir, outpathname)
    zipfile = "%s/%s.zip" % (rstdir, outpathname)
    starttagfile = "%s/%s" % (rstdir, "runjob.start")
    finishtagfile = "%s/%s" % (rstdir, "runjob.finish")
    failtagfile = "%s/%s" % (rstdir, "runjob.failed")
    errfile = "%s/%s" % (rstdir, "runjob.err")
    query_seqfile = "%s/%s" % (rstdir, "query.fa")
    raw_query_seqfile = "%s/%s" % (rstdir, "query.raw.fa")
    seqid_index_mapfile = "%s/%s/%s" % (rstdir, jobid, "seqid_index_map.txt")
    finished_seq_file = "%s/%s/finished_seqs.txt" % (rstdir, jobid)
    statfile = "%s/%s/stat.txt" % (rstdir, jobid)
    method_submission = "web"

    jobinfofile = "%s/jobinfo" % (rstdir)
    jobinfo = myfunc.ReadFile(jobinfofile).strip()
    jobinfolist = jobinfo.split("\t")
    if len(jobinfolist) >= 8:
        submit_date_str = jobinfolist[0]
        numseq = int(jobinfolist[3])
        jobname = jobinfolist[5]
        email = jobinfolist[6]
        method_submission = jobinfolist[7]
    else:
        submit_date_str = ""
        numseq = 1
        jobname = ""
        email = ""
        method_submission = "web"

    isValidSubmitDate = True
    try:
        submit_date = webcom.datetime_str_to_time(submit_date_str)
    except ValueError:
        isValidSubmitDate = False
    current_time = datetime.now(timezone(TZ))

    resultdict['isResultFolderExist'] = True
    resultdict['errinfo'] = ""
    if os.path.exists(errfile):
        resultdict['errinfo'] = myfunc.ReadFile(errfile)

    status = ""
    queuetime = ""
    runtime = ""
    queuetime_in_sec = 0
    runtime_in_sec = 0
    if not os.path.exists(rstdir):
        resultdict['isResultFolderExist'] = False
        resultdict['isFinished'] = False
        resultdict['isFailed'] = True
        resultdict['isStarted'] = False
    elif os.path.exists(failtagfile):
        resultdict['isFinished'] = False
        resultdict['isFailed'] = True
        resultdict['isStarted'] = True
        status = "Failed"
        start_date_str = ""
        if os.path.exists(starttagfile):
            start_date_str = myfunc.ReadFile(starttagfile).strip()
        isValidStartDate = True
        isValidFailedDate = True
        try:
            start_date = webcom.datetime_str_to_time(start_date_str)
        except ValueError:
            isValidStartDate = False
        failed_date_str = myfunc.ReadFile(failtagfile).strip()
        try:
            failed_date = webcom.datetime_str_to_time(failed_date_str)
        except ValueError:
            isValidFailedDate = False
        if isValidSubmitDate and isValidStartDate:
            queuetime = myfunc.date_diff(submit_date, start_date)
            queuetime_in_sec = (start_date - submit_date).total_seconds()
        if isValidStartDate and isValidFailedDate:
            runtime = myfunc.date_diff(start_date, failed_date)
            runtime_in_sec = (failed_date - start_date).total_seconds()
    else:
        resultdict['isFailed'] = False
        if os.path.exists(finishtagfile):
            resultdict['isFinished'] = True
            resultdict['isStarted'] = True
            status = "Finished"
            isValidStartDate = True
            isValidFinishDate = True
            if os.path.exists(starttagfile):
                start_date_str = myfunc.ReadFile(starttagfile).strip()
            else:
                start_date_str = ""
            try:
                start_date = webcom.datetime_str_to_time(start_date_str)
            except ValueError:
                isValidStartDate = False
            finish_date_str = myfunc.ReadFile(finishtagfile).strip()
            try:
                finish_date = webcom.datetime_str_to_time(finish_date_str)
            except ValueError:
                isValidFinishDate = False
            if isValidSubmitDate and isValidStartDate:
                queuetime = myfunc.date_diff(submit_date, start_date)
                queuetime_in_sec = (start_date - submit_date).total_seconds()
            if isValidStartDate and isValidFinishDate:
                runtime = myfunc.date_diff(start_date, finish_date)
                runtime_in_sec = (finish_date - start_date).total_seconds()
        else:
            resultdict['isFinished'] = False
            if os.path.exists(starttagfile):
                isValidStartDate = True
                start_date_str = myfunc.ReadFile(starttagfile).strip()
                try:
                    start_date = webcom.datetime_str_to_time(start_date_str)
                except ValueError:
                    isValidStartDate = False
                resultdict['isStarted'] = True
                status = "Running"
                if isValidSubmitDate and isValidStartDate:
                    queuetime = myfunc.date_diff(submit_date, start_date)
                    queuetime_in_sec = (start_date -
                                        submit_date).total_seconds()
                if isValidStartDate:
                    runtime = myfunc.date_diff(start_date, current_time)
                    runtime_in_sec = (current_time -
                                      start_date).total_seconds()
            else:
                resultdict['isStarted'] = False
                status = "Wait"
                if isValidSubmitDate:
                    queuetime = myfunc.date_diff(submit_date, current_time)
                    queuetime_in_sec = (current_time -
                                        submit_date).total_seconds()

    color_status = webcom.SetColorStatus(status)

    file_seq_warning = "%s/%s/%s/%s" % (SITE_ROOT, "static/result", jobid,
                                        "query.warn.txt")
    seqwarninfo = ""
    if os.path.exists(file_seq_warning):
        seqwarninfo = myfunc.ReadFile(file_seq_warning)
        seqwarninfo = seqwarninfo.strip()

    resultdict['file_seq_warning'] = os.path.basename(file_seq_warning)
    resultdict['seqwarninfo'] = seqwarninfo
    resultdict['jobid'] = jobid
    resultdict['subdirname'] = "seq_0"
    resultdict['jobname'] = jobname
    resultdict['outpathname'] = os.path.basename(outpathname)
    resultdict['resultfile'] = os.path.basename(resultfile)
    resultdict['tarball'] = os.path.basename(tarball)
    resultdict['zipfile'] = os.path.basename(zipfile)
    resultdict['submit_date'] = submit_date_str
    resultdict['queuetime'] = queuetime
    resultdict['runtime'] = runtime
    resultdict['BASEURL'] = g_params['BASEURL']
    resultdict['status'] = status
    resultdict['color_status'] = color_status
    resultdict['numseq'] = numseq
    resultdict['query_seqfile'] = os.path.basename(query_seqfile)
    resultdict['raw_query_seqfile'] = os.path.basename(raw_query_seqfile)
    base_www_url = "http://" + request.META['HTTP_HOST']
    #   note that here one must add http:// in front of the url
    resultdict['url_result'] = "%s/pred/result/%s" % (base_www_url, jobid)

    sum_run_time = 0.0
    average_run_time = float(
        g_params['AVERAGE_RUNTIME_PER_SEQ_IN_SEC'])  # default average_run_time
    num_finished = 0
    cntnewrun = 0
    cntcached = 0
    newrun_table_list = []  # this is used for calculating the remaining time
    # get seqid_index_map
    if os.path.exists(finished_seq_file):
        resultdict['index_table_header'] = [
            "No.", "Length", "LOC_DEF", "LOC_DEF_SCORE", "RunTime(s)",
            "SequenceName", "Source", "FinishDate"
        ]
        index_table_content_list = []
        indexmap_content = myfunc.ReadFile(finished_seq_file).split("\n")
        cnt = 0
        for line in indexmap_content:
            strs = line.split("\t")
            if len(strs) >= 7:
                subfolder = strs[0]
                length_str = strs[1]
                loc_def_str = strs[2]
                loc_def_score_str = strs[3]
                source = strs[4]
                try:
                    finishdate = strs[7]
                except IndexError:
                    finishdate = "N/A"

                try:
                    runtime_in_sec_str = "%.1f" % (float(strs[5]))
                    if source == "newrun":
                        sum_run_time += float(strs[5])
                        cntnewrun += 1
                    elif source == "cached":
                        cntcached += 1
                except ValueError:
                    runtime_in_sec_str = ""
                desp = strs[6]
                rank = "%d" % (cnt + 1)
                if cnt < g_params['MAX_ROWS_TO_SHOW_IN_TABLE']:
                    index_table_content_list.append([
                        rank, length_str, loc_def_str, loc_def_score_str,
                        runtime_in_sec_str, desp[:30], subfolder, source,
                        finishdate
                    ])
                if source == "newrun":
                    newrun_table_list.append([rank, subfolder])
                cnt += 1
        if cntnewrun > 0:
            average_run_time = sum_run_time / float(cntnewrun)

        resultdict['index_table_content_list'] = index_table_content_list
        resultdict['indexfiletype'] = "finishedfile"
        resultdict['num_finished'] = cnt
        num_finished = cnt
        resultdict['percent_finished'] = "%.1f" % (float(cnt) / numseq * 100)
    else:
        resultdict['index_table_header'] = []
        resultdict['index_table_content_list'] = []
        resultdict['indexfiletype'] = "finishedfile"
        resultdict['num_finished'] = 0
        resultdict['percent_finished'] = "%.1f" % (0.0)

    num_remain = numseq - num_finished
    time_remain_in_sec = num_remain * average_run_time  # set default value

    # re-define runtime as the sum of all real running time
    if sum_run_time > 0.0:
        resultdict['runtime'] = myfunc.second_to_human(int(sum_run_time + 0.5))

    resultdict['num_row_result_table'] = len(
        resultdict['index_table_content_list'])

    # calculate the remaining time based on the average_runtime of the last x
    # number of newrun sequences

    avg_newrun_time = webcom.GetAverageNewRunTime(finished_seq_file, window=10)

    if cntnewrun > 0 and avg_newrun_time >= 0:
        time_remain_in_sec = int(avg_newrun_time * num_remain + 0.5)

    time_remain = myfunc.second_to_human(int(time_remain_in_sec + 0.5))
    resultdict['time_remain'] = time_remain
    qdinittagfile = "%s/runjob.qdinit" % (rstdir)

    if os.path.exists(rstdir):
        resultdict['isResultFolderExist'] = True
    else:
        resultdict['isResultFolderExist'] = False

    if numseq <= 1:
        resultdict['refresh_interval'] = webcom.GetRefreshInterval(
            queuetime_in_sec, runtime_in_sec, method_submission)
    else:
        if os.path.exists(qdinittagfile):
            addtime = int(math.sqrt(max(0, min(num_remain, num_finished)))) + 1
            resultdict['refresh_interval'] = average_run_time + addtime
        else:
            resultdict['refresh_interval'] = webcom.GetRefreshInterval(
                queuetime_in_sec, runtime_in_sec, method_submission)

    # get stat info
    if os.path.exists(statfile):  #{{{
        content = myfunc.ReadFile(statfile)
        lines = content.split("\n")
        for line in lines:
            strs = line.split()
            if len(strs) >= 2:
                resultdict[strs[0]] = strs[1]
                percent = "%.1f" % (int(strs[1]) / float(numseq) * 100)
                newkey = strs[0].replace('num_', 'per_')
                resultdict[newkey] = percent
#}}}
    resultdict['MAX_ROWS_TO_SHOW_IN_TABLE'] = g_params[
        'MAX_ROWS_TO_SHOW_IN_TABLE']
    resultdict['jobcounter'] = webcom.GetJobCounter(resultdict)
    return render(request, 'pred/get_results.html', resultdict)
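
Unlike Example 5, this variant delegates the polling interval for single-sequence jobs to webcom.GetRefreshInterval, passing the elapsed queue and run times. The real rule is not visible on this page; a plausible back-off heuristic (entirely an assumption) would grow the interval with elapsed time so long-running jobs are polled less often:

import math

def GetRefreshInterval(queuetime_in_sec, runtime_in_sec, method_submission):
    # hypothetical heuristic, NOT the real webcom.GetRefreshInterval:
    # start fast, back off as the job ages, poll WSDL submissions less often
    base = 2.0 if method_submission == "web" else 5.0
    elapsed = max(0.0, queuetime_in_sec + runtime_in_sec)
    return base + math.sqrt(elapsed) / 10.0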
Example No. 8
def SubmitJobToQueue(
        jobid,
        datapath,
        outpath,
        numseq,
        numseq_this_user,
        email,  #{{{
        host_ip,
        base_www_url):
    myfunc.WriteFile("Entering SubmitJobToQueue()\n", g_params['debugfile'],
                     "a", True)
    fafile = "%s/query.fa" % (datapath)

    if numseq == -1:
        numseq = myfunc.CountFastaSeq(fafile)
    if numseq_this_user == -1:
        numseq_this_user = numseq

    query_parafile = "%s/query.para.txt" % (outpath)

    query_para = {}
    content = myfunc.ReadFile(query_parafile)
    if content != "":
        query_para = json.loads(content)

    try:
        name_software = query_para['name_software']
    except KeyError:
        name_software = "prodres"

    runjob = "%s %s/run_job.py" % (python_exec, rundir)
    scriptfile = "%s/runjob,%s,%s,%s,%s,%d.sh" % (
        outpath, name_software, jobid, host_ip, email, numseq)
    code_str_list = []
    code_str_list.append("#!/bin/bash")
    code_str_list.append("source %s/bin/activate" % (virt_env_path))
    cmdline = "%s %s -outpath %s -tmpdir %s -jobid %s " % (
        runjob, fafile, outpath, datapath, jobid)
    if email != "":
        cmdline += "-email \"%s\" " % (email)
    if base_www_url != "":
        cmdline += "-baseurl \"%s\" " % (base_www_url)
    if g_params['isForceRun']:
        cmdline += "-force "
    if g_params['isOnlyGetCache']:
        cmdline += "-only-get-cache "
    code_str_list.append(cmdline)

    code = "\n".join(code_str_list)

    msg = "Writing scriptfile %s" % (scriptfile)
    webcom.loginfo(msg, g_params['debugfile'])

    myfunc.WriteFile(code, scriptfile, mode="w", isFlush=True)
    os.chmod(scriptfile, 0o755)

    webcom.loginfo("Getting priority", g_params['debugfile'])
    priority = myfunc.GetSuqPriority(numseq_this_user)

    if email in vip_user_list:
        priority = 999999999.0

    webcom.loginfo("priority=%d" % (priority), g_params['debugfile'])

    st1 = webcom.SubmitSlurmJob(datapath, outpath, scriptfile,
                                g_params['debugfile'])

    return st1
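
A hypothetical call, with every path and address purely illustrative; per the function body, passing -1 lets it count the sequences in query.fa itself:

st = SubmitJobToQueue(
    jobid="rst_example",            # illustrative jobid
    datapath="/data/rst_example",   # directory holding query.fa
    outpath="/results/rst_example",
    numseq=-1,                      # -1: count sequences in query.fa
    numseq_this_user=-1,            # -1: default to numseq
    email="user@example.org",
    host_ip="127.0.0.1",
    base_www_url="http://example.org")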
Example No. 9
def get_results(request, jobid="1"):  #{{{
    resultdict = {}

    webcom.set_basic_config(request, resultdict, g_params)

    #img1 = "%s/%s/%s/%s"%(SITE_ROOT, "result", jobid, "PconsC2.s400.jpg")
    #url_img1 =  serve(request, os.path.basename(img1), os.path.dirname(img1))
    rstdir = "%s/%s" % (path_result, jobid)
    outpathname = jobid
    resultfile = "%s/%s/%s/%s" % (rstdir, jobid, outpathname,
                                  "query.result.txt")
    tarball = "%s/%s.tar.gz" % (rstdir, outpathname)
    zipfile = "%s/%s.zip" % (rstdir, outpathname)
    starttagfile = "%s/%s" % (rstdir, "runjob.start")
    finishtagfile = "%s/%s" % (rstdir, "runjob.finish")
    failtagfile = "%s/%s" % (rstdir, "runjob.failed")
    runjob_errfile = "%s/%s" % (rstdir, "runjob.err")
    query_seqfile = "%s/%s" % (rstdir, "query.fa")
    raw_query_seqfile = "%s/%s" % (rstdir, "query.raw.fa")
    seqid_index_mapfile = "%s/%s/%s" % (rstdir, jobid, "seqid_index_map.txt")
    finished_seq_file = "%s/%s/finished_seqs.txt" % (rstdir, jobid)
    statfile = "%s/%s/stat.txt" % (rstdir, jobid)
    method_submission = "web"
    part_predfile = "%s/%s/query.part.top" % (rstdir, jobid)

    jobinfofile = "%s/jobinfo" % (rstdir)
    jobinfo = myfunc.ReadFile(jobinfofile).strip()
    jobinfolist = jobinfo.split("\t")
    app_type = "SCAMPI-single"
    if len(jobinfolist) >= 8:
        submit_date_str = jobinfolist[0]
        numseq = int(jobinfolist[3])
        jobname = jobinfolist[5]
        email = jobinfolist[6]
        method_submission = jobinfolist[7]
        try:
            app_type = jobinfolist[8]
        except IndexError:
            pass
    else:
        submit_date_str = ""
        numseq = 1
        jobname = ""
        email = ""
        method_submission = "web"

    isValidSubmitDate = True
    try:
        submit_date = webcom.datetime_str_to_time(submit_date_str)
    except ValueError:
        isValidSubmitDate = False
    current_time = datetime.now(timezone(TZ))

    resultdict['isResultFolderExist'] = True
    resultdict['errinfo'] = myfunc.ReadFile(runjob_errfile)

    status = ""
    queuetime = ""
    runtime = ""
    if not os.path.exists(rstdir):
        resultdict['isResultFolderExist'] = False
        resultdict['isFinished'] = False
        resultdict['isFailed'] = True
        resultdict['isStarted'] = False
    elif os.path.exists(failtagfile):
        resultdict['isFinished'] = False
        resultdict['isFailed'] = True
        resultdict['isStarted'] = True
        status = "Failed"
        start_date_str = myfunc.ReadFile(starttagfile).strip()
        isValidStartDate = True
        isValidFailedDate = True
        try:
            start_date = webcom.datetime_str_to_time(start_date_str)
        except ValueError:
            isValidStartDate = False
        failed_date_str = myfunc.ReadFile(failtagfile).strip()
        try:
            failed_date = webcom.datetime_str_to_time(failed_date_str)
        except ValueError:
            isValidFailedDate = False
        if isValidSubmitDate and isValidStartDate:
            queuetime = myfunc.date_diff(submit_date, start_date)
        if isValidStartDate and isValidFailedDate:
            runtime = myfunc.date_diff(start_date, failed_date)
    else:
        resultdict['isFailed'] = False
        if os.path.exists(finishtagfile):
            resultdict['isFinished'] = True
            resultdict['isStarted'] = True
            status = "Finished"
            isValidStartDate = True
            isValidFinishDate = True
            start_date_str = myfunc.ReadFile(starttagfile).strip()
            try:
                start_date = webcom.datetime_str_to_time(start_date_str)
            except ValueError:
                isValidStartDate = False
            finish_date_str = myfunc.ReadFile(finishtagfile).strip()
            try:
                finish_date = webcom.datetime_str_to_time(finish_date_str)
            except ValueError:
                isValidFinishDate = False
            if isValidSubmitDate and isValidStartDate:
                queuetime = myfunc.date_diff(submit_date, start_date)
            if isValidStartDate and isValidFinishDate:
                runtime = myfunc.date_diff(start_date, finish_date)
        else:
            resultdict['isFinished'] = False
            if os.path.exists(starttagfile):
                isValidStartDate = True
                start_date_str = myfunc.ReadFile(starttagfile).strip()
                try:
                    start_date = webcom.datetime_str_to_time(start_date_str)
                except ValueError:
                    isValidStartDate = False
                resultdict['isStarted'] = True
                status = "Running"
                if isValidSubmitDate and isValidStartDate:
                    queuetime = myfunc.date_diff(submit_date, start_date)
                if isValidStartDate:
                    runtime = myfunc.date_diff(start_date, current_time)
            else:
                resultdict['isStarted'] = False
                status = "Wait"
                if isValidSubmitDate:
                    queuetime = myfunc.date_diff(submit_date, current_time)

    color_status = webcom.SetColorStatus(status)

    file_seq_warning = "%s/%s/%s/%s" % (SITE_ROOT, "static/result", jobid,
                                        "query.warn.txt")
    seqwarninfo = ""
    if os.path.exists(file_seq_warning):
        seqwarninfo = myfunc.ReadFile(file_seq_warning).strip()

    resultdict['file_seq_warning'] = os.path.basename(file_seq_warning)
    resultdict['seqwarninfo'] = seqwarninfo
    resultdict['app_type'] = app_type
    resultdict['jobid'] = jobid
    resultdict['jobname'] = jobname
    resultdict['outpathname'] = os.path.basename(outpathname)
    resultdict['resultfile'] = os.path.basename(resultfile)
    resultdict['tarball'] = os.path.basename(tarball)
    resultdict['zipfile'] = os.path.basename(zipfile)
    resultdict['submit_date'] = submit_date_str
    resultdict['queuetime'] = queuetime
    resultdict['runtime'] = runtime
    resultdict['status'] = status
    resultdict['color_status'] = color_status
    resultdict['numseq'] = numseq
    resultdict['query_seqfile'] = os.path.basename(query_seqfile)
    resultdict['raw_query_seqfile'] = os.path.basename(raw_query_seqfile)
    base_www_url = "http://" + request.META['HTTP_HOST']
    #   note that here one must add http:// in front of the url
    resultdict['url_result'] = "%s/pred/result/%s" % (base_www_url, jobid)

    sum_run_time = 0.0
    average_run_time_single = 0.1  # default average_run_time
    average_run_time_msa = 300  # default average_run_time
    num_finished = 0
    cntnewrun = 0
    cntcached = 0
    topcontentList = []
    # get seqid_index_map
    if os.path.exists(finished_seq_file):
        resultdict['index_table_header'] = [
            "No.", "Length", "numTM", "RunTime(s)", "SequenceName", "Source"
        ]
        index_table_content_list = []
        indexmap_content = myfunc.ReadFile(finished_seq_file).split("\n")
        cnt = 0
        added_idx_set = set([])
        for line in indexmap_content:
            strs = line.split("\t")
            if len(strs) >= 8:
                subfolder = strs[0]
                if subfolder not in added_idx_set:
                    length_str = strs[1]
                    numTM_str = strs[2]
                    source = strs[3]
                    try:
                        runtime_in_sec_str = "%.1f" % (float(strs[4]))
                        if source == "newrun":
                            sum_run_time += float(strs[4])
                            cntnewrun += 1
                        elif source == "cached":
                            cntcached += 1
                    except ValueError:
                        runtime_in_sec_str = ""
                    desp = strs[5]
                    top = strs[7]
                    rank = "%d" % (cnt + 1)
                    index_table_content_list.append([
                        rank, length_str, numTM_str, runtime_in_sec_str,
                        desp[:30], source
                    ])
                    cnt += 1
                    added_idx_set.add(subfolder)
                    topcontentList.append(">%s\n%s" % (desp, top))
        if cntnewrun > 0:
            average_run_time_msa = sum_run_time / cntnewrun

        resultdict['index_table_content_list'] = index_table_content_list
        resultdict['indexfiletype'] = "finishedfile"
        resultdict['num_finished'] = cnt
        num_finished = cnt
        resultdict['percent_finished'] = "%.1f" % (float(cnt) / numseq * 100)
    else:
        resultdict['index_table_header'] = []
        resultdict['index_table_content_list'] = []
        resultdict['indexfiletype'] = "finishedfile"
        resultdict['num_finished'] = 0
        resultdict['percent_finished'] = "%.1f" % (0.0)

    num_remain = numseq - num_finished
    myfunc.WriteFile("\n".join(topcontentList), part_predfile, "w")

    time_remain_in_sec = numseq * 120  # set default value

    if os.path.exists(starttagfile):
        start_date_str = myfunc.ReadFile(starttagfile).strip()
        isValidStartDate = False
        try:
            start_date_epoch = webcom.datetime_str_to_epoch(start_date_str)
            isValidStartDate = True
        except ValueError:
            pass
        if isValidStartDate:
            time_now = time.time()
            runtime_total_in_sec = float(time_now) - float(start_date_epoch)
            cnt_torun = numseq - cntcached

            if cntnewrun <= 0:
                time_remain_in_sec = cnt_torun * 120
            else:
                time_remain_in_sec = int(runtime_total_in_sec /
                                         float(cntnewrun) * cnt_torun + 0.5)

    time_remain = myfunc.second_to_human(time_remain_in_sec)
    resultdict['time_remain'] = time_remain
    qdinittagfile = "%s/runjob.qdinit" % (rstdir)

    if numseq <= 1:
        # web and WSDL submissions use the same interval here
        if app_type == "SCAMPI-single":
            resultdict['refresh_interval'] = 1.0
        else:
            resultdict['refresh_interval'] = 5.0
    else:
        #resultdict['refresh_interval'] = numseq * 2
        addtime = int(math.sqrt(max(0, min(num_remain, num_finished)))) + 1
        if app_type == "SCAMPI-single":
            resultdict['refresh_interval'] = max(
                1, num_remain * average_run_time_single)
        else:
            if not os.path.exists(qdinittagfile):
                resultdict['refresh_interval'] = 2
            else:
                if num_finished == 0:
                    resultdict['refresh_interval'] = 5
                else:
                    resultdict['refresh_interval'] = 10 + addtime

    # get stat info
    if os.path.exists(statfile):  #{{{
        content = myfunc.ReadFile(statfile)
        lines = content.split("\n")
        for line in lines:
            strs = line.split()
            if len(strs) >= 2:
                resultdict[strs[0]] = strs[1]
                percent = "%.1f" % (int(strs[1]) / float(numseq) * 100)
                newkey = strs[0].replace('num_', 'per_')
                resultdict[newkey] = percent
#}}}

    topfile = "%s/%s/query.top" % (rstdir, jobid)
    TM_listfile = "%s/%s/query.TM_list.txt" % (rstdir, jobid)
    nonTM_listfile = "%s/%s/query.nonTM_list.txt" % (rstdir, jobid)
    str_TMlist = []
    str_nonTMlist = []
    lenseq_list = []
    num_TMPro = 0
    if os.path.exists(topfile):
        (tmpidlist, tmpannolist, tmptoplist) = myfunc.ReadFasta(topfile)
        cnt_TMPro = 0
        for ii in range(len(tmpidlist)):
            top = tmptoplist[ii]
            lenseq_list.append(len(top))
            if top.find('M') != -1:
                cnt_TMPro += 1
                str_TMlist.append(tmpannolist[ii])
            else:
                str_nonTMlist.append(tmpannolist[ii])
        num_TMPro = cnt_TMPro

    if not os.path.exists(TM_listfile) or os.path.getsize(TM_listfile) < 1:
        myfunc.WriteFile("\n".join(str_TMlist), TM_listfile, "w")
    if not os.path.exists(nonTM_listfile) or os.path.getsize(nonTM_listfile) < 1:
        myfunc.WriteFile("\n".join(str_nonTMlist), nonTM_listfile, "w")

    avg_lenseq = myfunc.FloatDivision(sum(lenseq_list), len(lenseq_list))
    resultdict['avg_lenseq'] = int(avg_lenseq + 0.5)
    resultdict['app_type'] = app_type
    resultdict['num_TMPro'] = num_TMPro
    resultdict['per_TMPro'] = "%.1f" % (
        myfunc.FloatDivision(num_TMPro, numseq) * 100)
    resultdict['num_nonTMPro'] = numseq - num_TMPro
    resultdict['per_nonTMPro'] = "%.1f" % (
        100.0 - myfunc.FloatDivision(num_TMPro, numseq) * 100)
    resultdict['num_finished'] = num_finished
    resultdict['percent_finished'] = "%.1f" % (float(num_finished) / numseq *
                                               100)

    resultdict['jobcounter'] = webcom.GetJobCounter(resultdict)
    return render(request, 'pred/get_results.html', resultdict)
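
The topology statistics above unpack myfunc.ReadFasta into parallel id, annotation and sequence lists, and count a protein as transmembrane when its topology string contains an 'M'. A minimal ReadFasta consistent with that triple-return usage (the id-extraction rule is an assumption):

def ReadFasta(infile):  # hypothetical stand-in for myfunc.ReadFasta
    """Return (idlist, annotationlist, seqlist); the annotation is the
    header line without the leading '>'."""
    idlist, annolist, seqlist = [], [], []
    chunks = []
    with open(infile) as fp:
        for line in fp:
            line = line.rstrip("\n")
            if line.startswith(">"):
                if chunks:
                    seqlist.append("".join(chunks))
                    chunks = []
                anno = line[1:]
                annolist.append(anno)
                idlist.append(anno.split()[0] if anno else "")
            else:
                chunks.append(line.strip())
    if chunks:
        seqlist.append("".join(chunks))
    return (idlist, annolist, seqlist)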
def RunJob(infile, outpath, tmpdir, email, jobid, g_params):  #{{{
    all_begin_time = time.time()

    rootname = os.path.basename(os.path.splitext(infile)[0])
    starttagfile = "%s/runjob.start" % (outpath)
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    app_logfile = "%s/app.log" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    failedtagfile = "%s/runjob.failed" % (outpath)
    query_parafile = "%s/query.para.txt" % (outpath)

    query_para = {}
    content = myfunc.ReadFile(query_parafile)
    if content != "":
        query_para = json.loads(content)

    rmsg = ""

    resultpathname = jobid

    outpath_result = "%s/%s" % (outpath, resultpathname)
    tmp_outpath_result = "%s/%s" % (tmpdir, resultpathname)

    tarball = "%s.tar.gz" % (resultpathname)
    zipfile = "%s.zip" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    zipfile_fullpath = "%s.zip" % (outpath_result)
    resultfile_text = "%s/%s" % (outpath_result, "query.result.txt")
    mapfile = "%s/seqid_index_map.txt" % (outpath_result)
    finished_seq_file = "%s/finished_seqs.txt" % (outpath_result)

    for folder in [outpath_result, tmp_outpath_result]:
        try:
            os.makedirs(folder)
        except OSError:
            msg = "Failed to create folder %s" % (folder)
            myfunc.WriteFile(msg + "\n", gen_errfile, "a")
            return 1
    try:
        open(finished_seq_file, 'w').close()
    except OSError:
        pass
    # first, fetch results from the cache
    # ==================================

    maplist = []
    maplist_simple = []
    toRunDict = {}
    hdl = myfunc.ReadFastaByBlock(infile, method_seqid=0, method_seq=0)
    if hdl.failure:
        isOK = False
    else:
        webcom.WriteDateTimeTagFile(starttagfile, runjob_logfile,
                                    runjob_errfile)
        recordList = hdl.readseq()
        cnt = 0
        origpath = os.getcwd()
        while recordList is not None:
            for rd in recordList:
                isSkip = False
                # temp outpath for the sequence is always seq_0; only one
                # sequence at a time is fed to the workflow
                tmp_outpath_this_seq = "%s/%s" % (tmp_outpath_result,
                                                  "seq_%d" % 0)
                outpath_this_seq = "%s/%s" % (outpath_result, "seq_%d" % cnt)
                subfoldername_this_seq = "seq_%d" % (cnt)
                if os.path.exists(tmp_outpath_this_seq):
                    try:
                        shutil.rmtree(tmp_outpath_this_seq)
                    except OSError:
                        pass

                maplist.append(
                    "%s\t%d\t%s\t%s" %
                    ("seq_%d" % cnt, len(rd.seq), rd.description, rd.seq))
                maplist_simple.append(
                    "%s\t%d\t%s" %
                    ("seq_%d" % cnt, len(rd.seq), rd.description))
                if not g_params['isForceRun']:
                    md5_key = hashlib.md5(
                        (rd.seq +
                         str(query_para)).encode('utf-8')).hexdigest()
                    subfoldername = md5_key[:2]
                    cachedir = "%s/%s/%s" % (path_cache, subfoldername,
                                             md5_key)
                    zipfile_cache = cachedir + ".zip"

                    if os.path.exists(cachedir) or os.path.exists(
                            zipfile_cache):
                        if os.path.exists(cachedir):
                            try:
                                shutil.copytree(cachedir, outpath_this_seq)
                            except Exception as e:
                                msg = "Failed to copytree  %s -> %s" % (
                                    cachedir, outpath_this_seq)
                                date_str = time.strftime(FORMAT_DATETIME)
                                myfunc.WriteFile(
                                    "[%s] %s with errmsg=%s\n" %
                                    (date_str, msg, str(e)), runjob_errfile,
                                    "a")
                        elif os.path.exists(zipfile_cache):
                            cmd = [
                                "unzip", zipfile_cache, "-d", outpath_result
                            ]
                            webcom.RunCmd(cmd, runjob_logfile, runjob_errfile)
                            shutil.move("%s/%s" % (outpath_result, md5_key),
                                        outpath_this_seq)

                        if os.path.exists(outpath_this_seq):
                            info_finish = webcom.GetInfoFinish_PRODRES(
                                outpath_this_seq,
                                cnt,
                                len(rd.seq),
                                rd.description,
                                source_result="cached",
                                runtime=0.0)
                            myfunc.WriteFile("\t".join(info_finish) + "\n",
                                             finished_seq_file,
                                             "a",
                                             isFlush=True)
                            isSkip = True

                if not isSkip:
                    # first try to delete the outfolder if exists
                    if os.path.exists(outpath_this_seq):
                        try:
                            shutil.rmtree(outpath_this_seq)
                        except OSError:
                            pass
                    origIndex = cnt
                    numTM = 0
                    toRunDict[origIndex] = [rd.seq, numTM, rd.description
                                            ]  #init value for numTM is 0

                cnt += 1
            recordList = hdl.readseq()
        hdl.close()
    myfunc.WriteFile("\n".join(maplist_simple) + "\n", mapfile)

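    # in cache-only mode, do not submit the remaining sequences to the workflow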
    if not g_params['isOnlyGetCache']:
        torun_all_seqfile = "%s/%s" % (tmp_outpath_result, "query.torun.fa")
        dumplist = []
        for key in toRunDict:
            top = toRunDict[key][0]
            dumplist.append(">%s\n%s" % (str(key), top))
        myfunc.WriteFile("\n".join(dumplist) + "\n", torun_all_seqfile, "w")
        del dumplist

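        # sort the sequences by numTM in descending order; numTM is always 0
        # here, so the stable sort keeps the original submission order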
        sortedlist = sorted(list(toRunDict.items()),
                            key=lambda x: x[1][1],
                            reverse=True)
        # format of sortedlist: [(origIndex, [seq, numTM, description]), ...]

        # submit the sequences one by one to the workflow, following the order
        # in sortedlist

        for item in sortedlist:
            origIndex = item[0]
            seq = item[1][0]
            description = item[1][2]

            subfoldername_this_seq = "seq_%d" % (origIndex)
            outpath_this_seq = "%s/%s" % (outpath_result,
                                          subfoldername_this_seq)
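            # the run script is given seq_0 under the temp result folder as
            # its output directory and writes the actual result into query_0
            # inside it; after the run, query_0 is moved to the final
            # per-sequence folder (see the "mv" below)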
            tmp_outpath_this_seq = "%s/%s" % (tmp_outpath_result, "seq_%d" %
                                              (0))
            if os.path.exists(tmp_outpath_this_seq):
                try:
                    shutil.rmtree(tmp_outpath_this_seq)
                except OSError:
                    pass

            seqfile_this_seq = "%s/%s" % (tmp_outpath_result, "query_%d.fa" %
                                          (origIndex))
            seqcontent = ">query_%d\n%s\n" % (origIndex, seq)
            myfunc.WriteFile(seqcontent, seqfile_this_seq, "w")

            if not os.path.exists(seqfile_this_seq):
                msg = "failed to generate seq index %d" % (origIndex)
                date_str = time.strftime(g_params['FORMAT_DATETIME'])
                myfunc.WriteFile("[%s] %s\n" % (date_str, msg), runjob_errfile,
                                 "a", True)
                continue

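            # base command for the PRODRES pipeline; optional search
            # parameters from query_para are appended below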
            cmd = [
                "python", runscript, "--input", seqfile_this_seq, "--output",
                tmp_outpath_this_seq, "--pfam-dir", path_pfamdatabase,
                "--pfamscan-script", path_pfamscanscript,
                "--fallback-db-fasta", blastdb
            ]

            if query_para.get('second_method', "") != "":
                cmd += ['--second-search', query_para['second_method']]

            if query_para.get('pfamscan_evalue', "") != "":
                cmd += ['--pfamscan_e-val', query_para['pfamscan_evalue']]
            elif query_para.get('pfamscan_bitscore', "") != "":
                cmd += ['--pfamscan_bitscore', query_para['pfamscan_bitscore']]

            if 'pfamscan_clanoverlap' in query_para:
                if query_para['pfamscan_clanoverlap'] is False:
                    cmd += ['--pfamscan_clan-overlap', 'no']
                else:
                    cmd += ['--pfamscan_clan-overlap', 'yes']

            if query_para.get('jackhmmer_iteration', "") != "":
                cmd += ['--jackhmmer_max_iter', query_para['jackhmmer_iteration']]

            if query_para.get('jackhmmer_threshold_type', "") != "":
                cmd += ['--jackhmmer-threshold-type',
                        query_para['jackhmmer_threshold_type']]

            if query_para.get('jackhmmer_evalue', "") != "":
                cmd += ['--jackhmmer_e-val', query_para['jackhmmer_evalue']]
            elif query_para.get('jackhmmer_bitscore', "") != "":
                cmd += ['--jackhmmer_bit-score', query_para['jackhmmer_bitscore']]

            if query_para.get('psiblast_iteration', "") != "":
                cmd += ['--psiblast_iter', query_para['psiblast_iteration']]
            if query_para.get('psiblast_outfmt', "") != "":
                cmd += ['--psiblast_outfmt', query_para['psiblast_outfmt']]

            (t_success,
             runtime_in_sec) = webcom.RunCmd(cmd, runjob_logfile,
                                             runjob_errfile, True)

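            # make sure a seq.fa exists in the result; write one from the
            # query if the pipeline did not produce it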
            aaseqfile = "%s/seq.fa" % (tmp_outpath_this_seq + os.sep +
                                       "query_0")
            if not os.path.exists(aaseqfile):
                seqcontent = ">%s\n%s\n" % (description, seq)
                myfunc.WriteFile(seqcontent, aaseqfile, "w")

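            # move the result from the temporary area to the final
            # per-sequence output folder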
            if os.path.exists(tmp_outpath_this_seq):
                cmd = ["mv", "-f",
                       os.path.join(tmp_outpath_this_seq, "query_0"),
                       outpath_this_seq]
                isCmdSuccess = False
                (isCmdSuccess,
                 t_runtime) = webcom.RunCmd(cmd, runjob_logfile,
                                            runjob_errfile, True)

                if not query_para.get('isKeepTempFile', False):
                    temp_result_folder = "%s/temp" % (outpath_this_seq)
                    try:
                        shutil.rmtree(temp_result_folder)
                    except OSError:
                        msg = "Failed to delete the folder %s" % (
                            temp_result_folder)
                        date_str = time.strftime(g_params['FORMAT_DATETIME'])
                        myfunc.WriteFile("[%s] %s\n" % (date_str, msg),
                                         runjob_errfile, "a", True)

                    flist = [
                        "%s/outputs/%s" % (outpath_this_seq, "Alignment.txt"),
                        "%s/outputs/%s" % (outpath_this_seq, "tableOut.txt"),
                        "%s/outputs/%s" % (outpath_this_seq, "fullOut.txt")
                    ]
                    for f in flist:
                        if os.path.exists(f):
                            try:
                                os.remove(f)
                            except OSError:
                                msg = "Failed to delete the file %s" % (f)
                                date_str = time.strftime(
                                    g_params['FORMAT_DATETIME'])
                                myfunc.WriteFile("[%s] %s\n" % (date_str, msg),
                                                 runjob_errfile, "a", True)

                if isCmdSuccess:
                    timefile = "%s/time.txt" % (outpath_this_seq)
                    runtime = webcom.ReadRuntimeFromFile(timefile,
                                                         default_runtime=0.0)
                    info_finish = webcom.GetInfoFinish_PRODRES(
                        outpath_this_seq,
                        origIndex,
                        len(seq),
                        description,
                        source_result="newrun",
                        runtime=runtime)
                    myfunc.WriteFile("\t".join(info_finish) + "\n",
                                     finished_seq_file,
                                     "a",
                                     isFlush=True)
                    # prepare the per-sequence text output (the actual write
                    # below is currently disabled)

                    info_this_seq = "%s\t%d\t%s\t%s" % (
                        "seq_%d" % origIndex, len(seq), description, seq)
                    resultfile_text_this_seq = "%s/%s" % (outpath_this_seq,
                                                          "query.result.txt")
                    #webcom.WriteSubconsTextResultFile(resultfile_text_this_seq,
                    #        outpath_result, [info_this_seq], runtime_in_sec, g_params['base_www_url'])
                    # create or update the md5 cache
                    # create cache only on the front-end
                    if webcom.IsFrontEndNode(g_params['base_www_url']):
                        md5_key = hashlib.md5(
                            (seq +
                             str(query_para)).encode('utf-8')).hexdigest()
                        subfoldername = md5_key[:2]
                        md5_subfolder = "%s/%s" % (path_cache, subfoldername)
                        cachedir = "%s/%s/%s" % (path_cache, subfoldername,
                                                 md5_key)

                        # copy the zipped folder to the cache path
                        origpath = os.getcwd()
                        os.chdir(outpath_result)
                        shutil.copytree("seq_%d" % (origIndex), md5_key)
                        cmd = ["zip", "-rq", "%s.zip" % (md5_key), md5_key]
                        webcom.RunCmd(cmd, runjob_logfile, runjob_logfile)
                        if not os.path.exists(md5_subfolder):
                            os.makedirs(md5_subfolder)
                        shutil.move("%s.zip" % (md5_key),
                                    "%s.zip" % (cachedir))
                        # delete the temporary folder named by the md5 hash
                        shutil.rmtree(md5_key)
                        os.chdir(origpath)

                        # Add the finished date to the database
                        date_str = time.strftime(FORMAT_DATETIME)
                        webcom.InsertFinishDateToDB(date_str, md5_key, seq,
                                                    finished_date_db)

    all_end_time = time.time()
    all_runtime_in_sec = all_end_time - all_begin_time

    if not g_params['isOnlyGetCache'] or len(toRunDict) == 0:
        # write the text output to a single file (currently disabled)
        statfile = "%s/%s" % (outpath_result, "stat.txt")
        #webcom.WriteSubconsTextResultFile(resultfile_text, outpath_result, maplist,
        #        all_runtime_in_sec, g_params['base_www_url'], statfile=statfile)

        # make a zip archive instead of a tarball (friendlier for Windows
        # users); note that "zip -rq" archives the real data behind
        # symbolic links
        os.chdir(outpath)
        # cmd = ["tar", "-czf", tarball, resultpathname]
        cmd = ["zip", "-rq", zipfile, resultpathname]
        webcom.RunCmd(cmd, runjob_logfile, runjob_errfile)

        # write finish tag file
        if os.path.exists(finished_seq_file):
            webcom.WriteDateTimeTagFile(finishtagfile, runjob_logfile,
                                        runjob_errfile)

        isSuccess = (os.path.exists(finishtagfile)
                     and os.path.exists(zipfile_fullpath))
        if not isSuccess:
            webcom.WriteDateTimeTagFile(failedtagfile, runjob_logfile,
                                        runjob_errfile)

        # send the result by email; do not send mail from the cloud VM
        if (webcom.IsFrontEndNode(g_params['base_www_url'])
                and myfunc.IsValidEmailAddress(email)):
            if isSuccess:
                finish_status = "success"
            else:
                finish_status = "failed"
            webcom.SendEmail_on_finish(
                jobid,
                g_params['base_www_url'],
                finish_status,
                name_server="PRODRES",
                from_email="*****@*****.**",
                to_email=email,
                contact_email=contact_email,
                logfile=runjob_logfile,
                errfile=runjob_errfile)

    if os.path.exists(runjob_errfile) and os.path.getsize(runjob_errfile) > 1:
        return 1
    else:
        try:
            shutil.rmtree(tmpdir)
            webcom.loginfo("rmtree(%s)" % (tmpdir), runjob_logfile)
        except Exception as e:
            webcom.loginfo(
                "Failed to rmtree(%s) with error message: %s" %
                (tmpdir, str(e)), runjob_errfile)
        return 0
Example No. 11
def SubmitJobToQueue(
        jobid,
        datapath,
        outpath,
        nummodel,
        nummodel_this_user,
        email,  #{{{
        host_ip,
        base_www_url):
    myfunc.WriteFile("Entering SubmitJobToQueue()\n", g_params['debugfile'],
                     "a")
    modelfile = "%s/query.pdb" % (datapath)
    seqfile = "%s/query.fa" % (datapath)

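    # count the models in the uploaded PDB file via its ENDMDL records;
    # a single-model file without ENDMDL counts as one model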
    if nummodel == -1:
        nummodel = myfunc.ReadFile(modelfile).count("\nENDMDL")
        if nummodel == 0:
            nummodel = 1
    if nummodel_this_user == -1:
        nummodel_this_user = nummodel

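    # load the per-job query parameters, stored as JSON in the job's
    # output folder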
    query_parafile = "%s/query.para.txt" % (outpath)

    query_para = {}
    content = myfunc.ReadFile(query_parafile)
    if content != "":
        query_para = json.loads(content)

    name_software = query_para.get('name_software', "proq3")

    runjob = "%s %s/run_job.py" % (python_exec, rundir)
    scriptfile = "%s/runjob,%s,%s,%s,%s,%d.sh" % (
        outpath, name_software, jobid, host_ip, email, nummodel)
    code_str_list = []
    code_str_list.append("#!/bin/bash")
    code_str_list.append("source %s/bin/activate" % (virt_env_path))
    cmdline = "%s %s -outpath %s -tmpdir %s -jobid %s " % (
        runjob, modelfile, outpath, datapath, jobid)
    if email != "":
        cmdline += "-email \"%s\" " % (email)
    if os.path.exists(seqfile):
        cmdline += "-fasta \"%s\" " % (seqfile)
    if base_www_url != "":
        cmdline += "-baseurl \"%s\" " % (base_www_url)
    if g_params['isForceRun']:
        cmdline += "-force "
    code_str_list.append(cmdline)

    code = "\n".join(code_str_list)

    msg = "Write scriptfile %s" % (scriptfile)
    myfunc.WriteFile(msg + "\n", g_params['debugfile'], "a")

    myfunc.WriteFile(code, scriptfile)
    os.chmod(scriptfile, 0o755)

    myfunc.WriteFile("Getting priority" + "\n", g_params['debugfile'], "a")
    priority = myfunc.GetSuqPriority(nummodel_this_user)

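    # VIP users bypass the computed priority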
    if email in vip_user_list:
        priority = 999999999.0

    myfunc.WriteFile("priority=%d\n" % (priority), g_params['debugfile'], "a")

    st1 = webcom.SubmitSlurmJob(datapath, outpath, scriptfile,
                                g_params['debugfile'])

    return st1
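
A hypothetical call site for SubmitJobToQueue, for illustration only (all
values below are made up; in the real server they come from the submission
form and the Django request):

st = SubmitJobToQueue(
    jobid="rst_abc123",
    datapath="/data/jobs/rst_abc123",
    outpath="/results/rst_abc123",
    nummodel=-1,              # -1: count the ENDMDL records in query.pdb
    nummodel_this_user=-1,    # -1: default to nummodel
    email="user@example.com",
    host_ip="127.0.0.1",
    base_www_url="http://localhost:8080")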