Example #1
def getlog(rid, ltype, offset, perm, disabled4reader):
    print "in getlog(), rid=", rid, ",ltype=", ltype, "offset=", offset
    document = _list.get_ds_doc(rid, perm)
    #print "document=",document
    if document is None:
        document = _list.get_doc(rid, perm)
        if document is None:
            return HttpResponse(json.dumps({"error": "data not found"}),
                                content_type="application/json")

    filename = get_log_fname(rid, ltype)
    fsize, logtxt = get_log_content(filename, ltype, offset)

    sts = "n/a"
    if document:
        sts = document.status

    # when called from job_logs(), disabled4reader is None: return raw log text
    if disabled4reader is None:
        return logtxt

    ret = {
        "log": logtxt,
        "id": rid,
        "status": sts,
        "fsize": fsize
    }  #, "linenumb":endln}
    #time.sleep(2)
    #print "++++++++++++++++++++++++++++++++++++++++++++>>>>", ret
    return HttpResponse(json.dumps(ret), content_type="application/json")
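# getlog() relies on two helpers that are not part of this example. A minimal
# hypothetical sketch, assuming get_log_fname() maps (rid, ltype) to a path in
# settings.LOG_FOLDER (matching the "<rid><type>.log" names that job_logs()
# globs for below) and get_log_content() reads from a byte offset:
import os

def get_log_fname(rid, ltype):
    # assumed layout: <LOG_FOLDER>/<rid><ltype>.log
    return os.path.join(settings.LOG_FOLDER, str(rid) + ltype + ".log")

def get_log_content(filename, ltype, offset, max_bytes=65536):
    # return (total file size, text read from offset); max_bytes is an assumption
    try:
        fsize = os.path.getsize(filename)
        with open(filename, 'r') as f:
            f.seek(int(offset))
            return fsize, f.read(max_bytes)
    except (IOError, OSError):
        return 0, ""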
def get_optlist(request, rid, perm, disabled4reader):
    print "in _api.get_optlist. rid=", rid
    if request.method == 'GET':
        #doc = Document.objects.get(id=rid)
        doc = _list.get_ds_doc(rid, perm)
        if not doc:
            #print "not found!"
            #return Response({"data not found":-1},status=404)
            return Response({"error": "data not found"}, status=404)
        arr = []
        # get dataset row
        arr.append(get_row_4_opt(doc))
        #print "arr=", arr
        #documents = Document.objects.all().filter(~Q(file_type='predict'),acl_list__lte=perm, train_id=rid).order_by('-id')[0:500]
        #print "before doc"
        # get option rows for this dataset
        documents = _list.get_opt_docs(rid, perm)
        #print "here, len=",len(documents)
        if documents:
            for doc in documents:
                #print "doc=",doc
                arr.append(get_row_4_opt(doc))

        jobj = {}
        jobj["data"] = arr
        #print "jobj=",jobj
        return JsonResponse(jobj, safe=False)
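# get_optlist() assumes a helper get_row_4_opt() that flattens a Document into
# one row of the "data" array. A hypothetical sketch; the field order is an
# assumption patterned after get_ds_info() below:
def get_row_4_opt(doc):
    return [
        doc.id, doc.filename, doc.file_type, doc.status,
        doc.local_processed_date(), doc.desc
    ]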
def rm_data(rid, type, perm, disabled4reader):
    # for deleting dataset record only
    document = None  # stays None for unsupported types
    if type == "ds":
        document = _list.get_ds_doc(rid, perm)
    elif type == "pred":
        document = _predict.get_pred_doc(rid, perm, disabled4reader)
        if document is not None and len(document) > 0:
            document = document[0]

    if document is None:
        return Response({"status": "failed", "msg": "record not found"},
                        status=404)
    # TBD: should we soft-delete instead of really deleting the record?
    ret = document.delete()
    print "ret=", ret
    if ret[0] == 1:
        return Response({"status": "deleted",
                         "msg": "Record id=" + rid + " deleted"})
    else:
        return Response({"status": "failed",
                         "msg": "Delete failed for id=" + rid},
                        status=404)
def get_model(request, rid, perm, disabled4reader):
    print "in get_model, rid=", rid
    # check permission
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return Response({"error": "data not found"}, status=404)
    # get model dict

    local_processed_date = document.local_processed_date()
    ret = {}
    ret["id"] = document.id
    ret["filename"] = document.filename
    ret["file_type"] = document.file_type
    ret["status"] = document.status
    ret["local_processed_date"] = local_processed_date
    ret["ml_n_gram"] = document.ml_n_gram
    ret["ml_lib"] = document.ml_lib
    ret["ml_opts"] = json.loads(document.ml_opts)
    ret["accuracy"] = document.accuracy
    ret["train_id"] = document.train_id
    ret["option_state"] = document.option_state

    # get other info from mongo
    ret = ml_util.ml_get_model(ret)

    return JsonResponse(ret, safe=False)
def get_log_file(rid, ltype, offset, perm, disabled4reader):
    print 'in get_log_file(), rid=', rid, ",offset=", offset
    # check access
    document = _list.get_ds_doc(rid, perm)
    #print "document..=",document
    if document is None:
        # check if record exists
        document = _list.get_doc(rid, perm)
        if document is None:
            return Response({"error": "file not found"}, status=404)

    return _log.get_log_file(rid, ltype, offset, perm, disabled4reader)
def get_all_predicts(request, rid, perm, disabled4reader, count):
    print "in _predict.get_all_predicts, rid=", rid, " user=", request.user.username
    if request.method == 'GET':
        # check access to the dataset record (reconstructed: the original
        # lines here were masked by the scraper)
        doc = _list.get_ds_doc(rid, perm)
        if not doc:
            return Response({"error": "data not found"}, status=404)
        #use "like" _icontains to get predict and ensemble_predict  
        if count is None or int(count) <=0:
            predictions = Document.objects.all().filter(Q(file_type__icontains="predict"), train_id=rid).order_by('-id')
        else: 
            predictions = Document.objects.all().filter(Q(file_type__icontains="predict"), train_id=rid).order_by('-id')[:int(count)]
        #serializer = PredictSerializer(predictions, many=True)
        #return Response(serializer.data)
        return Response(ml_serializers.pred2json(predictions))
def get_pred(rid, perm,disabled4reader):
    print 'in get_pred, rid=', rid
    predictions = Document.objects.all().filter(file_type__icontains="predict",
                                                id=rid)
    if not predictions:
        return Response({"error": "data not found"}, status=404)

    #print "t=",type(predictions),",predictions=",predictions

    # check if dataset doc accessible
    train_id = predictions[0].train_id
    ds_doc = _list.get_ds_doc(train_id, perm)
    if not ds_doc:
        return Response({"error": "data not found"}, status=404)
    
    #serializer = PredictSerializer(predictions, many=True)
    #return Response(serializer.data)
    return Response(ml_serializers.pred2json(predictions))
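# ml_serializers.pred2json() is referenced here but not shown. A hypothetical
# sketch; the field names are assumptions mirroring the ajax payload built in
# _predict.predict() later in this listing:
def pred2json(predictions):
    return [{
        "id": str(d.id),
        "status": d.status,
        "pdate": d.local_processed_date(),
        "by": d.submitted_by,
        "filename": d.filename,
        "true_label": d.true_label,
        "prediction": d.prediction,
        "predict_val": d.predict_val,
        "train_id": d.train_id,
    } for d in predictions]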
def train(request, perm, disabled4reader):
    action = request.POST.get('action')
    rid = request.POST.get('hf_w_id')
    if action is None or action not in ("mllib_api", "scikit_api"):
        return Response({"error": "not supported."}, status=404)
    if rid is None:
        return Response({"error": "id not found"}, status=404)
    # check doc
    document = _list.get_ds_doc(rid, perm)
    #print "document..=",document
    if document is None:
        return Response({"error": "dataset not found"}, status=404)

    rid, msg_id, ret_msg = _list.ml_opts(request, perm, disabled4reader)
    ret = {"id": rid, "msg_id": msg_id, "ret_msg": ret_msg}
    return Response(ret)
def get_feat_impo(request, rid, perm, disabled4reader):
    # chk access
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return Response({"error": "data not found"}, status=404)

    # get data from mongo.dataset_info
    doc = query_mongo.find_one(
        settings.MONGO_OUT_DNS, settings.MONGO_OUT_PORT,
        settings.MONGO_OUT_DB, settings.MONGO_OUT_TBL,
        settings.MONGO_OUT_USR, settings.MONGO_OUT_PWD,
        '{"rid":' + rid + ',"key":"feature_importance"}', '{"value":1,"_id":0}')

    if doc:
        arr = doc["value"]
        return Response(arr)
    else:
        return Response({"error": "data not found"}, status=404)
def get_ds_info(request, rid, perm, disabled4reader):
    print "in _api.get_ds_info. rid=", rid
    if request.method == 'GET':
        #doc = Document.objects.get(id=rid)
        doc = _list.get_ds_doc(rid, perm)
        if not doc:
            return Response({"error": "data not found"}, status=404)
        local_processed_date = doc.local_processed_date()

        arr = []
        arr.append([
            doc.id, doc.filename, doc.file_type, doc.status,
            local_processed_date, doc.desc
        ])
        ret = {}
        ret["data"] = arr

        return JsonResponse(ret, safe=False)
def calculate_feature_impo(request, rid, perm, disabled4reader):
    print 'In calculate_feature_impo'
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('list'))

    filename = document.filename
    uploadtype = document.file_type
    document.status = 'processing feature importance'
    # datetime.datetime.now() fails here, likely because this module does
    # "from datetime import datetime"
    document.processed_date = datetime.now()
    document.save()

    ds_id = None
    if document.option_state == "new_training":
        # has featuring output; does not depend on the source dataset id
        ds_id = document.train_id
    else:
        ds_id = rid

    # call feature_impo API; executes a shell script
    ret = invoke_feature_impo(filename, rid, uploadtype, "", ds_id)

    print "feat impo ret=", ret
    # update status code
    document = Document.objects.get(id=rid)
    msg_id = -1
    if ret == 0:
        document.status = 'importance_calculated'
        if settings.STS_1000_FEATURE_IMPO > document.status_code:
            document.status_code = settings.STS_1000_FEATURE_IMPO
        msg_id = "231"
    else:
        document.status = 'feature importance failed'
        msg_id = "90231"
    document.processed_date = datetime.now()
    document.save()

    print '* end Feature importance: rc=', ret, '; id=', rid

    return msg_id  # _list to handle return page
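# invoke_feature_impo() is not shown in this example. A hypothetical sketch
# patterned after the subprocess.call() in mrun2() below; the script name
# (settings.FEATURE_IMPO_SCRIPT) and the argument order are assumptions:
import subprocess

def invoke_feature_impo(filename, rid, uploadtype, flag, ds_id):
    return subprocess.call([
        settings.TASK_EXE,             # bash
        settings.FEATURE_IMPO_SCRIPT,  # assumed settings key
        rid,
        filename,
        uploadtype,
        flag,                          # "" for full run, "comb_only" to refresh
        str(ds_id),
    ])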
def feature_impo_combs(request, rid, perm, disabled4reader):
    #print 'in feature_impo_combs'

    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('list'))

    is_option = 'N'
    if document.train_id:
        is_option = 'Y'
    jopts = document.ml_opts
    if jopts:
        jopts = json.loads(document.ml_opts)
        jopts["learning_algorithm"] = \
            jopts["learning_algorithm"].title().replace("_", " ")

    return render(request,
        'atdml/feature_combs.html',
        {'document': document, 'is_option': is_option, 'jopts': jopts},
    )
def set_data(request, type, rid, perm, disabled4reader):
    # check support types
    if not type in ("_es_list", "dnn_state"):
        return Response({
            "status": "failed",
            "msg": "not supported"
        },
                        status=404)

    # check doc
    document = _list.get_ds_doc(rid, perm)
    #print "document..=",document
    if document is None:
        return Response({"status": "failed", "msg": "record not found"},
                        status=404)

    if "ensemble" in document.file_type:
        ds_list = request.POST.get("hf_w_ds_list")
        document.ds_list = ds_list
        document.save()
        return Response({
            "status": "updated",
            "id": rid,
            "msg": "Dataset list updated for Id=" + rid
        })
    elif type in ("dnn_state"):
        dnn_state = request.POST.get("dnn_state")
        document.ml_state = dnn_state
        document.save()
        return Response({"status": "updated", "id": rid, "msg": "succeeded"})
    else:
        return Response({
            "status": "failed",
            "msg": "not an ensemble record"
        },
                        status=404)
Example #14
def job_logs(request, rid, perm, disabled4reader, cid=None):
    print 'in job_logs(), rid=', rid
    document = _list.get_ds_doc(rid, perm)
    print "document..=", document
    if not document:
        return HttpResponseRedirect(reverse('list'))

    filename = document.filename

    train_id = None
    if cid is None:
        prd_id = request.POST.get("_prd_id")
    else:
        prd_id = cid
    train_id = document.train_id
    #print "prd_id=",prd_id

    #get log files
    dir_str = os.path.join(settings.LOG_FOLDER, rid + "[a-z]*.log")

    # get a list of filenames
    alllist = glob.glob(dir_str)
    pipeline = [
        'retrieve', 'feature', 'pca', 'train', 'multi_run',
        'feature_importance'
    ]

    #print file_list
    content1st = ""
    file_list = []
    prdct_lst = []
    exec_lst = []
    if len(alllist) > 0:
        # remove path and leading rid
        #file_list=[ os.path.basename(f).replace(rid,'').replace('.log','') for f in sorted(alllist) ]
        ava_list = [
            os.path.basename(f).replace(rid, '').replace('.log', '')
            for f in alllist
        ]

        # filter file list and keep pipeline order
        for i in pipeline:
            for j in ava_list:
                if i == j:
                    file_list.append(i)
        #print file_list
        content1st = getlog(rid, file_list[0], 0, perm, None)

    #add predict
    print "document.file_type=", document.file_type
    prdct_doc_lst = Document.objects.all().filter(
        file_type__contains="predict", train_id=rid).order_by('-id')[0:200]

    for i in prdct_doc_lst:
        print i.id, i.filename
    print "prd_id=", prd_id

    if len(prdct_doc_lst) > 0:
        #prdct_lst=[ (str(d.id), d.filename) for d in sorted(prdct_doc_lst,reverse=True) ]
        prdct_lst = [(str(d.id), d.filename) for d in prdct_doc_lst]
        prdct_lst = sorted(prdct_lst, reverse=True)
        # trick to set latest predict_id for negative predict id
        if prd_id and prd_id.startswith('-') and prd_id[1:].isdigit():
            plist = [i[0] for i in prdct_lst]
            prd_id = plist[0]
            #print "plist=",plist
            #print "prd_id2=",prd_id
        file_list.append("predict")
        #print "prdct_lst=",prdct_lst
        #for ensemble
        if len(content1st) == 0:
            content1st = getlog(prdct_lst[0][0], file_list[0], 0, perm, None)

    # find execution log ============== =============
    exec_doc_lst = Document.objects.all().filter(
        file_type__contains="predict", train_id=rid,
        desc="has_exe_log").order_by('-id')[0:200]
    if len(exec_doc_lst) > 0:
        exec_lst = [(str(d.id), d.filename) for d in exec_doc_lst]
        exec_lst = sorted(exec_lst, reverse=True)
        file_list.append("execution log")

    jopts = document.ml_opts
    if jopts:
        jopts = json.loads(document.ml_opts)
        if "learning_algorithm" in jopts:
            jopts["learning_algorithm"] = jopts["learning_algorithm"].title(
            ).replace("_", " ")
    pca_jopts = document.ml_pca_opts
    if pca_jopts:
        pca_jopts = json.loads(document.ml_pca_opts)

    #print 'exec_lst=',exec_lst
    return render(
        request,
        'atdml/joblogs.html',
        {
            'document': document,
            'file_list': file_list,
            'content1st': content1st,
            'prdct_lst': prdct_lst,
            'exec_lst': exec_lst,
            'disabled4reader': disabled4reader,
            'perm': perm,
            'prd_id': prd_id,
            'train_id': train_id,
            'jopts': jopts,
            'pca_jopts': pca_jopts
            #, 'msg_error':msg_error, 'msg_success': msg_success
        },
    )
def emulate(request, rid, cid, msg_id, perm, disabled4reader, from_api="n"):
    print 'in emulate, cid=', cid, ", rid=", rid, ",perm=", perm
    document = None
    if not rid is None and len(rid) > 0:
        document = _list.get_ds_doc(rid, perm)

    msg_error = ""
    msg_success = ""
    msg_info = ""
    msg_warning = ""
    new_id = None

    # set message for GET
    if msg_id == "101":
        msg_success = settings.MSG_UPLOAD_SUCCESS + " Id=" + str(cid)
    elif msg_id == "90101":
        msg_error = settings.MSG_UPLOAD_FAILED
    elif msg_id == "90901":
        msg_error = settings.MSG_RECAPTCHA_FAILED
    elif msg_id and "90902" in msg_id:
        arr = msg_id.split('.')
        if len(arr) > 1:  # append count to the end
            msg_error = settings.MSG_UPLOAD_OVER_MAX + " " + arr[1]
        else:
            msg_error = settings.MSG_UPLOAD_OVER_MAX
    exe_type = None
    recaptcha = settings.RECAPTCHA_PREDICT
    if recaptcha is None:
        recaptcha = "N"

    # Handle file upload
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            desc = request.POST.get('_desc')
            emulater_config = request.POST.get('_emulater_config')
            train_id = request.POST.get('hf_train_id')
            print "desc=", desc, ",train_id=", train_id, ",perm=", perm
            # assume "<id> <type> <other info>"; append to desc for ref
            if " " in train_id:
                tarr = train_id.split(" ")
                train_id = tarr[0]
                exe_type = tarr[1].lower()
                desc = desc + ", by " + train_id + " " + exe_type

            newdoc = Document(docfile=request.FILES['docfile'])
            newdoc.filename = request.FILES[
                'docfile']  #hardcode to remove "upload/"
            newdoc.submitted_by = request.user.username
            newdoc.acl_list = perm
            newdoc.file_type = "emulate"  # for AE page only
            newdoc.desc = desc  # user input + ds info

            if not train_id is None and train_id > "":
                newdoc.status = "apk_queued"  # flag "apk_queued" for prediction job
                newdoc.train_id = train_id  # bind to a ML model for prediction
                #newdoc.file_type="predict" # predict page only
            else:
                newdoc.status = "submitted"  # "submitted" for APK emulator without prediction
                newdoc.train_id = -1  # flag to not be a dataset
            #newdoc.desc="has_exe_log" # flag for execution log
            if not rid is None:
                newdoc.train_id = rid
                # TBD for rid assigned
            newdoc.save()
            new_id = str(newdoc.id)

            realname = os.path.basename(newdoc.docfile.name)
            #dir_indx=realname.index(settings.UPLOAD_DIR)
            print "realname=", realname
            print "UPLOAD_DIR=", settings.UPLOAD_DIR

            print "before Save ========="
            # filename may be different if filename duplicated
            if realname != newdoc.filename:
                newdoc.filename = realname
                newdoc.save()
            print "After Save =========="

            # with prediction, invoke _predict  ============
            if not train_id is None and train_id > "":
                mdoc = _list.get_shared_doc(train_id, perm)
                print "mdoc=", mdoc
                action_type = 'upload_predict'
                if exe_type is None:
                    exe_type = "apk-dynamic"
                ml_feat_threshold = None
                # invoke predict script
                (ret, msg_id, msg)=  _predict.invoke_pred_script_by_docs( \
                    mdoc, newdoc, action_type, ml_feat_threshold \
                    , exe_type, emulater_config)
                if ret == 0 or ret == 205:
                    msg_success = msg
                else:
                    msg_error = msg
            else:  # emulator only ============
                (ret, msg_id, msg_success, msg_error)=invoke_apk_script(realname, cid=new_id \
                , emulator_config=emulater_config)

        else:  #  invalid form
            # for return only
            form = DocumentForm()

        if from_api == "y":
            if not new_id is None:
                newdoc = _list.get_doc(new_id, perm)
            if not newdoc is None:
                msg_id = "0"
                msg = "APK submitted."
                retj = {
                    "id": new_id,
                    "status": newdoc.status,
                    "by": newdoc.submitted_by,
                    "filename": newdoc.filename,
                    "msg_id": msg_id,
                    "msg": msg
                }
                return Response(retj)
            else:
                return Response({"error": "submit error!"}, status=404)

        # for ae_list page =============== ===
        return render(
            request,
            'atdml/ae_list.html',
            {
                'form': form,
                'disabled4reader': disabled4reader,
                'perm': perm,
                'msg_error': msg_error,
                'msg_success': msg_success,
                'msg_info': msg_info,
                'msg_warning': msg_warning,
                'new_id': new_id,  #, 'options': options
                "use_recaptcha": recaptcha
            },
            #context_instance=RequestContext(request)
        )

    elif request.method == 'GET':  # =========== =============
        print 'in _emulator.emulate() GET'
        if from_api == "y":
            doc = None
            if not cid is None:
                doc = _list.get_doc(cid, perm)
            if not doc is None:
                msg_id = "0"
                msg = ""
                retj = {
                    "id": cid,
                    "status": doc.status,
                    "by": doc.submitted_by,
                    "filename": doc.filename,
                    "msg_id": msg_id,
                    "msg": msg
                }
                return Response(retj)
            return Response({"error": "record not found"}, status=404)
        else:
            form = DocumentForm()

    else:  # not POST or GET ========== ====
        print 'in _emulator.emulate not post'
        form = DocumentForm()  # otherwise form is unbound at render time

    print "msg_error=" + msg_error, ",msg_success=" + msg_success

    # for ae_list page =============== ===
    #return render_to_response(
    return render(
        request,
        'atdml/ae_list.html',
        {
            'form': form,
            'disabled4reader': disabled4reader,
            'perm': perm,
            'msg_error': msg_error,
            'msg_success': msg_success,
            'msg_info': msg_info,
            'msg_warning': msg_warning,
            'new_id': new_id,  #, 'options': options
            "use_recaptcha": recaptcha
        },
        #context_instance=RequestContext(request)
    )
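# invoke_apk_script() (called in the POST branch above) is not shown. A
# hypothetical sketch of an emulator-only submission; the script name
# (settings.APK_SCRIPT) and the returned message ids/texts are assumptions:
import subprocess

def invoke_apk_script(realname, cid=None, emulator_config=None):
    ret = subprocess.call([settings.TASK_EXE, settings.APK_SCRIPT,
                           str(cid), realname, emulator_config or ""])
    if ret == 0:
        return ret, "0", "APK submitted. Id=" + str(cid), ""
    return ret, "90101", "", "APK submission failed. Id=" + str(cid)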
def set_feature(request, rid, perm, disabled4reader):
    #print "in set_feature"
    msg_id = None
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('list'))

    filename = document.filename
    uploadtype = document.file_type

    #get files
    ck_list = None
    feat = None
    if request.method == 'POST':
        action = request.POST.get('action')
        #print 'in post action=', action
        has_change = 0

        if action == 'vote_fid':
            ck_list = request.POST.getlist('ck_fid')
            to_verified = request.POST.getlist('to_verified')
            #print 'to_verified=',to_verified
            # add a new one
            for idx, fid in enumerate(ck_list):
                feat = None
                try:
                    if not Feature_click.objects.all().filter(fid=fid, rid=rid):
                        feat = Feature_click(fid=fid, rid=rid, vote=1)
                        if len(to_verified) == 1 and to_verified[0] == "1":
                            feat.vote = FILTER_COUNT
                    else:  # increase vote count
                        feat = Feature_click.objects.get(fid=fid, rid=rid)
                        if len(to_verified) == 1 and to_verified[0] == "1" \
                                and feat.vote < FILTER_COUNT:
                            feat.vote = FILTER_COUNT
                        else:
                            feat.vote = feat.vote + 1
                    feat.save()
                    has_change = 1
                except Exception:
                    feat = None
                    #error msg?

            # call feature_impo API to refresh combine list
            if has_change == 1:
                ds_id = None
                if document.option_state == "new_featuring":
                    # has featuring output; does not depend on source dataset id
                    ds_id = document.train_id
                ret = invoke_feature_impo(filename, rid, uploadtype,
                                          "comb_only", ds_id)
                #check ret?

            msg_id = "232"  # success msg
        # drop feature list item ===========================================
        if action == 'drop_fid':
            ck_list = request.POST.getlist('vf_fid')
            #print ck_list
            for idx, fid in enumerate(ck_list):
                feat = None
                try:
                    if Feature_click.objects.all().filter(fid=fid, rid=rid):
                        feat = Feature_click.objects.get(fid=fid, rid=rid)
                        feat.vote = 0  # reset to 0
                        feat.save()
                except Exception:
                    feat = None
                    #error msg?

            msg_id = "233"  # success msg
    else:  # not POST ========== ====
        print 'invalid method'

    return feature_impo2(request, rid, perm, disabled4reader, msg_id)
def mrun2(request, rid, filename, msg_id, perm, disabled4reader):
    print "in mrun2()"
    # get perm
    #uname,grp,perm,disabled4reader=get_perm(request)
    #document = Document.objects.get(id=rid)

    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('atdml.views.list'))

    mrun_numb = ""
    msg_error = ""
    msg_success = ""

    if msg_id == "211":
        msg_success = settings.MSG_MRUN_SUCCESS
    elif msg_id == "90211":
        msg_error = settings.MSG_MRUN_DUPLICATED

    if request.method == 'POST':

        form = DocumentForm(request.POST, request.FILES)

        action = request.POST.get('action')
        mrun_numb = request.POST.get('mrun_numb')
        print "mrun=", mrun_numb
        print '*** mrun=', action, ' rid=', rid
        # upload to HDFS ======================================================

        # ================================= Multi RUN ========
        if document and action in ('mrun', 'multiple_run'):
            print 'In action = mrun. document.mrun_numb=', document.mrun_numb
            print '*** document.status_code=', document.status_code
            ret = 0
            # only call task when number are different
            if document.mrun_numb != mrun_numb and mrun_numb:
                #update db
                document.status = 'processing'
                #document.processed_date=datetime.datetime.now()
                document.mrun_numb = mrun_numb
                document.save()
                #execute shell script here
                uploadtype = document.file_type
                ml_lib = document.ml_lib
                opt_jstr = document.ml_opts
                #print document.get_file_list()

                #print settings.TASK_EXE,
                #print settings.TASK_SRC_DIR+"/"+filename
                #print "in _result.py: settings.SPARK_URL=",settings.SPARK_URL
                #print "in _result.py: settings.MRUN_SCRIPT=",settings.MRUN_SCRIPT

                ret = subprocess.call([
                    settings.TASK_EXE,  #bash
                    settings.MRUN_SCRIPT,  #multi_run.sh
                    #settings.SPARK_SUBMIT, # spark cmd (shared)
                    #settings.HDFS_UPLOAD_DIR+"/"+filename,  # HDFS dir for input
                    #settings.TRAIN_DES_DIR+"/"+filename,   # dest dir
                    rid,
                    filename,
                    mrun_numb,
                    #settings.SPARK_URL,  #URL for Spark
                    uploadtype,
                    ml_lib,
                    opt_jstr,
                ])
                '''
                child=subprocess.Popen([settings.TASK_EXE,
                                    settings.MRUN_SCRIPT,
                                    settings.SPARK_SUBMIT,
                                    settings.HDFS_UPLOAD_DIR+"/"+filename,
                                    settings.TRAIN_DES_DIR+"/"+filename,
                                    rid,
                                    filename,
                                    mrun_numb
                ])
                ret=child.returncode
                '''
                # refresh document
                document = Document.objects.get(id=rid)

                if ret == 0:
                    if settings.STS_800_MRUN > document.status_code:
                        document.status = 'mruned'
                        document.status_code = settings.STS_800_MRUN
                        print '*** updated document.status_code=', document.status_code
                        document.processed_date = datetime.datetime.now()
                        document.save()

                    print "after mrun subproc. ret=", ret
                    msg_id = "211"
                else:
                    msg_id = "90211"

            else:  # repeated
                print "mrun repeated"
                msg_id = "90212"

            print '* end mRun: rc=', ret, '; id=', rid, ', fname=', filename

            #return HttpResponseRedirect('/atdml/'+str(rid)+'/f/mrun/'+msg_id+'/')

        else:  # Invalid status or action
            print '*** Invalid status or action! id=', rid, ', fname=', filename

    else:  # Not POST =========
        form = DocumentForm()  # An empty, unbound form

    # Load documents for the list page
    document = Document.objects.get(id=rid)

    predictions = Document.objects.all().filter(
        file_type="predict", train_id=rid).order_by('-id')[0:10]
    # get train option id
    train_id = document.train_id
    # get sample file list
    sflist = _predict.get_sfile_list(document.filename, document.id,
                                     document.file_type, train_id)
    # how to get dir?
    cv_grid_data, param_str, jopts = get_cv_grid(document, rid)

    if request.is_ajax():
        print "Ajax Mrun"
        #sdoc = serializers.serializer('json', [document])
        #print "sdoc="+sdoc
        document = Document.objects.get(id=rid)
        ret_msg = ""

        if msg_id == "211":
            ret_msg = settings.MSG_MRUN_SUCCESS
            ret_data = {
                "status": document.status,
                "id": rid,
                "pdate": document.local_processed_date(),
                "by": document.submitted_by,
                "vari": document.variance_short(),
                "mean": document.mean_short(),
                "msg": ret_msg + " Id=" + rid,
                "src": mrun_numb
            }
            return HttpResponse(json.dumps(ret_data),
                                content_type="application/json")
        elif msg_id == "90211":  # failed
            ret_msg = settings.MSG_MRUN_FAILED
            ret_data = {"msg": ret_msg + " Id=" + rid}
            print json.dumps(ret_data)
            return HttpResponse(json.dumps(ret_data),
                                content_type="application/json",
                                status=400)
        elif msg_id == "90212":  # duplicated
            ret_msg = settings.MSG_MRUN_DUPLICATED
            ret_data = {"msg": ret_msg + " Id=" + rid}
            print json.dumps(ret_data)
            return HttpResponse(json.dumps(ret_data),
                                content_type="application/json",
                                status=400)

        #time.sleep(2)
    has_roc = has_result_file(rid, str(rid) + "_roc.json")
    has_mrun = has_result_file(rid, str(rid) + "_mrun.json")
    has_score = has_result_file(rid, str(rid) + "_score_graph.json")
    print "has_roc=", has_roc, ", has_mrun=", has_mrun, ", has_score=", has_score

    return render(
        request,
        'atdml/result.html',
        {
            'document': document,
            'form': form,
            'predictions': predictions,
            'disabled4reader': disabled4reader,
            'perm': perm,
            'msg_error': msg_error,
            'msg_success': msg_success,
            'sflist': sflist,
            "cv_grid_data": cv_grid_data,
            "param_str": param_str,
            "jopts": jopts,
            "has_roc": has_roc,
            "has_mrun": has_mrun,
            "has_score": has_score
        },
        #context_instance=RequestContext(request)
    )
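# has_result_file() is used throughout but not shown. A hypothetical
# glob-based sketch: callers pass both exact names ("<rid>_roc.json") and
# wildcards ("<rid>_cluster*.png"), and result files appear to live under
# settings.RESULT_DIR_FULL/<rid>/ (see feature_impo_all() below):
import glob
import os

def has_result_file(rid, fname_pattern):
    path = os.path.join(settings.RESULT_DIR_FULL, str(rid), fname_pattern)
    return len(glob.glob(path)) > 0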
def feature_impo_all(request, rid, perm, disabled4reader):
    #print 'in feature_impo_all'

    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('list'))

    filename = document.filename

    #get files
    out_FIRM = settings.RESULT_DIR_FULL + "/" + str(rid) + "/" + str(rid) + "_score_FIRM.txt"
    out_PROB = settings.RESULT_DIR_FULL + "/" + str(rid) + "/" + str(rid) + "_score_PROB.txt"
    out_IT = settings.RESULT_DIR_FULL + "/" + str(rid) + "/" + str(rid) + "_score_IT.txt"

    flist1, flist2, flist3 = get_feat_importance(rid, out_FIRM, out_PROB, out_IT)
    outlist1 = []
    outlist2 = []
    outlist3 = []
    # replace \t with html tags; TBD to use bootstrap table...
    for line in flist1[:LIST_COUNT]:
        out = ""
        for idx, item in enumerate(line.split('\t')):
            item = item.replace("<", "&lt;").replace(">", "&gt;")
            print idx, item
            if idx == 2:
                out += '<td data-placement="bottom" data-toggle="tooltip" ' \
                    + ' title="' + item + '">' + show_partial(item) + '</td>'
            elif idx == 0:
                out += '<td class="' + item + '">' + item + '</td>'
            else:
                out += '<td>' + item + '</td>'
        print "out=", out
        outlist1.append(out)
    for line in flist2[:LIST_COUNT]:
        out = ""
        for idx, item in enumerate(line.split('\t')):
            item = item.replace("<", "&lt;").replace(">", "&gt;")
            print idx, item
            if idx == 2:
                out += '<td data-placement="bottom" data-toggle="tooltip" ' \
                    + ' title="' + item + '">' + show_partial(item) + '</td>'
            elif idx == 0:
                out += '<td class="' + item + '">' + item + '</td>'
            else:
                out += '<td>' + item + '</td>'
        print "out=", out
        outlist2.append(out)
    for line in flist3[:LIST_COUNT]:
        out = ""
        for idx, item in enumerate(line.split('\t')):
            item = item.replace("<", "&lt;").replace(">", "&gt;")
            print idx, item
            if idx == 2 and len(item) > MAX_DISPLAY_LEN:
                out += '<td data-placement="bottom" data-toggle="tooltip" ' \
                    + ' title="' + item + '">' + show_partial(item) + '</td>'
            elif idx == 0:
                out += '<td class="' + item + '">' + item + '</td>'
            else:
                out += '<td>' + item + '</td>'
        print "out=", out
        outlist3.append(out)

    is_option = 'N'
    if document.train_id:
        is_option = 'Y'
    jopts = document.ml_opts
    if jopts:
        jopts = json.loads(document.ml_opts)
        jopts["learning_algorithm"] = \
            jopts["learning_algorithm"].title().replace("_", " ")

    return render(request,
        'atdml/feature_all.html',
        {'document': document, 'flist1': outlist1, 'flist2': outlist2,
         'flist3': outlist3, 'is_option': is_option, 'jopts': jopts},
    )
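# show_partial() truncates long cell text whose full value goes into the
# tooltip title. A hypothetical sketch using the MAX_DISPLAY_LEN cutoff the
# callers already test against; the ellipsis suffix is an assumption:
def show_partial(item):
    if len(item) > MAX_DISPLAY_LEN:
        return item[:MAX_DISPLAY_LEN] + "..."
    return item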
def feature_impo2(request, rid, perm, disabled4reader, msg_id):
    print "in feature_impo2(), rid=", rid
    msg_success = ""
    msg_error = ""
    #msg_id for message after POST and avoid re-POST
    if msg_id == "231":
        msg_success = settings.MSG_FEATURE_IMPO_SUCCESS + " Id=" + str(rid)
    elif msg_id == "232":
        msg_success = settings.MSG_FEATURE_SET_SUCCESS + " Id=" + str(rid)
    elif msg_id == "233":
        msg_success = settings.MSG_FEATURE_DROP_SUCCESS + " Id=" + str(rid)
    elif msg_id >= "90000":
        msg_error = settings.MSG_FEATURE_IMPO_FAILED + " Id=" + str(rid)

    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('list'))

    filename = None
    if document:
        filename = document.filename

    if not filename:
        msg_error = "Dataset not found! id=" + rid
        msg_id = "90000"
        print "msg_error=", msg_error

    #get verified features from db
    try:
        vflist = Feature_click.objects.all().filter(
            rid=rid, vote__gte=FILTER_COUNT).order_by('-vote')[:LIST_COUNT]
    except Exception:
        vflist = []

    #get combined features from file
    out_COMB = settings.RESULT_DIR_FULL + "/" + str(rid) + "/" + str(rid) + "_score_combine.txt"

    lines = []
    outlist1 = []
    items = []
    no_feature = "N"
    try:
        with open(out_COMB, 'r') as f:
            lines = f.read().splitlines()
    except IOError:
        no_feature = "Y"

    if len(lines) >= LIST_COUNT:
        lines = lines[:LIST_COUNT]

    # replace \t with html tags
    for idx, line in enumerate(lines):
        items = line.split('\t')
        # chk box, fid, score, desc, id
        line = '<td ><input type="checkbox" class="checkbox" name="ck_fid" value="' + items[0] + '"></input></td>' \
            + '<td>' + items[0] + '</td><td>' + items[1] + '</td><td data-placement="bottom" data-toggle="tooltip" ' \
            + ' title="'
        # escape < and > for html
        items[2] = items[2].replace("<", "&lt;").replace(">", "&gt;")
        if len(items[2]) > MAX_DISPLAY_LEN:
            line += items[2] + '">' + show_partial(items[2])
        else:
            line += '">' + items[2]
        line += '</td><td>' + str(idx + 1) + '</td>'
        outlist1.append(line)
    jopts = document.ml_opts
    if jopts:
        jopts = json.loads(document.ml_opts)
        jopts["learning_algorithm"] = \
            jopts["learning_algorithm"].title().replace("_", " ")

    return render(request,
        'atdml/feature.html',
        {'document': document, 'vflist': vflist, 'flist1': outlist1,
         'msg_success': msg_success, 'msg_error': msg_error,
         'no_feature': no_feature, 'jopts': jopts},
        #context_instance=RequestContext(request)
    )
def result2(request, rid, oid, perm, disabled4reader):
    print 'in result2, rid=', rid, ', oid=', oid
    o_rid = rid
    # get train option doc, if oid provided
    if oid > 0:
        rid = oid
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('atdml.views.list'))

    # for return only
    #form=DocumentForm()
    predictions = []  #Document.objects.all().filter(file_type="predict", train_id=rid).order_by('-id')[0:10]
    # get train option id
    train_id = document.train_id
    ml_lib = document.ml_lib
    status = document.status
    # get sample file list
    sflist = _predict.get_sfile_list(document.filename, document.id,
                                     document.file_type, train_id)
    # how to get dir?
    # get cross validation info
    cv_grid_data, param_str, jopts = get_cv_grid(document, rid)
    print "************** ml_has_cv=", document.ml_has_cv, cv_grid_data

    if jopts:
        print "rid=", rid, ", jopts=", jopts
    else:
        print "rid=", rid, ", jopts not found"

    has_roc = has_result_file(rid, str(rid) + "_roc.json")
    has_mrun = has_result_file(rid, str(rid) + "_mrun.json")
    has_score = has_result_file(rid, str(rid) + "_score_graph.json")
    print "has_roc=", has_roc, ", has_mrun=", has_mrun, ", has_score=", has_score
    has_result = None

    # check algorithm
    train_opt = {}
    if not document.ml_opts is None and len(document.ml_opts) > 0:
        train_opt = json.loads(document.ml_opts)

    if document.status_code >= 500:
        # check if clustering data is in
        if has_result_file(rid, str(rid) + "_cluster*.png") \
                and train_opt.get("learning_algorithm") in ('kmeans',):
            has_result = "U"
        else:
            # check if png for classification exists?
            has_result = "Y"
    elif ml_lib == "dnn":  # allow DNN to view status
        has_result = "Y"

    has_featc = has_result_file(rid, str(rid) + "_feat_coef.json")
    has_fp = has_result_file(rid, str(rid) + "_false_pred.json")

    # get ml_opts
    feature_excluded_list = None
    if "has_excluded_feat" in train_opt and train_opt["has_excluded_feat"] == 1:
        # get data from mongo.dataset_info
        try:
            doc = query_mongo.find_one(
                settings.MONGO_OUT_DNS, settings.MONGO_OUT_PORT,
                settings.MONGO_OUT_DB, settings.MONGO_OUT_TBL,
                settings.MONGO_OUT_USR, settings.MONGO_OUT_PWD,
                '{"rid":' + rid + ',"key":"feature_excluded"}', '{"value":1}')
            if not doc is None:
                #print "doc type=", type(doc), ",doc=",doc
                feature_excluded_list = doc["value"]
                print "feature_excluded_list=", feature_excluded_list
        except Exception as e:
            print "Exception from MongoDB:", e

    rpage = 'atdml/result.html'
    if oid > 0:
        rpage = 'atdml/result_opts.html'
    feat_str = ""
    if not feature_excluded_list is None:
        feat_str = ','.join(str(i) for i in feature_excluded_list)
    print "has_roc=", has_roc, ", has_mrun=", has_mrun, ", has_result=", has_result, "rpage=", rpage

    # get perf and dataset info
    if document.perf_measures and document.perf_measures != "null":
        perf_measures = json.loads(document.perf_measures)
    else:
        perf_measures = {}
    if document.dataset_info and document.dataset_info != "null":
        dataset_info = json.loads(document.dataset_info)
    else:
        dataset_info = {}
    return render(
        request,
        rpage,  #'atdml/result.html'
        {
            "document": document,
            "predictions": predictions,
            "sflist": sflist,  #, "form": form
            "disabled4reader": disabled4reader,
            "perm": perm,
            "cv_grid_data": cv_grid_data,
            "param_str": param_str,
            "has_fp": has_fp,
            "jopts": jopts,
            "has_roc": has_roc,
            "has_mrun": has_mrun,
            "has_result": has_result,
            "has_featc": has_featc,
            "has_score": has_score,
            "feature_excluded": feat_str,
            "ml_lib": ml_lib,
            "status": status,
            "tp": perf_measures["tp"] if "tp" in perf_measures else "",
            "tn": perf_measures["tn"] if "tn" in perf_measures else "",
            "fp": perf_measures["fp"] if "fp" in perf_measures else "",
            "fn": perf_measures["fn"] if "fn" in perf_measures else "",
            "phi": '%0.5f' % perf_measures["phi"] if "phi" in perf_measures else "",
            "fscore": '%0.5f' % perf_measures["fscore"] if "fscore" in perf_measures else "",
            "roc_auc": '%0.5f' % perf_measures["roc_auc"] if "roc_auc" in perf_measures else "",
            "class_count": dataset_info["class_count"] if "class_count" in dataset_info else "",
            "training_fraction": dataset_info["training_fraction"] if "training_fraction" in dataset_info else "",
            "dataset_count": dataset_info["dataset_count"] if "dataset_count" in dataset_info else "",
            "MEDIA_URL": settings.MEDIA_URL,
        },
    )
def get_post_predict(request, rid, hash, perm, disabled4reader):
    print "in get_post_predict, hash=", hash, " user=", request.user.username
    # check access to the data model record (reconstructed: the original
    # lines here were masked by the scraper)
    document = _list.get_ds_doc(rid, perm)
    if not document:
        print "not found!"
        #ret={"error":"data model not found!"}
        return Response({"error": "dataset not found"}, status=404)

    if hash:
        hash = hash.lower()

    if request.method == 'GET':
        print "In GET: rid=", rid, ",hash=", hash
        # by prediction
        if hash.isdigit():
            doc = Document.objects.all().filter(file_type="predict",
                                                train_id=rid,
                                                id=hash)
        else:
            doc = Document.objects.all().filter(file_type="predict",
                                                train_id=rid,
                                                filename=hash)
        print "doc=", doc
        # get by md5/filename
        if len(doc) > 0:
            #slz = PredictSerializer(doc, many=True)
            #return Response(slz.data)
            return Response(ml_serializers.pred2json(doc))
        return Response({"error": "prediction not found"}, status=404)

    action_type = 'hash_predict'

    if request.method == 'POST':
        print "in POST, list=", request.POST.get('list')
        verbose = request.POST.get('verbose')
        verbose = "0" if verbose is None else verbose
        print "verbose=", verbose

        host = request.POST.get('host')
        host = "" if host is None else host
        port = request.POST.get('port')
        port = "" if port is None else port
        db = request.POST.get('db')
        db = "" if db is None else db
        tbl = request.POST.get('tbl')
        tbl = "" if tbl is None else tbl
        usr = request.POST.get('usr')
        usr = "" if usr is None else usr
        pwd = request.POST.get('pwd')
        pwd = "" if pwd is None else pwd
        model_filename = request.POST.get('model_filename')
        model_filename = "" if model_filename is None else model_filename
        keep_flag = request.POST.get('keep_flag')
        keep_flag = "0" if keep_flag is None else keep_flag

        predict_list = []
        pred_doc = []

        # for offline massive prediction
        if hash == 'list_offline':
            hash_list = request.POST.get('list')
            feat_threshold = request.POST.get('feat_threshold')
            pred_doc = _predict.predict_massive(document,
                                                hash_list,
                                                host=host,
                                                port=port,
                                                db=db,
                                                tbl=tbl,
                                                usr=usr,
                                                pwd=pwd,
                                                model_filename=model_filename,
                                                keep_flag=keep_flag,
                                                feat_threshold=feat_threshold)
            return Response(pred_doc)

        # for ONE hash list
        elif 'list' in hash:
            hash_str = request.POST.get('list')
            if hash_str:
                hash_str = hash_str.lower()
                predict_list = hash_str.split(',')
                # get unique items
                predict_list = set(predict_list)

        # upload raw data for prediction
        elif 'raw' in hash:
            form = DocumentForm(request.POST, request.FILES)
            if form.is_valid():
                print "in API upload predict"
                newdoc = Document(docfile=request.FILES['docfile'])
                newdoc.filename = request.FILES['docfile']
                if document.file_type == 'ensemble':  # upload binary for ensemble predict
                    action_type = 'ensemble_predict'
                    print "for ensemble predict..."
                else:
                    action_type = 'upload_predict'
                predict_list.append(newdoc.filename)
                print "newdoc.filename=", newdoc.filename
            else:
                print "Form is invalid!"
                return Response({"Error": "invalid form"}, status=404)

        # upload binary for sandbox execution & predict
        elif hash == 'exec':
            form = DocumentForm(request.POST, request.FILES)
            if form.is_valid():
                print "in API upload for execution & wait:"
                exe_type = request.POST.get('_file_type')
                # handle by _predict; key field is "_file_type"
                if exe_type is None:
                    print "required field not found!"
                    return Response({"Error": "required field not found."},
                                    status=404)
                else:
                    return _predict.predict(request,
                                            rid,
                                            cid=None,
                                            msg_id=None,
                                            perm=perm,
                                            disabled4reader=disabled4reader)

            else:
                print "Form is wrong!!"
                return Response({"Error": "invalid form"}, status=404)
        else:
            predict_list.append(hash)

        # TBD need to check upload count here?
        for p_item in predict_list:
            # create newdoc
            if action_type == "hash_predict":
                newdoc = Document()
                newdoc.filename = p_item
            newdoc.submitted_by = request.user.username
            newdoc.acl_list = perm
            newdoc.train_id = str(rid)
            if action_type == "ensemble":
                newdoc.file_type = "ensemble_predict"
            else:
                newdoc.file_type = "predict"

            newdoc.ml_n_gram = document.ml_n_gram
            newdoc.ml_opts = document.ml_opts
            newdoc.ml_lib = document.ml_lib

            newdoc.db_host = host
            newdoc.db_db = db
            newdoc.db_port = port
            newdoc.db_tbl = tbl

            newdoc.save()
            cid = newdoc.id
            #upload_fname=p_item
            #print "before predict_hash *************** "
            ret = _predict.predict_hash(document,
                                        newdoc,
                                        p_item,
                                        tlabel="",
                                        action_type=action_type,
                                        host=host,
                                        port=port,
                                        db=db,
                                        tbl=tbl,
                                        usr=usr,
                                        pwd=pwd,
                                        verbose=verbose)
            #print 'in POST: ret=', ret
            pred_doc.append(ret)

        return Response(pred_doc)
    else:
        return Response({"error": "data not found"}, status=404)
def predict(request, rid, cid, msg_id, perm, disabled4reader):
    print 'in _predict.predict(), rid=', rid
    # get perm
    #uname,grp,perm,disabled4reader=get_perm(request)

    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('atdml.views.list'))

    # dataset's type:
    ds_ftype = document.file_type
    msg_error = ""
    msg_success = ""
    tlabel = ""
    #print 'hello2:', request.method

    # set message for GET
    if msg_id == "201":
        msg_success = settings.MSG_PREDICT_SUCCESS + " Id=" + str(cid)
    elif msg_id == "205":
        msg_success = settings.MSG_PREDICT_APK_UPLOAD_SUCCESS + " Id=" + str(
            cid)
    elif msg_id == "90201":
        msg_error = settings.MSG_PREDICT_FAILED
    elif msg_id == "90202":
        msg_error = settings.MSG_PREDICT_DUPLICATED
    elif msg_id == "90901":
        msg_error = settings.MSG_RECAPTCHA_FAILED
    elif msg_id and "90902" in msg_id:
        arr = msg_id.split('.')
        if len(arr) > 1:  # append count to the end
            msg_error = settings.MSG_UPLOAD_OVER_MAX + " " + arr[1]
        else:
            msg_error = settings.MSG_UPLOAD_OVER_MAX

    # predict action
    action_type = request.POST.get('_action_type')
    print 'action_type=', action_type
    upload_fname = ""
    newdoc = None
    # for return only
    form = DocumentForm()

    if request.method == 'POST':  # =========== =============
        print 'in predict POST'
        dns = document.db_host
        port = document.db_port
        db = document.db_db
        tbl = document.db_tbl
        hash = ""
        usr = ""
        pwd = ""
        n_gram = document.ml_n_gram
        opt_str = document.ml_opts
        lib = document.ml_lib
        db_proj = document.db_proj if document.db_proj else ""
        pattern = document.pattern
        pca_opts = document.ml_pca_opts
        ml_feat_threshold = request.POST.get('_feat_threshold')
        if ml_feat_threshold is None or ml_feat_threshold == "":
            ml_feat_threshold = document.ml_feat_threshold
        ds_list = document.ds_list
        if pca_opts is None:
            pca_opts = ""

        if pattern is None:
            pattern = ""

        # find parent dataset id
        ds_id = document.train_id
        if ds_id is None or ds_id == "None" or document.option_state == "new_featuring":
            ds_id = str(rid)  # use self's feature list, if is a feature option
            #print "hihi"
        ds_id = str(ds_id)
        exe_type = request.POST.get('_file_type')
        if not exe_type is None:
            exe_type = exe_type.lower()
        emulater_config = ""
        from_api = None

        if "apk" in exe_type and action_type is None:
            print "_predict.predict() in apk"
            action_type = 'upload_predict'  # for upload apk for execution from API
            from_api = "y"

        # upload a file to predict
        if action_type == 'upload_predict':
            form = DocumentForm(request.POST, request.FILES)
            print "exe_type=", exe_type
            if form.is_valid():
                newdoc = Document(docfile=request.FILES['docfile'])
                newdoc.filename = request.FILES['docfile']
                emulater_config = request.POST.get('_emulater_config')
                pert_flag = None
                print "emulater_config=", emulater_config
                # flag for sandbox execution
                if "apk" in exe_type:
                    if "dynamic" in exe_type:
                        newdoc.status = "apk_queued"
                        newdoc.desc = "has_exe_log"  # flag for apk execution log
                    elif "static" in exe_type:
                        newdoc.desc = "apk static"
                    # check if static apk,
                elif "image" in exe_type:
                    newdoc.file_type = "image_predict"
                    action_type = exe_type
                    pert_flag = request.POST.get('_pert_flag')

                elif document.file_type == "ensemble":
                    # special type for ensemble
                    action_type = "ensemble"
                    newdoc.file_type = "ensemble_predict"
            else:  # form not valid ========== ====
                print 'invalid form'
                form = DocumentForm()
        elif action_type == 'hash_predict':
            hash = request.POST.get('_hash')
            if hash:
                hash = hash.lower()
            dns = request.POST.get("_dns")
            port = request.POST.get("_port")
            db = request.POST.get('_db')
            tbl = request.POST.get('_tbl')
            usr = request.POST.get('_username')
            pwd = request.POST.get('_password')
            print "_hash=", hash
            print "dns=", dns, "_db=", db
            newdoc = Document()
            newdoc.filename = hash
            upload_fname = hash
            newdoc.db_host = dns
            newdoc.db_db = db
            newdoc.db_port = port
            newdoc.db_tbl = tbl
        else:  # ajax; for sample predict
            sname = request.POST.get('filename')
            #print 'sname=',sname
            idx = sname.rfind('.')  # rfind returns -1 (vs rindex raising) when no '.'
            if idx > 0:
                tlabel = sname[idx + 1:].lower().strip()
                print 'label=' + tlabel + "<==="
            newdoc = Document(docfile=sname)
            newdoc.filename = sname.strip()
            newdoc.true_label = tlabel

        newdoc.submitted_by = request.user.username
        newdoc.acl_list = perm
        if newdoc.file_type is None:
            newdoc.file_type = "predict"  # TBD
        newdoc.ml_pca_opts = pca_opts
        newdoc.ml_feat_threshold = ml_feat_threshold

        if newdoc.docfile:
            upload_fname = newdoc.docfile.name

        #print "docfile.name=", newdoc.docfile.name
        #print "newdoc.filename=", newdoc.filename
        print "upload_fname=", upload_fname

        #print "********************"
        newdoc.train_id = rid
        newdoc.save()

        filename = document.filename  # parent filename
        fnumb = str(document.total_feature_numb)
        cid = str(newdoc.id)
        verbose = "1"  # default to generate feature list
        (ret, msg_id, msg)=invoke_pred_script(rid, ds_id, cid, tlabel, upload_fname, filename, fnumb, action_type, ds_ftype \
              , dns, port, db, tbl, usr, pwd, db_proj, hash, n_gram, opt_str, lib, pattern, verbose, pca_opts, exe_type, emulater_config \
              , ml_feat_threshold, ds_list=ds_list, pert_flag=pert_flag)

        print "msg_id=", msg_id, ", msg=" + msg

        # for API
        if from_api == "y":
            print "_predict.predict() in from_api:"

            newdoc = Document.objects.get(id=cid)
            wdoc = {
                "id": cid,
                "status": newdoc.status,
                "pdate": newdoc.local_processed_date(),
                "by": newdoc.submitted_by,
                "filename": newdoc.filename,
                "true_label": newdoc.true_label,
                "msg": msg,
                "prediction": newdoc.prediction,
                "msg_id": msg_id,
                "predict_val": newdoc.predict_val,
                "train_id": newdoc.train_id,
                "feat_list": ""
            }
            return Response([wdoc])  # keep same format as regular pred output

        if request.is_ajax():
            print "Ajax predict************"
            if msg_id == "90201" or msg_id == "90205":
                print "cid=", cid, ", msg_id=", msg_id
                #ret_msg=msg_error
                ret_data = {
                    "msg":
                    msg + " Id=" + str(cid) + ", filename=[" +
                    newdoc.filename + "]"
                }
                print "ret_data", ret_data
                return HttpResponse(json.dumps(ret_data),
                                    content_type="application/json",
                                    status=400)
            #else:
            #   ret_msg=msg_success
            #print "ret_msg="+ret_msg

            newdoc = Document.objects.get(id=cid)
            ret_data = {
                "id": cid,
                "status": newdoc.status,
                "pdate": newdoc.local_processed_date(),
                "by": newdoc.submitted_by,
                "filename": newdoc.filename,
                "true_label": newdoc.true_label,
                "msg": msg,
                "prediction": newdoc.prediction
            }

            print "json dump=" + json.dumps(ret_data)
            return HttpResponse(json.dumps(ret_data),
                                content_type="application/json")

    elif request.method == 'GET':  # =========== =============
        print 'in _predict.predict2 GET'
        param_str = document.ml_opts
        try:
            jopts = json.loads(document.ml_opts)
        except Exception:
            jopts = {}
    else:  # not POST ========== ====
        print 'not post'

    print "echo msg_error=" + msg_error, ", msg_success=" + msg_success

    predictions = Document.objects.all().filter(
        Q(file_type__icontains="predict"), train_id=rid).order_by('-id')[0:100]
    print "pred len=", len(predictions)
    # get sample file list
    ds_id = document.train_id
    if (rid == ds_id or document.option_state == "new_featuring"):
        ds_id = rid  # use self's feature list

    sflist = get_sfile_list(document.filename, document.id, document.file_type,
                            ds_id)
    # how to get dir?
    jopts = document.ml_opts
    pca_jopts = document.ml_pca_opts
    if pca_jopts:
        pca_jopts = json.loads(document.ml_pca_opts)
    if jopts:
        jopts = json.loads(document.ml_opts)
        jopts["learning_algorithm"] = \
            jopts["learning_algorithm"].title().replace("_", " ")

    #print "has_roc=",has_roc,", has_mrun=",has_mrun
    recaptcha = settings.RECAPTCHA_PREDICT
    if recaptcha is None:
        recaptcha = "N"

    return render(
        request,
        'atdml/predict.html',
        {
            'document': document,
            'form': form,
            'predictions': predictions,
            'disabled4reader': disabled4reader,
            'perm': perm,
            'msg_error': msg_error,
            'msg_success': msg_success,
            'sflist': sflist,
            "jopts": jopts,
            "pca_jopts": pca_jopts,
            "MEDIA_URL": settings.MEDIA_URL,
            "use_recaptcha": recaptcha
        },
    )
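# get_sfile_list() supplies the sample-file list on the predict/result pages.
# A hypothetical sketch; the sample directory layout is an assumption
# (settings.TRAIN_DES_DIR appears only in commented-out code in mrun2() above):
import glob
import os

def get_sfile_list(filename, rid, file_type, train_id):
    sdir = os.path.join(settings.TRAIN_DES_DIR, str(train_id or rid))
    return sorted(os.path.basename(f) for f in glob.glob(sdir + "/*"))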
def exclude_feature(request, rid, perm, disabled4reader):
    print "in exclude_feature"
    msg_id = None
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('list'))

    # find parent dataset id
    train_id = document.train_id
    if train_id is None:
        train_id = rid

    excl_feat = None
    if request.method == 'POST':
        excl_feat = request.POST.get('hf_w_excl_feat')

    print "excl_feat=", excl_feat

    json2save = {}
    fid_dict = {}
    ml_opts = json.loads(document.ml_opts)
    has_excl_key = 0

    # check if key exists
    if "has_excluded_feat" in ml_opts:
        has_excl_key = 1

    print "ml_opts=", ml_opts, " type=", type(ml_opts), ", excl_feat=", excl_feat
    fid_arr = []
    # build list for excluded features
    if excl_feat is not None and len(excl_feat) > 0:
        fid_arr = excl_feat.split(',')

        # update ml_opts
        ml_opts["has_excluded_feat"] = 1
        has_excl_key = 1
    else:  # excl feat was removed
        if has_excl_key == 1:
            ml_opts["has_excluded_feat"] = 0

    # only update if excl key exists
    if has_excl_key == 1:
        # update ml_opts
        document.ml_opts = json.dumps(ml_opts)
        #print  "ml_opts str=",json.dumps(ml_opts)
        document.save()

        # save exclude list to mongo
        json2save["rid"] = int(rid)  # int(), not eval(), on user input
        json2save["key"] = "feature_excluded"
        json2save["value"] = fid_arr

        feat_excl = json.dumps(json2save)
        #print "feature_excluded=",feat_excl
        filter = '{"rid":' + rid + ',"key":"feature_excluded"}'
        upsert_flag = True
        #print "filter=",filter,",feat_excl=",feat_excl
        # write to mongoDB.myml.dataset_info; ignore doc with duplicated key
        ret = query_mongo.upsert_doc(
            settings.MONGO_OUT_DNS, settings.MONGO_OUT_PORT,
            settings.MONGO_OUT_DB, settings.MONGO_OUT_TBL,
            settings.MONGO_OUT_USR, settings.MONGO_OUT_PWD,
            filter, feat_excl, upsert_flag)
        print "Upsert count for feat_excl: ret=", ret

    return HttpResponseRedirect(reverse('result_opts', args=[train_id, rid]))
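# query_mongo.upsert_doc() is the write-side counterpart of find_one() above.
# A hypothetical pymongo sketch; connection handling and the return value
# (count of documents written) are assumptions:
import json
from pymongo import MongoClient

def upsert_doc(dns, port, db, tbl, usr, pwd, filter_jstr, doc_jstr, upsert):
    uri = "mongodb://%s:%s@%s:%s/%s" % (usr, pwd, dns, port, db)
    client = MongoClient(uri)
    try:
        res = client[db][tbl].replace_one(json.loads(filter_jstr),
                                          json.loads(doc_jstr), upsert=upsert)
        return 1 if res.upserted_id is not None else res.modified_count
    finally:
        client.close()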