示例#1
0
def _predictSingleclassBaggingModelMatrix(CPIDs,RuntimeList,vtname,pred_X,varnames,selectruntimesvarnames,params,matshape,baggingweights,\
                                          single_thres,bool_gpu,n_gpus,n_parallels,bool_save,savedirbase):
    """Sum the weighted single-class predictions of the runs given to this worker.

    The current process id is registered in ``CPIDs`` to obtain a worker
    index, which selects ``gpu_id`` when ``bool_gpu`` is set. For every run in
    ``RuntimeList`` the model stored at
    ``<savedirbase>/runtime_<run>/<vtname>_xgboost_singleclass_run<run>.model``
    is loaded, the run's own columns of ``pred_X`` are selected, and
    ``baggingweights[run]`` times the second value returned by
    ``xgbf.Predict`` is added to the running total.

    Args:
        CPIDs: Mutable list of worker process ids; may be appended to.
        RuntimeList: Run indices handled by this call.
        vtname: Name used to build the model file name.
        pred_X: 2-D feature matrix indexed as ``pred_X[:, columns]``.
        varnames: Column names of ``pred_X``.
        selectruntimesvarnames: Per-run list of selected variable names.
        params: Model parameters; deep-copied before ``gpu_id`` is set.
        matshape: Shape whose first two entries give the output length.
        baggingweights: Per-run ensemble weights.
        single_thres: Threshold forwarded to ``xgbf.Predict``.
        bool_gpu: When true, ``gpu_id`` is set to ``worker index % n_gpus``.
        n_gpus: Number of GPUs used for the modulo above.
        n_parallels: Maximum number of distinct PIDs stored in ``CPIDs``.
        bool_save: Not used by this function.
        savedirbase: Base directory containing the ``runtime_<run>`` folders.

    Returns:
        The accumulated weighted sum, which starts as a float32 zero vector
        of length ``matshape[0] * matshape[1]``.

    Raises:
        ValueError: If this process is not in ``CPIDs`` and the list already
            holds ``n_parallels`` entries.
    """
    print("Predicting Singleclass Bagging Ensemble Models...")
    params_parallel = copy.deepcopy(params)
    process_pid = os.getpid()
    # Register each PID only once. A worker that runs several tasks would
    # otherwise fill CPIDs with duplicates and leave no slot for the other
    # workers, whose CPIDs.index() lookup would then fail.
    if process_pid not in CPIDs and len(CPIDs) < n_parallels:
        CPIDs.append(process_pid)
    process_pid_index = CPIDs.index(process_pid)
    print("Worker #%d: PID = %d" % (process_pid_index, process_pid))
    if bool_gpu:
        params_parallel['gpu_id'] = process_pid_index % n_gpus
    # Execute tasks
    pred_pY_ense = np.zeros(matshape[0] * matshape[1], dtype=np.float32)
    for runtime in RuntimeList:
        print("Predicting Singleclass Model...    runtime = %d" % runtime)
        savedir = savedirbase + os.sep + "runtime_" + str(runtime)
        modelName = vtname + '_xgboost_singleclass_run' + str(
            runtime) + ".model"
        modeldir = savedir + os.sep + modelName
        model = xgbf.loadModel(modeldir, params_parallel)
        # Each run was trained on its own subset of variables.
        selruntimevarstr = selectruntimesvarnames[runtime]
        selruntimevaridx = _findListSubsetIndexes(selruntimevarstr, varnames)
        pred_X_runtime = pred_X[:, selruntimevaridx]
        _, pred_pY = xgbf.Predict(model,
                                  pred_X_runtime,
                                  bool_binary=1,
                                  threshold=single_thres)
        pred_pY_ense = pred_pY_ense + baggingweights[runtime] * pred_pY
        print("Model: %d Calculating Finished!\n" % (runtime))
    return pred_pY_ense
示例#2
0
def testMulticlassSoftmaxModel(ModelList,TestDataSet,VegeTypes,varnames,params,runtime=-1,bool_pandas=True,\
                               bool_strclass=False,labelHeaderName="",bool_save=False,savedir=""):
    """Run the softmax multiclass model on a test set.

    The first entry of ``ModelList`` is used when the list is non-empty;
    otherwise the model file is loaded from ``savedir`` (the file name
    carries a ``_run<runtime>`` suffix unless ``runtime`` is -1).
    ``bool_save`` is not consulted by this function.

    Args:
        ModelList: Sequence of models; may be empty.
        TestDataSet: Input for ``xgbf.trainingDataSet`` when ``bool_pandas``
            is true, otherwise a ``[test_Y, test_X]`` pair.
        VegeTypes: Class names; their count is the number of classes.
        varnames: Feature names.
        params: Parameters forwarded to ``xgbf.loadModel``.
        runtime: Run index, or -1 for the un-suffixed model file.
        bool_pandas: Selects how ``TestDataSet`` is interpreted.
        bool_strclass: Forwarded to ``xgbf.trainingDataSet``; also disables
            the ``init.mergeCategories`` step.
        labelHeaderName: Forwarded to ``xgbf.trainingDataSet``.
        bool_save: Unused.
        savedir: Directory of the saved model.

    Returns:
        ``[pred_Y, pred_pY, test_Y]`` where ``pred_Y`` is the argmax of
        ``pred_pY`` along axis 1.
    """
    n_classes = len(VegeTypes)

    # Prefer an in-memory model; fall back to the file on disk.
    if len(ModelList):
        classifier = ModelList[0]
    else:
        model_file = ('softmax_multiclass.model' if runtime == -1 else
                      'softmax_multiclass_run' + str(runtime) + '.model')
        classifier = xgbf.loadModel(savedir + os.sep + model_file, params)

    if bool_pandas:
        labels, features = xgbf.trainingDataSet(
            TestDataSet,
            VegeTypes,
            varnames,
            bool_strclass=bool_strclass,
            labelHeaderName=labelHeaderName)
    else:
        labels, features = TestDataSet

    # A single 1-D sample is promoted to one-row 2-D arrays.
    if len(features.shape) == 1:
        row_X = np.zeros([1, len(varnames)])
        row_X[0, :] = features
        features = row_X
        row_Y = np.zeros([1, n_classes])
        row_Y[0, :] = labels
        labels = row_Y

    if not bool_strclass and len(labels.shape) > 1:
        labels = init.mergeCategories(labels)

    probabilities = xgbf.Predict(classifier, features, bool_binary=False)
    predicted = np.argmax(probabilities, axis=1)
    return [predicted, probabilities, labels]
示例#3
0
def predictMulticlassCategoryModelCvted(ModelList,
                                        pred_X,
                                        params,
                                        runtime=-1,
                                        bool_retlabel=False,
                                        num_instance=-1,
                                        num_class=-1,
                                        bool_save=False,
                                        savedir=""):
    """Predict with the category-style multiclass model on prepared input.

    Args:
        ModelList: Sequence whose first entry is used when ``bool_save`` is
            false.
        pred_X: Model input, passed to ``xgbf.Predict`` unchanged.
        params: Parameters forwarded to ``xgbf.loadModel``.
        runtime: Run index, or -1 for the un-suffixed model file.
        bool_retlabel: When true, return class labels instead of the raw
            prediction vector.
        num_instance: Number of instances in the flat prediction vector.
        num_class: Number of classes per instance.
        bool_save: When true, load the model from ``savedir``.
        savedir: Directory of the saved model.

    Returns:
        The raw output of ``xgbf.Predict`` when ``bool_retlabel`` is false.
        Otherwise the flat output is viewed as ``(num_instance, num_class)``
        and the argmax of every row is returned. If exactly one of
        ``num_instance`` / ``num_class`` is left at -1, it is inferred from
        the length of the prediction vector.

    Raises:
        ValueError: If the prediction vector cannot be reshaped to
            ``(num_instance, num_class)``.
    """
    if bool_save:
        if runtime == -1:
            modelName = 'category_multiclass.model'
        else:
            modelName = 'category_multiclass_run' + str(runtime) + '.model'
        modeldir = savedir + os.sep + modelName
        model = xgbf.loadModel(modeldir, params)
    else:
        model = ModelList[0]
    pred_pY = xgbf.Predict(model, pred_X, bool_binary=False)
    if not bool_retlabel:
        return pred_pY
    # The flat vector holds num_class consecutive scores per instance, so a
    # single reshape replaces the per-instance copy loop.
    scores = np.asarray(pred_pY, dtype=np.float64)
    if num_instance >= 0 and num_class >= 0:
        # Ignore any trailing values beyond the requested block.
        scores = scores[:num_instance * num_class]
    scores = scores.reshape(num_instance, num_class)
    return np.argmax(scores, axis=1)
示例#4
0
def predictMulticlassCategoryModelMatrix(ModelList,
                                         MatX,
                                         num_class,
                                         params,
                                         bool_save=False,
                                         savedir=""):
    """Predict class labels and scores for every cell of a 3-D feature matrix.

    ``MatX`` is indexed as ``[row, col, variable]``. It is flattened to one
    row per cell, passed through ``init.formatMulticlassCategoryInput`` and
    ``xgbf.Predict``, and the flat result is folded back to the grid.

    Args:
        ModelList: Sequence whose first entry is used when ``bool_save`` is
            false.
        MatX: 3-D array of features.
        num_class: Number of classes per cell in the flat prediction.
        params: Parameters forwarded to ``xgbf.loadModel``.
        bool_save: When true, load ``category_multiclass.model`` from
            ``savedir``.
        savedir: Directory of the saved model.

    Returns:
        ``[pred_Y, pred_pY]``: ``pred_Y`` has shape ``(rows, cols)`` and holds
        the argmax class; ``pred_pY`` is float32 with shape
        ``(rows, cols, num_class)``.

    Raises:
        ValueError: If the prediction vector holds fewer than
            ``rows * cols * num_class`` values.
    """
    n_rows, n_cols, n_vars = MatX.shape[:3]
    n_cells = n_rows * n_cols
    # np.array always copies, so the model input never aliases the caller's
    # matrix; the reshape keeps row-major cell order.
    pred_X = np.array(MatX, dtype=np.float32, order='C').reshape(
        n_cells, n_vars)
    if bool_save:
        modelName = 'category_multiclass.model'
        modeldir = savedir + os.sep + modelName
        model = xgbf.loadModel(modeldir, params)
    else:
        model = ModelList[0]
    pred_X = init.formatMulticlassCategoryInput([], pred_X, num_class, 0)
    print("Predicting......")
    pred_pY = xgbf.Predict(model, pred_X, bool_binary=False)
    # num_class consecutive scores belong to one cell; fold them in one step
    # instead of looping over every cell in Python.
    scores = np.asarray(pred_pY, dtype=np.float64)[:n_cells * num_class]
    scores = scores.reshape(n_cells, num_class)
    pred_Y = np.argmax(scores, axis=1).reshape(n_rows, n_cols)
    prob_Y = scores.reshape(n_rows, n_cols, num_class).astype(np.float32)
    return [pred_Y, prob_Y]
示例#5
0
def predictMulticlassSoftmaxModelCvted(ModelList,
                                       pred_X,
                                       params,
                                       runtime=-1,
                                       bool_save=False,
                                       savedir=""):
    """Predict with the softmax multiclass model on prepared input.

    Args:
        ModelList: Sequence whose first entry is used when ``bool_save`` is
            false.
        pred_X: Model input, passed to ``xgbf.Predict`` unchanged.
        params: Parameters forwarded to ``xgbf.loadModel``.
        runtime: Run index, or -1 for the un-suffixed model file.
        bool_save: When true, load the model from ``savedir``.
        savedir: Directory of the saved model.

    Returns:
        Whatever ``xgbf.Predict`` returns for ``bool_binary=False``.
    """
    if not bool_save:
        return xgbf.Predict(ModelList[0], pred_X, bool_binary=False)

    model_file = ('softmax_multiclass.model' if runtime == -1 else
                  'softmax_multiclass_run' + str(runtime) + '.model')
    loaded = xgbf.loadModel(savedir + os.sep + model_file, params)
    return xgbf.Predict(loaded, pred_X, bool_binary=False)
示例#6
0
def testSingleclassBaggingModel(Models,TestDataSet,vtname,params,single_thres=0.5,runtimes=300,\
                                bool_strclass=False,labelHeaderName="",bool_save=False,savedirbase=""):
    """Evaluate a single-class bagging ensemble on a test set.

    With ``bool_save`` the ensemble weights and per-run variable names are
    read from CSV files under ``savedirbase`` and every run's model is loaded
    from its ``runtime_<run>`` folder; otherwise all three come from
    ``Models``. Each run's prediction is weighted and summed, and the sum is
    thresholded with ``single_thres``.

    Args:
        Models: ``[ModelList, selectruntimesvarnames, ense_weights]``; only
            unpacked when ``bool_save`` is false.
        TestDataSet: Test data passed to ``xgbf.trainingDataSet``; its length
            sets the size of the accumulator.
        vtname: Class name used for file names and as the single target.
        params: Parameters forwarded to ``xgbf.loadModel``.
        single_thres: Decision threshold.
        runtimes: Number of runs in the ensemble.
        bool_strclass: Forwarded to ``xgbf.trainingDataSet``.
        labelHeaderName: Forwarded to ``xgbf.trainingDataSet``.
        bool_save: Selects disk-backed or in-memory models.
        savedirbase: Base directory of the saved ensemble.

    Returns:
        ``[pred_Y, pred_pY, test_Y]``: thresholded labels, weighted sum, and
        the labels produced for the last run (first column only when they
        are 2-D).
    """
    ModelList = []
    if not bool_save:
        [ModelList, selectruntimesvarnames, ense_weights] = Models
    else:
        weights_file = vtname + "_Runtime_Evaluation_Weight.csv"
        selvars_path = savedirbase + os.sep + vtname + "_Runtime_Model_Select_Variables.csv"
        weights_path = savedirbase + os.sep + weights_file
        ense_weights = init.getListFromPandas(weights_path, 'weight')
        selvars_table = init.readCSVasPandas(selvars_path)
        selectruntimesvarnames = [
            init.getListFrompdDataSet(selvars_table,
                                      "SelectVarName_run" + str(run_idx))
            for run_idx in range(runtimes)
        ]
        del selvars_table

    ensemble_prob = np.zeros(len(TestDataSet))
    for run_idx in range(runtimes):
        print("Predicting runtime = %d" % run_idx)
        if bool_save:
            run_dir = savedirbase + os.sep + "runtime_" + str(run_idx)
            model_file = vtname + '_xgboost_singleclass_run' + str(
                run_idx) + ".model"
            model = xgbf.loadModel(run_dir + os.sep + model_file, params)
        else:
            model = ModelList[run_idx]
        # Every run has its own variable subset, so the test matrix is
        # rebuilt per run.
        run_vars = selectruntimesvarnames[run_idx]
        test_Y, test_X = xgbf.trainingDataSet(
            TestDataSet, [vtname],
            run_vars,
            bool_strclass=bool_strclass,
            labelHeaderName=labelHeaderName,
            bool_binary=True)
        _, run_prob = xgbf.Predict(model,
                                   test_X,
                                   bool_binary=1,
                                   threshold=single_thres)
        ensemble_prob = ensemble_prob + run_prob * ense_weights[run_idx]

    ensemble_label = (ensemble_prob >= single_thres) * 1
    if len(test_Y.shape) > 1:
        test_Y = test_Y[:, 0]
    return [ensemble_label, ensemble_prob, test_Y]
示例#7
0
def testMulticlassCategoryModel(ModelList,TestDataSet,VegeTypes,varnames,params,runtime=-1,bool_pandas=True,\
                                bool_strclass=False,labelHeaderName="",bool_save=False,savedir=""):
    """Run the category-style multiclass model on a test set.

    The first entry of ``ModelList`` is used when the list is non-empty;
    otherwise the model file is loaded from ``savedir`` (the file name
    carries a ``_run<runtime>`` suffix unless ``runtime`` is -1).
    ``bool_save`` is not consulted by this function.

    Args:
        ModelList: Sequence of models; may be empty.
        TestDataSet: Input for ``xgbf.trainingDataSet`` when ``bool_pandas``
            is true, otherwise a ``[test_Y, test_X]`` pair.
        VegeTypes: Class names; their count is the number of classes.
        varnames: Feature names.
        params: Parameters forwarded to ``xgbf.loadModel``.
        runtime: Run index, or -1 for the un-suffixed model file.
        bool_pandas: Selects how ``TestDataSet`` is interpreted.
        bool_strclass: Forwarded to ``xgbf.trainingDataSet``; also disables
            the ``init.mergeCategories`` step.
        labelHeaderName: Forwarded to ``xgbf.trainingDataSet``.
        bool_save: Unused.
        savedir: Directory of the saved model.

    Returns:
        ``[pred_Y, pred_pY, test_Y]`` where ``pred_pY`` has shape
        ``(num_instance, num_class)`` and ``pred_Y`` is its row-wise argmax.

    Raises:
        ValueError: If the prediction vector holds fewer than
            ``num_instance * num_class`` values.
    """
    num_class = len(VegeTypes)
    if not len(ModelList):
        if runtime == -1:
            modelName = 'category_multiclass.model'
        else:
            modelName = 'category_multiclass_run' + str(runtime) + '.model'
        modeldir = savedir + os.sep + modelName
        model = xgbf.loadModel(modeldir, params)
    else:
        model = ModelList[0]
    if bool_pandas:
        [test_Y,test_X]=xgbf.trainingDataSet(TestDataSet,VegeTypes,varnames,\
                                        bool_strclass=bool_strclass,labelHeaderName=labelHeaderName)
    else:
        [test_Y, test_X] = TestDataSet
    # A single 1-D sample is promoted to one-row 2-D arrays.
    if len(test_X.shape) == 1:
        t = np.zeros([1, len(varnames)])
        t[0, :] = test_X
        test_X = t
        t = np.zeros([1, num_class])
        t[0, :] = test_Y
        test_Y = t
    if not bool_strclass and len(test_Y.shape) > 1:
        test_Y = init.mergeCategories(test_Y)
    num_instance = test_X.shape[0]
    test_X = init.formatMulticlassCategoryInput([], test_X, num_class, 0)
    pred_pY = xgbf.Predict(model, test_X, bool_binary=False)
    # num_class consecutive scores belong to one instance. One reshape covers
    # both the single-sample case (num_instance == 1) and the general case.
    scores = np.asarray(pred_pY, dtype=np.float64)[:num_instance * num_class]
    pred_pY = scores.reshape(num_instance, num_class)
    pred_Y = np.argmax(pred_pY, axis=1)
    return [pred_Y, pred_pY, test_Y]
示例#8
0
def predictMulticlassSoftmaxModelMatrix(ModelList,
                                        MatX,
                                        params,
                                        bool_save=False,
                                        savedir=""):
    """Predict class labels and scores for every cell of a feature matrix.

    Args:
        ModelList: Sequence whose first entry is used when ``bool_save`` is
            false.
        MatX: Feature matrix passed to ``xgbf.predictMultiMatrix``; its first
            two dimensions define the output grid.
        params: Parameters forwarded to ``xgbf.loadModel``.
        bool_save: When true, load ``softmax_multiclass.model`` from
            ``savedir``.
        savedir: Directory of the saved model.

    Returns:
        ``[pred_Y, pred_pY]``: ``pred_Y`` has shape ``(rows, cols)`` and holds
        the argmax class; ``pred_pY`` is float32 with one trailing entry per
        column of the prediction matrix.
    """
    grid = MatX.shape
    n_rows, n_cols = grid[0], grid[1]
    if bool_save:
        model = xgbf.loadModel(savedir + os.sep + 'softmax_multiclass.model',
                               params)
    else:
        model = ModelList[0]

    flat_prob = xgbf.predictMultiMatrix(model, MatX, bool_binary=False)
    label_map = np.argmax(flat_prob, axis=1).reshape(n_rows, n_cols)

    n_classes = flat_prob.shape[1]
    prob_cube = np.zeros([n_rows, n_cols, n_classes], dtype=np.float32)
    # Row-major fold: cell (r, c) receives row r * n_cols + c of flat_prob.
    prob_cube[...] = flat_prob.reshape(n_rows, n_cols, n_classes)
    return [label_map, prob_cube]
示例#9
0
def _testSingleclassBaggingModel(CPIDs,RuntimeList,TestDataSet,vtname,runtime,params,ModelList,bool_gpu,n_gpus,n_parallels,\
                                 selectruntimesvarnames,baggingweights,single_thres,bool_strclass,labelHeaderName,\
                                 bool_save,savedirbase):
    """Evaluate the runs given to this worker on a test set.

    The current process id is registered in ``CPIDs`` to obtain a worker
    index, which selects ``gpu_id`` when ``bool_gpu`` is set. Each run in
    ``RuntimeList`` is predicted with its own variable subset, weighted with
    ``baggingweights[run]`` and summed; the sum is thresholded with
    ``single_thres``.

    Args:
        CPIDs: Mutable list of worker process ids; may be appended to.
        RuntimeList: Run indices handled by this call. Must not be empty,
            because the returned labels come from the last processed run.
        TestDataSet: Test data passed to ``xgbf.trainingDataSet``; its length
            sets the size of the accumulator.
        vtname: Class name used for file names and as the single target.
        runtime: Ignored; the name is rebound by the loop over
            ``RuntimeList``.
        params: Model parameters; deep-copied before ``gpu_id`` is set.
        ModelList: In-memory models indexed by run, used when ``bool_save``
            is false.
        bool_gpu: When true, ``gpu_id`` is set to ``worker index % n_gpus``.
        n_gpus: Number of GPUs used for the modulo above.
        n_parallels: Maximum number of distinct PIDs stored in ``CPIDs``.
        selectruntimesvarnames: Per-run list of selected variable names.
        baggingweights: Per-run ensemble weights.
        single_thres: Decision threshold.
        bool_strclass: Forwarded to ``xgbf.trainingDataSet``.
        labelHeaderName: Forwarded to ``xgbf.trainingDataSet``.
        bool_save: When true, load each run's model from disk.
        savedirbase: Base directory containing the ``runtime_<run>`` folders.

    Returns:
        ``[pred_Y, pred_pY, test_Y]``: thresholded labels, weighted sum, and
        the labels produced for the last run (first column only when they
        are 2-D).

    Raises:
        ValueError: If this process is not in ``CPIDs`` and the list already
            holds ``n_parallels`` entries.
        UnboundLocalError: If ``RuntimeList`` is empty, since ``test_Y`` is
            never assigned.
    """
    print("Predicting Singleclass Bagging Ensemble Models...")
    params_parallel = copy.deepcopy(params)
    process_pid = os.getpid()
    # Register each PID only once. A worker that runs several tasks would
    # otherwise fill CPIDs with duplicates and leave no slot for the other
    # workers, whose CPIDs.index() lookup would then fail.
    if process_pid not in CPIDs and len(CPIDs) < n_parallels:
        CPIDs.append(process_pid)
    process_pid_index = CPIDs.index(process_pid)
    print("Worker #%d: PID = %d" % (process_pid_index, process_pid))
    if bool_gpu:
        params_parallel['gpu_id'] = process_pid_index % n_gpus

    pred_pY_ense = np.zeros(len(TestDataSet))
    for runtime in RuntimeList:
        print("Predicting runtime = %d" % runtime)
        if bool_save:
            savedir = savedirbase + os.sep + "runtime_" + str(runtime)
            modelName = vtname + '_xgboost_singleclass_run' + str(
                runtime) + ".model"
            modeldir = savedir + os.sep + modelName
            model = xgbf.loadModel(modeldir, params_parallel)
        else:
            model = ModelList[runtime]
        # Every run has its own variable subset, so the test matrix is
        # rebuilt per run.
        varnames = selectruntimesvarnames[runtime]
        [test_Y,test_X]=xgbf.trainingDataSet(TestDataSet,[vtname],varnames,\
                        bool_strclass=bool_strclass,labelHeaderName=labelHeaderName,bool_binary=True)
        _, pred_pY = xgbf.Predict(model,
                                  test_X,
                                  bool_binary=1,
                                  threshold=single_thres)
        pred_pY_ense = pred_pY_ense + pred_pY * baggingweights[runtime]
    pred_Y_ense = (pred_pY_ense >= single_thres) * 1
    if len(test_Y.shape) > 1:
        test_Y = test_Y[:, 0]
    return [pred_Y_ense, pred_pY_ense, test_Y]
示例#10
0
def predictSingleclassBaggingModelMatrix(Models,MatX,vtname,varnames,params,single_thres=0.5,runtimes=300,filter_percent=0,\
                                         bool_save=False,savedirbase=""):
    """Predict a single-class bagging ensemble over a 3-D feature matrix.

    ``MatX`` is indexed as ``[row, col, variable]`` and flattened to one row
    per cell. Every run predicts on its own column subset; the weighted
    predictions are summed, thresholded with ``single_thres`` and folded back
    to the grid. Progress and an estimated finishing time are printed after
    each run.

    Args:
        Models: ``[ModelList, selectruntimesvarnames, ense_weights]``; only
            unpacked when ``bool_save`` is false.
        MatX: 3-D array of features.
        vtname: Class name used to build file names.
        varnames: Names of the variables along the last axis of ``MatX``.
        params: Parameters forwarded to ``xgbf.loadModel``.
        single_thres: Decision threshold.
        runtimes: Number of runs in the ensemble.
        filter_percent: When positive, summed values below
            ``filter_percent`` times the maximum inside the mask from
            ``init.getMask`` are set to 0.
        bool_save: Selects disk-backed or in-memory models and metadata.
        savedirbase: Base directory of the saved ensemble.

    Returns:
        ``[pred_Y_ense, pred_pY_ense]``, both shaped ``(rows, cols)``: the
        thresholded labels and the (optionally filtered) weighted sum.
    """
    finished = 0.0
    if not bool_save:
        [ModelList, selectruntimesvarnames, ense_weights] = Models
    else:
        weights_file = vtname + "_Runtime_Evaluation_Weight.csv"
        selvars_path = savedirbase + os.sep + vtname + "_Runtime_Model_Select_Variables.csv"
        weights_path = savedirbase + os.sep + weights_file
        ense_weights = init.getListFromPandas(weights_path, 'weight')
        selvars_table = init.readCSVasPandas(selvars_path)
        selectruntimesvarnames = [
            init.getListFrompdDataSet(selvars_table,
                                      "SelectVarName_run" + str(run_idx))
            for run_idx in range(runtimes)
        ]
        del selvars_table

    matshape = MatX.shape
    bool_mask = init.getMask(MatX)
    n_cells = matshape[0] * matshape[1]

    # One row per grid cell, one column per variable.
    pred_X = np.zeros([n_cells, matshape[2]], dtype=np.float32)
    for band in range(matshape[2]):
        pred_X[:, band] = MatX[:, :, band].ravel()

    ensemble_prob = np.zeros(n_cells, dtype=np.float32)
    time_start = time.time()
    for run_idx in range(runtimes):
        print("Predicting runtime = %d..." % (run_idx))
        if bool_save:
            run_dir = savedirbase + os.sep + "runtime_" + str(run_idx)
            model_file = vtname + '_xgboost_singleclass_run' + str(
                run_idx) + ".model"
            model = xgbf.loadModel(run_dir + os.sep + model_file, params)
        else:
            model = ModelList[run_idx]

        # Restrict the input to the variables this run was built on.
        run_cols = _findListSubsetIndexes(selectruntimesvarnames[run_idx],
                                          varnames)
        _, run_prob = xgbf.Predict(model,
                                   pred_X[:, run_cols],
                                   bool_binary=1,
                                   threshold=single_thres)
        ensemble_prob = ensemble_prob + ense_weights[run_idx] * run_prob

        # Progress report.
        time_stop = time.time()
        finished = finished + 1
        done = finished / runtimes
        remain = (runtimes - finished) / runtimes
        num_day, num_hour, num_min = _calDueTime(time_start, time_stop, done,
                                                 0.0)
        print(
            "Model: %d Calculating Finished!      Done: %.2f%%, Remaining: %.2f%%"
            % (run_idx, 100 * done, 100 * remain))
        print("Calculating will finish in %d Days %d Hours %d Minutes\n" %
              (num_day, num_hour, num_min))

    # Threshold before filtering so the labels reflect the unfiltered sum.
    label_map = ((ensemble_prob >= single_thres) * 1).reshape(
        matshape[0], matshape[1])
    prob_map = ensemble_prob.reshape(matshape[0], matshape[1])
    if filter_percent > 0:
        p_max = np.max(prob_map[bool_mask])
        prob_map[prob_map < p_max * filter_percent] = 0
    return [label_map, prob_map]