Exemplo n.º 1
0
def build_pipline(par_th, re_path, train_x, train_y, test_classifiers,
                  deal_steps):
    #为待训练的模型作为流水线的最后一步
    classifiers = Model.getClassifiers()
    #存放参数
    model_best_parameters = {}
    line = ''
    #fil = open("E:\\workplace\\.metadata\\.me_tcat85\\webapps\\FoolAlgorithm\\AlgorithmUpload\\AlgorithmResult.txt", 'w')
    fil = open(re_path, 'w')
    for classifier in test_classifiers:
        clf = classifiers[classifier]()
        step2 = ('Model', clf)
        step = Deal_Feature(deal_steps)
        step.append(step2)
        pipeline = Pipeline(steps=step)
        #搜索最好的模型训练参数
        #path = "E:\\workplace\\.metadata\\.me_tcat85\\webapps\\FoolAlgorithm\\AlgorithmUpload\\AlgorithmParameter.txt"
        param_grid = SetParameters.getParameters(par_th, classifier)
        #print classifier, param_grid
        #print 'Start train!'
        best_parameters, grid_search = search_best_parameter(
            pipeline, param_grid, train_x, train_y)
        model_best_parameters[classifier] = best_parameters
        print grid_search.best_params_, grid_search.best_score_

        #保存最优搜索结果
        for key, value in (grid_search.best_params_).items():
            line = line + key + ',' + str(value) + '*'
        line = line + classifier + ',' + str(grid_search.best_score_) + '#'
    fil.write(line)
    fil.close()

    return model_best_parameters
Exemplo n.º 2
0
def Myclassifier():
    #读取文件的路径、设置作为预测变量的列名、降维方法、维数、作为预测变量分割数据比例、算法类型
    #par_th = "E:\\workplace\\.metadata\\.me_tcat85\\webapps\\FoolAlgorithm\\PredictUpload\\PredictParameter.txt"
    par_path1 = sys.argv[1]
    par_path2 = sys.argv[2]
    predictPath = sys.argv[3]
    par_path = par_path1 + par_path2+".txt"
    re_path = par_path1.replace('Par_', 'Re_')+par_path2 + ".csv"
    path, y_name, select_method, dims, data_ratio, test_classifiers = SetParameters.getTrainAlgorithm(par_path)
    data = DealData.read_data(path)
    train_x, test_x, train_y, test_y = DealData.get_train_test(data, y_name, data_ratio)
    print dims, select_method
    SelectKBest = Model.getFeature(dims, select_method)
    deal_steps = {'reduce_dim': [SelectKBest]}
    #构建Pipeline, 搜索最优的参数
    print test_classifiers
    classifier, best_parameters, auc = PipLineModel.build_pipline_dim(par_path, train_x, train_y, test_classifiers, deal_steps)
    classifiers = Model.getClassifiers()
    filter = None
    if select_method is not None:
        #做降维处理
        filter = Model.getFeature(dims, select_method)
    else:
        pass
    clf = classifiers[classifier](best_parameters)
    if filter != None:
        model = make_pipeline(filter, clf)
    else:
        model = clf
    model.fit(train_x, train_y)
    #做预测
    #predictPath = "E:\\workplace\\.metadata\\.me_tcat85\\webapps\\FoolAlgorithm\\PredictUpload\\public"
    x_name = [x for x in data.columns if x != y_name]
    #获取预测数据
    test = DealData.read_data(predictPath)
    test_x = test[x_name]

    #利用选用的模型进行训练和预测
    predict = model.predict(test_x)
    proba = model.predict_proba(test_x)

    #处理训练结果
    result = DataFrame(test_x)
    result['FORTARGET'] = predict
    result['PROB'] = [round(i, 4) for i in proba[:, 1]]
    result.to_csv(re_path, encoding='utf-8')
Exemplo n.º 3
0
def build_pipline_dim(par_path, train_x, train_y, test_classifiers,
                      deal_steps):
    #为待训练的模型作为流水线的最后一步
    classifiers = Model.getClassifiers()
    classifier = test_classifiers[0]
    clf = classifiers[classifier]()
    step2 = ('Model', clf)
    step = Deal_Feature(deal_steps)
    step.append(step2)
    pipeline = Pipeline(steps=step)
    #搜索最好的模型训练参数
    #path = "E:\\workplace\\.metadata\\.me_tcat85\\webapps\\FoolAlgorithm\\DimUpload\\DimParameter.txt"
    param_grid = SetParameters.getParameters(par_path, classifier)
    #print classifier, param_grid
    #print '开始训练'
    best_parameters, grid_search = search_best_parameter(
        pipeline, param_grid, train_x, train_y)
    print grid_search.best_params_, grid_search.best_score_
    return classifier, best_parameters, grid_search.best_score_
# Membership helper, used for substring checks:
def contains(s, other):
    """Return True if ``other`` is found in ``s``, otherwise False.

    Uses the ``in`` operator rather than calling ``__contains__`` directly,
    so the result is always a bool and containers that only support
    iteration are handled as well.
    """
    return other in s


# Use script inputs:
if __name__ == "__main__":
    # Process input:
    TheEpochStr = "_Final"

    if (len(TheSys.argv) > 1):
        TheEpoch = int(TheSys.argv[1])
        TheEpochStr = "_Epoch" + str(TheEpoch)

    # Set parameters:
    SetParameters.Initialize(False)

    # Edit the output path:
    ThisOutputPath = SetParameters.OutputPath
    if "/DNN_Training/" in ThisOutputPath:
        ThisOutputPath = ThisOutputPath.replace("/DNN_Training/", "")
    if "/DNN_Validation/" in ThisOutputPath:
        ThisOutputPath = ThisOutputPath.replace("/DNN_Validation/", "")
    if "/DNN_Experiment/" in ThisOutputPath:
        ThisOutputPath = ThisOutputPath.replace("/DNN_Experiment/", "")

    # Define training folder:
    TrainingFolder = ThisOutputPath + "/DNN_Training/"

    # Check if it exists:
    if (TheOS.path.exists(TrainingFolder)):
Exemplo n.º 5
0
from NetworkDefinition import model
from ReadTheArray import ReadArray
import SetParameters

# --------------------------------------------------------------------------

# Use script input:
if __name__ == "__main__":
    # The first command-line argument is parsed as the integer label.
    # NOTE(review): TheSys is presumably the sys module; its import is not
    # visible in this part of the file - confirm.
    TheLabel = int(TheSys.argv[1])
    TheLabelStr = str(TheLabel)

    # NOTE: A label of 0 means that the standard (multiplicity) setup is used.
    # A positive label means that a step2-network of that multiplicity is used.
    # NOTE(review): the earlier note claimed that omitting the label is also
    # accepted, but argv[1] is read unconditionally above - confirm.

    # Set the parameters:
    SetParameters.Initialize(True)  # ATTENTION: NEBULA!

    # Modify based on label:
    if (TheLabel > 0):
        SetParameters.Step2_SelectedMultiplicity = TheLabel
        SetParameters.NetworkType = SetParameters.Step2_NetworkType
        # Redirect the text-file path to the folder of this multiplicity.
        SetParameters.TextFilePath = SetParameters.TextFilePath.replace(
            '/DNN_DataBase/', '/DNN_Step2_Mult' + TheLabelStr + '/')
        SetParameters.nMaxClusters = 2 * TheLabel + 1
        SetParameters.nEpochs = SetParameters.nEpochs_Step2

        if (SetParameters.Step2_NetworkType == 'ScoringPlus_OneCluster'):
            # Then, we must adapt the number of batches:
            # NOTE(review): under Python 2 the division truncates before
            # int() when both operands are integers - confirm this is intended.
            NewNBatches = int(SetParameters.nMaxClusters *
                              SetParameters.nEventsTotal /
                              SetParameters.BatchSize)
Exemplo n.º 6
0
from NetworkDefinition import model
from ReadTheArray import ReadArray
import SetParameters

# Membership helper, used for substring checks:
def contains(s, other):
    """Return True if ``other`` is found in ``s``, otherwise False.

    Uses the ``in`` operator rather than calling ``__contains__`` directly,
    so the result is always a bool and containers that only support
    iteration are handled as well.
    """
    return other in s

# Use script inputs:
if __name__ == "__main__":
    # Process input: the first command-line argument is the integer label.
    # NOTE(review): TheSys is presumably the sys module; its import is not
    # visible in this part of the file - confirm.
    TheLabel = int(TheSys.argv[1])
    TheLabelStr = str(TheLabel)

    # Set parameters: configure the step2 network for the given label.
    SetParameters.Initialize(False)
    SetParameters.Step2_SelectedMultiplicity = TheLabel
    SetParameters.NetworkType = SetParameters.Step2_NetworkType
    # Redirect the text-file path to the folder of this multiplicity.
    SetParameters.TextFilePath = SetParameters.TextFilePath.replace('/DNN_DataBase/','/DNN_Step2_Mult'+TheLabelStr+'/')
    SetParameters.nMaxClusters = 2*TheLabel+1
    # Called after the overrides above have been applied.
    SetParameters.Compute_IO_Structure()

    # Edit the output path: strip any of the known sub-folder names so that
    # ThisOutputPath no longer contains them.
    ThisOutputPath = SetParameters.OutputPath
    if "/DNN_Training/" in ThisOutputPath:
        ThisOutputPath = ThisOutputPath.replace("/DNN_Training/","")
    if "/DNN_Validation/" in ThisOutputPath:
        ThisOutputPath = ThisOutputPath.replace("/DNN_Validation/","")
    if "/DNN_Experiment/" in ThisOutputPath:
        ThisOutputPath = ThisOutputPath.replace("/DNN_Experiment/","")
    
# import Keras and TensorFlow:
import tensorflow as tf
import keras as ks

# import own functions:
from NetworkDefinition import model
from ReadTheArray import ReadArray
import SetParameters

# Use script inputs:
if __name__ == "__main__":
    TheEpoch = int(TheSys.argv[1])
    TheEpochStr = str(TheEpoch)

    # Set parameters:
    SetParameters.Initialize(True)  # ATTENTION: NEBULA!

    # Edit the output path:
    ThisOutputPath = SetParameters.OutputPath
    if "/DNN_Training/" in ThisOutputPath:
        ThisOutputPath = ThisOutputPath.replace("/DNN_Training/", "")
    if "/DNN_Validation/" in ThisOutputPath:
        ThisOutputPath = ThisOutputPath.replace("/DNN_Validation/", "")
    if "/DNN_Experiment/" in ThisOutputPath:
        ThisOutputPath = ThisOutputPath.replace("/DNN_Experiment/", "")

    # Define training folder:
    TrainingFolder = ThisOutputPath + "/DNN_Training/"

    # Check if it exists:
    if (TheOS.path.exists(TrainingFolder)):
Exemplo n.º 8
0
import PipLineModel
import Model
import SetParameters

# Python 2 idiom: reloading sys makes setdefaultencoding available again so
# the default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

# Read the file path, the column name used as the prediction target, the
# dimensionality-reduction method, the data split ratio and the algorithm
# types that should be compared.
# Example parameter file:
#par_path = "E:\\workplace\\.metadata\\.me_tcat85\\webapps\\FoolAlgorithm\\AlgorithmUpload\\AlgorithmParameter.txt"
par_path1 = sys.argv[1]
par_path2 = sys.argv[2]
# Parameter file: <arg1><arg2>.txt
par_th = par_path1 + par_path2 + ".txt"
# Result file: same name with 'Par_' replaced by 'Re_' in the first argument.
re_path = par_path1.replace('Par_', 'Re_') + par_path2 + ".txt"
#print par_th
#print re_path
path, y_name, select_method, dims, data_ratio, test_classifiers = SetParameters.getTrainAlgorithm(
    par_th)
# The string 'No' disables the feature-reduction step.
if select_method != 'No':
    #print dims, select_method
    SelectKBest = Model.getFeature(dims, select_method)
    deal_steps = {'reduce_dim': [SelectKBest]}
else:
    deal_steps = {'reduce_dim': None}


def Myclassifier():
    # Load the data set from the module-level `path` and split it into
    # training and test parts using `y_name` and `data_ratio`.
    data = DealData.read_data(path)
    train_x, test_x, train_y, test_y = DealData.get_train_test(
        data, y_name, data_ratio)

    # Build the Pipeline and search for the best parameters
    #print 'Start search'
Exemplo n.º 9
0
def Myclassifier():
    #读取文件的路径、设置作为预测变量的列名、降维方法、最大维数、最小维数、维数间隔、作为预测变量分割数据比例、算法类型
    #par_path = "E:\\workplace\\.metadata\\.me_tcat85\\webapps\\FoolAlgorithm\\DimUpload\\DimParameter.txt"
    par_path1 = sys.argv[1]
    par_path2 = sys.argv[2]
    par_path = par_path1 + par_path2 + ".txt"
    re_path = par_path1.replace('Par_', 'Re_') + par_path2 + ".txt"
    path, y_name, select_method, max_dims, min_dims, int_dims, data_ratio, test_classifiers = SetParameters.getTrainDim(
        par_path)
    data = DealData.read_data(path)
    train_x, test_x, train_y, test_y = DealData.get_train_test(
        data, y_name, data_ratio)
    line = ''
    for dim in xrange(min_dims, max_dims, int_dims):
        print dim, select_method
        SelectKBest = Model.getFeature(dim, select_method)
        deal_steps = {'reduce_dim': [SelectKBest]}
        #构建Pipeline, 搜索最优的参数
        classifier, best_parameters, auc = PipLineModel.build_pipline_dim(
            par_path, train_x, train_y, test_classifiers, deal_steps)
        line = line + str(dim) + ',' + str(auc) + '#'
    fil = open(re_path, 'w')
    print line
    fil.write(line)
    fil.close()