Exemplo n.º 1
0
def train_converted_text(svm_file,
                         text_converter,
                         grid_arguments='0',
                         feature_arguments='',
                         train_arguments=''):
    '''
    Train a model on ``svm_file`` and return it wrapped in a TextModel.

    ``'-s 4 '`` is always prepended to ``train_arguments``.

    Grid search is skipped when ``grid_arguments`` is ``'0'`` or ``0``.
    Otherwise ``find_parameters`` is run first and the ``c`` value it
    reports is appended to the training arguments as ``-c <value>``:

    * ``'1'`` / ``1``: use the default grid options only; the learner
      script is first made executable for its owner.
    * anything else: the value is appended to the default grid options.

    Parameters:
        svm_file: passed unchanged to ``find_parameters`` and ``train``.
        text_converter: passed unchanged to ``TextModel``.
        grid_arguments: grid-search switch / extra options (see above).
        feature_arguments: extra options passed to ``train`` and appended
            to the grid-search option string.
        train_arguments: extra training options.

    Returns:
        TextModel(text_converter, trained_model)

    Raises:
        OSError: if the learner script cannot be chmod-ed for grid search.
    '''
    train_arguments = '-s 4 ' + train_arguments

    if grid_arguments not in ('0', 0):
        # dirname() is '' when __file__ has no directory component; fall back
        # to '.' so the path stays relative instead of pointing at '/learner'.
        base_dir = os.path.dirname(__file__) or '.'
        grid_multi_path = base_dir + '/learner/learner_impl.py'
        default_grid_arguments = (
            '-svmtrain {0} -log2g null -log2c -6,6,2 '.format(grid_multi_path))

        if grid_arguments in ('1', 1):
            try:
                os.chmod(grid_multi_path, stat.S_IRWXU)  # Execute by owner.
            except OSError as err:
                # Keep the original hint but do not hide the real cause.
                raise OSError("Grid search needs sudo: {0}".format(err))
            grid_arguments = default_grid_arguments
        else:
            # str() so that numeric values other than 0/1 do not raise
            # TypeError on concatenation.
            grid_arguments = default_grid_arguments + str(grid_arguments)

        # Index 1 of the find_parameters() result is used as a mapping that
        # holds the selected "c".
        parameters = find_parameters(
            svm_file, grid_arguments + ' ' + train_arguments + ' ' +
            feature_arguments)[1]
        train_arguments = train_arguments + ' -c ' + str(parameters["c"])

    m = train(svm_file, feature_arguments, train_arguments)

    return TextModel(text_converter, m)
def ParameterOptimization(alg_type_list, test_file_list):
    """Return the training parameters for every algorithm type.

    When ``IsModify(test_file_list)`` reports a change, a grid search is run
    on ``./traindata/<type>_train_data`` for each type and the resulting
    parameter values are cached in ``train_param/<type>train_param``.
    Otherwise the cached values are read back from those files.

    Parameters:
        alg_type_list: iterable of algorithm type names (strings used to
            build the data and cache file names).
        test_file_list: passed unchanged to ``IsModify``.

    Returns:
        A list with one entry per algorithm type; each entry is the list of
        parameter strings obtained by splitting the stored text on ``', '``.
    """
    CreateNewDirectory('train_param')
    train_param_list = []
    # Re-compute the best parameters only if the test data were modified.
    # NOTE(review): "== True" is kept on purpose; IsModify's return type is
    # not visible here and a plain truthiness test could change behaviour.
    if IsModify(test_file_list) == True:
        options = [
            '-svmtrain', '/usr/bin/svm-train', '-gnuplot', '/usr/bin/gnuplot',
            '-log2c', '-5,5,1', '-log2g', '-4,0,1', '-log2p', '-5,5,1', '-s',
            '3', '-t', '2', '-v', '5', '-m', '300'
        ]
        for alg_type in alg_type_list:
            filename = './traindata/' + alg_type + '_train_data'
            ret_best_params = grid.find_parameters(filename, options)
            print('ret_best_param :  ' + str(ret_best_params))
            # Element 1 is used as a mapping of parameter values; list()
            # keeps the "[a, b]" text form on Python 3 as well.
            # NOTE(review): the order of the values follows the mapping's
            # iteration order - confirm callers rely on a stable order.
            tmp = str(list(ret_best_params[1].values())).strip('[] ')
            # Close the cache file deterministically so that it is flushed
            # before anyone reads it back.
            with open('train_param/' + alg_type + 'train_param',
                      'w') as w_file:
                w_file.write(tmp)

            train_param_list.append(tmp.split(', '))
    else:
        for alg_type in alg_type_list:
            with open('train_param/' + alg_type + 'train_param',
                      'r') as r_file:
                tmp = str(r_file.readline())
            train_param_list.append(tmp.split(', '))
    print('all train_param_list :  ' + str(train_param_list))
    return train_param_list
Exemplo n.º 3
0
def classify(images, classes_list, train_set, test_set, pos_fold, descriptor,
             parameters):
    """
    Performs the classification of the test_set according to the train_set.

    The function writes libSVM-format train and test files into a temp
    directory, runs a grid search on the train file, then calls the
    svm-train and svm-predict executables through the shell and parses the
    prediction output.

    Input:
        - images: Python dictionary indexed by image; for each image,
        images[img][POS_CLASSES][INDEX_ZERO] is read as its class and
        images[img][POS_FV][INDEX_ZERO] as its feature vector.
        - classes_list: List of all classes; used to fit the label encoder
        and as keys when collecting per-class probabilities.
        - train_set: List of images in the train set.
        - test_set: List of images in the test set.
        - pos_fold: Only used as suffix of the model file name
        ("libSVM.train.model_<pos_fold>").
        - descriptor: Name of the descriptor used in the extraction step
        (not used in this method).
        - parameters: Dictionary with the parameters of the libSVM method:
            - Kernel: kernel used in the SVM (converted by kernel())
            - C: Cost of the SVM (parsed, then replaced by the grid search)
            - degree: degree of the Kernel function
            - gamma: gamma of the Kernel function (parsed, then replaced by
              the grid search)
            - Cross-Validation: Number of folds (parsed but not used here)
            - Probabilities: Get as predict the probabilities of each class
    Output:
        Tuple (test_set, list_class, list_result, classes, model_paths):
            - test_set: the input test_set, unchanged.
            - list_class: original (not encoded) class of each test image.
            - list_result: one list per test image with one value per class,
              ordered like label_encoder.classes_; probabilities when
              Probabilities is non-zero, otherwise 1.0 at the predicted
              class and 0.0 elsewhere.
            - classes: label_encoder.classes_.
            - model_paths: list holding the path of the trained model file.
    """

    print "Classification: libSVM 3.17"

    #Get parameters
    libSVM_kernel = kernel(parameters['Kernel'])
    # NOTE(review): libSVM_c and libSVM_gamma are overwritten by the grid
    # search result further below.
    libSVM_c = float(parameters['C'])
    libSVM_degree = int(parameters['degree'])
    libSVM_gamma = float(parameters['gamma'])
    # NOTE(review): libSVM_cv is never used in this function.
    libSVM_cv = int(parameters['Cross-Validation'])
    libSVM_probability = int(parameters['Probabilities'])

    #Paths
    #All temporary files live in the "temp" directory two levels above the
    #directory of this file.
    dirname = os.path.abspath(os.path.join(os.path.dirname(__file__)))
    temp_path = os.path.abspath(os.path.join(dirname, "..", "..", "temp"))
    train_path = os.path.join(temp_path, "libSVM.train")
    model_path = os.path.join(temp_path, "libSVM.train.model_" + str(pos_fold))
    test_path = os.path.join(temp_path, "libSVM.test")
    output_path = os.path.join(temp_path, "libSVM.output")

    #Executable names; on 32-bit Linux the "_32l" variants are used
    plugin_train = "svm-train"
    plugin_test = "svm-predict"
    system_platform = [platform.system(), platform.architecture()[0]]
    if system_platform[0] == 'Linux':
        if system_platform[1] == '32bit':
            plugin_train += "_32l"
            plugin_test += "_32l"

    #Preprocess each class to a unique value to the classification
    label_encoder = preprocessing.LabelEncoder()
    label_encoder.fit(classes_list)
    print "List of classes of this experiment:", label_encoder.classes_

    list_class = []
    list_fv = []

    #Read the train file and save the list of class and the list
    #of feature vectors
    for img in train_set:
        list_class.append(images[img][POS_CLASSES][INDEX_ZERO])
        list_fv.append(numpy.array(images[img][POS_FV][INDEX_ZERO]))

    list_train = numpy.array(list_fv)
    list_train_class = numpy.array(list_class)

    #Given a list of classes, transform each value in this list to a integer
    list_train_class = label_encoder.transform(list_train_class)

    #Read the test list and save the list of class and the list
    #of feature vectors
    # NOTE(review): list_img is never used afterwards.
    list_img = test_set
    #list_class is rebuilt here for the test set; this is the list returned
    #to the caller
    list_class = []
    list_fv = []

    for img in test_set:
        list_class.append(images[img][POS_CLASSES][INDEX_ZERO])
        list_fv.append(numpy.array(images[img][POS_FV][INDEX_ZERO]))

    list_test = numpy.array(list_fv)
    list_test_class = numpy.array(list_class)

    list_test_class = label_encoder.transform(list_test_class)

    #SVM Fit
    #-------------------------------------------------------------------------
    print "\tFit"
    #Create train file: one line per sample, "<class> 1:<v1> 2:<v2> ... "
    #(feature indices start at 1)
    train_file = open(train_path, "wb")
    for item_class, item_fv in izip(list_train_class, list_train):
        train_file.write(str(item_class) + " ")
        for i in range(len(item_fv)):
            train_file.write(str(i + 1) + ":" + str(item_fv[i]) + " ")
        train_file.write("\n")
    train_file.close()

    #Grid-Search
    #The first returned value (result) is not used; only 'c' and 'g' of the
    #second one are read
    result, param = grid.find_parameters(train_path, '-gnuplot null')

    #Replace the C and gamma given in parameters by the grid search values
    libSVM_c = param['c']
    libSVM_gamma = param['g']
    #-----------

    #The executable is addressed as "<dirname>/./<plugin_train>"
    cmd_train = """
%s%s.%s%s -t %d -d %d -g %f -c %f -b %d "%s" "%s"
    """ % (dirname, os.sep, os.sep, plugin_train, libSVM_kernel, libSVM_degree,
           libSVM_gamma, libSVM_c, libSVM_probability, train_path, model_path)
    print cmd_train

    #Execute SVM-Train
    # NOTE(review): the return code of call() is not checked.
    call(cmd_train, shell=True)
    print "\tEnd Fit"
    #-------------------------------------------------------------------------

    #Read configuration of the libSVM model
    model_file = open(model_path, "rb")
    print model_path
    # NOTE(review): count_head_lines is never used.
    count_head_lines = 0
    #Scan the model file for the line starting with "label" and keep the
    #integer labels listed on it
    # NOTE(review): model_labels stays undefined if no such line exists; it
    # is only read in the probability branch below.
    for line in model_file.readlines():
        key_model = line.split()[0]
        if key_model == "label":
            model_labels = map(int, line.split()[1:])
            break
    model_file.close()
    model_paths = [model_path]

    #SVM Predict
    #-------------------------------------------------------------------------
    #Create test file (same line format as the train file)
    test_file = open(test_path, "wb")
    for item_class, item_fv in izip(list_test_class, list_test):
        test_file.write(str(item_class) + " ")
        for i in range(len(item_fv)):
            test_file.write(str(i + 1) + ":" + str(item_fv[i]) + " ")
        test_file.write("\n")
    test_file.close()

    #Execute SVM-Predict
    cmd_test = """
%s%s.%s%s -b %d "%s" "%s" "%s"
    """ % (dirname, os.sep, os.sep, plugin_test, libSVM_probability, test_path,
           model_path, output_path)
    print cmd_test

    call(cmd_test, shell=True)
    print "\tEnd Predict"
    #-------------------------------------------------------------------------

    #Read output file to get the result of the predict
    output_file = open(output_path, "rb")
    if libSVM_probability:
        #The first line of the output is skipped (presumably a header line -
        #confirm against the svm-predict output format)
        output_file.readline()
        list_result = []
        for line in output_file.readlines():
            #Start with 0.0 for every class, then fill in the values that
            #the model reports; the first column of the line is dropped
            dict_classes = {key_class: 0.0 for key_class in classes_list}
            output_line = map(float, line.split()[1:])
            #Pair each value with the class name of the matching model label
            for class_item, perc in \
                    izip(list(label_encoder.inverse_transform(model_labels)),
                    output_line):
                dict_classes[class_item] = perc
            #Reorder the values to follow label_encoder.classes_
            percentage_list = [dict_classes[key_class] for key_class in \
                    label_encoder.classes_]
            list_result.append(percentage_list)
    else:
        #Each output line is one predicted (encoded) label
        list_predict = map(int, output_file.readlines())
        list_predict = label_encoder.inverse_transform(list_predict)

        list_result = []
        for predict in list_predict:
            img_result = [0.0] * len(label_encoder.classes_)
            #Find all predict in the list label_encoder.classes_ and grab the
            #first index
            pos = numpy.where(label_encoder.classes_ == predict)[0][0]
            img_result[pos] = 1.0
            list_result.append(img_result)
    output_file.close()

    #Remove temporary files (the model and output files are kept)
    os.remove(train_path)
    os.remove(test_path)

    return test_set, list_class, list_result, label_encoder.classes_, \
            model_paths
Exemplo n.º 4
0
import svmutil as sm
import numpy as np
import grid


def loadDataSet(fileName):
    """Load a tab-separated data set with two features and one label per line.

    Each non-blank line must hold at least three tab-separated numeric
    fields: the first two are the features, the third is the label. Any
    further fields are ignored. Blank (whitespace-only) lines are skipped.

    Parameters:
        fileName: path of the text file to read.

    Returns:
        (dataMat, labelMat): a list of ``[x0, x1]`` float pairs and the list
        of float labels, in file order.

    Raises:
        ValueError / IndexError: if a non-blank line is malformed.
    """
    dataMat = []
    labelMat = []
    with open(fileName) as fr:
        # Iterate the file lazily instead of materialising every line.
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # e.g. a trailing empty line at the end of the file
                continue
            lineArr = stripped.split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat
def testdata(trainfilename,testfilename):
    """Train an SVM on one data file and evaluate it on another.

    Both files are read with ``loadDataSet``. The model is trained with the
    fixed LIBSVM options ``-s 0 -t 2 -c 0.031 -g 0.0078``.

    Parameters:
        trainfilename: path of the training data file.
        testfilename: path of the test data file.

    Returns:
        The three values produced by ``svm_predict`` as a tuple
        ``(plabel, pacc, pval)``. Earlier versions discarded them and
        returned None, so callers that ignore the result are unaffected.
    """
    dataArr1, labelArr1 = loadDataSet(trainfilename)
    dataArr2, labelArr2 = loadDataSet(testfilename)
    # Options are given as an argv-style list rather than a single string.
    arg = ['-s', 0, '-t', 2, '-c', 0.031, '-g', 0.0078]
    model = sm.svm_train(labelArr1, dataArr1, arg)
    plabel, pacc, pval = sm.svm_predict(labelArr2, dataArr2, model)
    return plabel, pacc, pval


if __name__ == '__main__':
    # Hard-coded locations of the sample RBF data sets used for this run.
    trainfilename = 'C:/Users/XUEJW/Desktop/dataset/MLiA_SourceCode/machinelearninginaction/Ch06/testSetRBF.txt'
    testfilename = 'C:/Users/XUEJW/Desktop/dataset/MLiA_SourceCode/machinelearninginaction/Ch06/testSetRBF2.txt'

    # The fixed-parameter run, testdata(trainfilename, testfilename), is
    # disabled; only the grid search on the training file is executed.
    grid.find_parameters(trainfilename)



Exemplo n.º 5
0
# Problem built with isKernel=True (presumably x_train_precomputed holds a
# precomputed kernel matrix - confirm against the code that creates it).
prob_precomputed = svmutil.svm_problem(y1, x_train_precomputed, isKernel=True)

# Grid search for the best parameters.
print('grid searching...')

# Option strings handed to grid.find_parameters, keyed by kernel name.
grid_search = {
    'linear': '-t 0 -log2c -5,15,2',
    'polynomial': '-t 1 -log2c -5,15,2 -log2g -5,15,2 -log2r -3,5,2 -d 4',
    'RBF': '-t 2 -log2c -5,15,2 -log2g -5,15,2',
}
grid_search_precomputed = {'linear+RBF': '-t 4 -log2c -5,15,2'}

# Parallel lists: kernel name, result table, best result.
best_param, kernel_tab, kernel = [], [], []

# Search the regular kernels on `prob` first, then the precomputed kernel on
# `prob_precomputed`, collecting the results in that order.
for search_space, problem in ((grid_search, prob),
                              (grid_search_precomputed, prob_precomputed)):
    for kernel_type, opts in search_space.items():
        rst, tab, col = grid.find_parameters(problem, opts)
        kernel.append(kernel_type)
        kernel_tab.append(pd.DataFrame(tab, columns=col))
        best_param.append(rst)

# Write one CSV per kernel and report its best result.
for i, table in enumerate(kernel_tab):
    table.to_csv('best param/' + kernel[i] + '.csv')
    print(kernel[i] + ':', best_param[i])
'''
Exemplo n.º 6
0
    #Computers Confusion Matrix
    for i in range(len(true)):
        result[classInd[true[i]]][classInd[pred[i]]] += 1

    return result


def _rbf_svc_options(best):
    """Build the svm_train option string from the 'g' and 'c' entries of *best*."""
    return '-s 0 -t 2 -g ' + str(best['g']) + ' -c ' + str(best['c'])


# Load the training and test sets for both representations.
trainRAD_y, trainRAD_x = svm_read_problem('./rad_d2')
trainCust_y, trainCust_x = svm_read_problem('./cust_d2')
testRAD_y, testRAD_x = svm_read_problem('./rad_d2.t')
testCust_y, testCust_x = svm_read_problem('./cust_d2.t')

# Search for the best parameters on each training file.
print('INITIALIZING CROSS VALIDATION')

bestRAD_acc, bestRAD_params = find_parameters('./rad_d2')
print('Best Accuracy with RAD is:', bestRAD_acc,
      '\nWith C and y:', bestRAD_params)

bestCust_acc, bestCust_params = find_parameters('./cust_d2')
print('Best Accuracy with Custom is:', bestCust_acc,
      '\nWith C and y:', bestCust_params)

# Train one model per representation with the parameters found above.
print('TRAINING SVMs')

parameter_string = _rbf_svc_options(bestRAD_params)
mRAD = svm_train(trainRAD_y, trainRAD_x, parameter_string)

parameter_string = _rbf_svc_options(bestCust_params)
mCust = svm_train(trainCust_y, trainCust_x, parameter_string)