# Harness setup: train/execute the svr.SVR context-engine wrapper on a CSV dataset.
from svr import SVR
from ContextEngineBase import Complexity

## For different tests, these values will vary.
inputFilePath = "SVRTestInput.csv"
outputFilePath = "SVRTestOutput.csv"
complexity = Complexity.secondOrder
numTrainingSamples = 96
numExecuteSamples = 96

# NOTE(review): the `csv` module is not imported in this chunk, and the file
# handles opened here are never closed in the visible code — confirm both
# against the rest of the file.
inputFile = open(inputFilePath)
outputFile = open(outputFilePath)
inputReader = csv.reader(inputFile)
outputReader = csv.reader(outputFile)
# NOTE(review): this rebinds the name `csv` from the csv module to a record
# array (recfromcsv is presumably numpy.recfromcsv — confirm the import),
# shadowing the module for any later csv.* call in this file.
csv = recfromcsv(inputFilePath, delimiter=',')

## Change the name of the algorithm to test it out.
algorithmTest = SVR(complexity, 1, 0, [0], {})
teslaTimestamps = {}
svrTimestamps = {}
#print(algorithmTest.complexity);
#print(algorithmTest.functionOrder);

# Dataset geometry: 35040 = 365 days * 96 samples/day (quarter-hour data,
# presumably — TODO confirm against the CSV).
totRow = 35040
numRow = 96  # samples per day
day_train_start = 0
day_train_end = 0
day_predict = 4

# Accumulators filled by the (not shown) training/prediction loop below.
x_train = []
y_train = []
x_predict = []
# Run K-fold cross validation over the hyper-parameter grid.
cv = GridSearchCV(data=data, model=SVR, param_grid=param_grid,
                  scorer=mean_absolute_epsilon_error, num_folds=args.K)
cv.train()

# Local helper: every log line goes to the same log file.
def _log(message):
    print_and_write(filename=log_filename, log=message)

# Report the full CV history followed by the winning configuration.
_log('-' * 100)
_log('[*] Cross validation history:')
for entry in cv.cv_results:
    _log(' - Parameter: {} | Cross validation error: {}'.format(entry['param'], entry['score']))
_log('-' * 100)
_log('[*] Best parameter: {}'.format(cv.best_param))
_log('[*] Best cross validation error: {}'.format(cv.best_score))
_log('[*] Start to train on full training data and evaluate on test data ...')

# Refit on the full training split with the best hyper-parameters.
model = SVR(data=data, param=cv.best_param, verbose=True)
model.train()

# Score the refit model on both splits.
train_error = mean_absolute_epsilon_error(y_truth=data['train_y'],
                                          y_pred=model.hypothesis(X=data['train_X']))
test_error = mean_absolute_epsilon_error(y_truth=data['test_y'],
                                         y_pred=model.hypothesis(X=data['test_X']))

# Summarize the run in the log file.
timestamp = datetime.datetime.now().strftime('%H:%M:%S')
_log('-' * 100)
_log('[*] Train file path: "{}"'.format(train_filepath))
_log('[*] Test file path: "{}"'.format(test_filepath))
_log('[*] Datetime: {}'.format(timestamp))
_log('[*] Best parameter: {}'.format(cv.best_param))
if model.use_w:
    _log('[*] Weight vector: {}'.format(model.w))
    _log('[*] Sample mean of bias: {}'.format(model.b_mean))
#csv = recfromcsv('adj_microwave1.csv', delimiter=',') #csv = recfromcsv('adj_oven1.csv', delimiter=',') csv = recfromcsv('adj_refrigerator1.csv', delimiter=',') #csv = recfromcsv('adj_use.csv', delimiter=',') #c = open('adj_clotheswasher1.csv', 'r') #c = open('adj_dishwasher1.csv', 'r') #c = open('adj_dryer1.csv', 'r') #c = open('adj_furnace1.csv', 'r') #c = open('adj_microwave1.csv', 'r') #c = open('adj_oven1.csv', 'r') #c = open('adj_refrigerator1.csv', 'r') c = open('adj_use.csv', 'r') #trainer = Knn(complexity=0, numInputs=1, discreteOutputs=0, discreteInputs=0); trainer = SVR(complexity=0, numInputs=1, discreteOutputs=0, discreteInputs=0); #trainer = DecisionTreeAB(complexity=0, numInputs=1, discreteOutputs=0, discreteInputs=0); x_train = []; y_train = []; x_predict = []; x_real = []; y_real = []; numRow = 96 day_train_start = 0 day_train_end = 2 day_predict = 8 variance = 0 sum = 0
plt_y = [ 0.46, 0.376, 0.264, 0.318, 0.215, 0.237, 0.149, 0.211, 0.091, 0.0267, 0.057, 0.099, 0.161, 0.198 ] #,0.37,0.042,0.103 plt.scatter(plt_x, plt_y, alpha=0.4) test_var, test_lab = lable_split(test_) #自己的源代码 param_grid = {'C': 100, 'kernel_type': 'linear', 'tol': 0.001, 'epsilon': 0.1} data = {} data['train_X'] = train_var data['train_y'] = train_lab data['test_X'] = test_var data['test_y'] = test_lab _model = SVR(data=data, param=param_grid) _model.train() print(_model.w) print(_model.b) # p_x=np.arange(0, 0.8, 0.01) # p_y=p_x*_model.w+_model.b # plt.plot(p_x, p_y) # svr_rbf = svm.SVR(kernel='rbf', gamma=0.2, C=100) #调库 svr_linear = svm.SVR(kernel='linear', C=100) svr_linear.fit(train_var, train_lab) print(svr_linear) print(svr_linear.support_vectors_) print(svr_linear.support_) #这个就是支持向量在训练集中的标号
inDataTrain = (np.asarray(inDataTrain) - float(avg)) / float(std) inDataTest = (np.asarray(inDataTest) - float(avg)) / float(std) # Creating classification labels from continues data #outDataTrain = map(lambda x: int(x > avg), outDataTrain) #outDataTest = map(lambda x: int(x > avg), outDataTest) print "Done: collecting data from GDP" print "Beginning loading and training" # For testing purpose. print input for test data # each line in output corresponds to one input data field (record) # print inDataTest ## Change the name of the algorithm to test it out. # IMPORTANT: outputClassifier is set to 2, because output is NOT continous algorithmTest = SVR(complexity, numInp, 0, [0,0,0,0], {}) timestamps = {} # Add training data to CE object for i in xrange(len(outDataTrain)): # recording time stamps before and after adding to measure load time firstTS = time.time() algorithmTest.addSingleObservation(inDataTrain[:][i], outDataTrain[i]) secondTS = time.time() timestamps["load" + str(i)] = secondTS - firstTS # training CE using the added data, while the training time is measured firstTS = time.time() algorithmTest.clusterAndTrain() secondTS = time.time() timestamps["train"] = secondTS - firstTS
'norm': 'lin'}  # NOTE(review): continuation of a dict literal started before this chunk

# Number of CE input
numInp = 4

## Algorithm to be tested
interfaceDict = {'in': [dict1, dict2, dict3, dict4], 'out': dict0}
ceDict = {'interface': interfaceDict, 'n_neighbors': 4, 'weights': 'uniform',
          'algorithm': 'auto', 'n_jobs': 1, 'complexity': 1}

# Algorithm that is subject to test.
# algorithmTest = model (numInputs, outputClassifier, InputClassifiersList, ceDict)
# NOTE(review): other harnesses in this file call SVR(complexity, numInputs,
# outputClassifier, inputClassifiers, dict); here the first positional
# argument is numInp — confirm against svr.SVR.__init__.
algorithmTest = SVR(numInp, 3, [0,3,1,0], ceDict)

print "Collecting training and test data from GDP"

# Use the collect data routine to fetch training data in separate lists
# for input and output
trainRecStart = 100
trainRecStop = 200
# inclusive record range, so the count includes both endpoints
numTrainingSamples = trainRecStop - trainRecStart + 1
inDataTrain, outDataTrain = algorithmTest.interface.collectData(trainRecStart, trainRecStop)

# Use the collect data routine to fetch test data in separate lists
# for input and output
testRecStart = 201
testRecStop = 250
numExecuteSamples = testRecStop - testRecStart + 1
inDataTest, outDataTest = algorithmTest.interface.collectData(testRecStart,testRecStop)

print "Done: collecting data from GDP"
inDataTrain = (np.asarray(inDataTrain) - float(avg)) / float(std) inDataTest = (np.asarray(inDataTest) - float(avg)) / float(std) # Creating classification labels from continues data #outDataTrain = map(lambda x: int(x > avg), outDataTrain) #outDataTest = map(lambda x: int(x > avg), outDataTest) print "Done: collecting data from GDP" print "Beginning loading and training" # For testing purpose. print input for test data # each line in output corresponds to one input data field (record) # print inDataTest ## Change the name of the algorithm to test it out. # IMPORTANT: outputClassifier is set to 2, because output is NOT continous algorithmTest = SVR(complexity, numInp, 0, [0, 0, 0, 0], {}) timestamps = {} # Add training data to CE object for i in xrange(len(outDataTrain)): # recording time stamps before and after adding to measure load time firstTS = time.time() algorithmTest.addSingleObservation(inDataTrain[:][i], outDataTrain[i]) secondTS = time.time() timestamps["load" + str(i)] = secondTS - firstTS # training CE using the added data, while the training time is measured firstTS = time.time() algorithmTest.clusterAndTrain() secondTS = time.time() timestamps["train"] = secondTS - firstTS