def main():
    """Question (e): k-NN regression on the scalar-encoded backup dataset.

    Loads the network backup CSV, encodes the categorical columns as
    integers, sweeps the neighbour count from 1 to 50 plotting train/test
    RMSE, then runs the final model with n_neighbor=4.
    """
    dataset = load_data_network('network_backup_dataset.csv')
    week = dataset.week
    day_of_week = dataset.day_of_week
    backup_start_time = dataset.backup_start_time
    workflow_id = dataset.workflow_id
    filename = dataset.file_name
    size_of_backup = dataset.size_of_backup
    backup_time = dataset.backup_time

    #####################
    #### dictionary #####
    #####################
    day_of_week_dict = {
        "Monday": 1,
        "Tuesday": 2,
        "Wednesday": 3,
        "Thursday": 4,
        "Friday": 5,
        "Saturday": 6,
        "Sunday": 7
    }
    workflow_id_dict = {
        'work_flow_0': 1,
        'work_flow_1': 2,
        'work_flow_2': 3,
        'work_flow_3': 4,
        'work_flow_4': 5
    }

    # scalar variables ##
    #####################
    week_scalar = week
    day_of_week_scalar = []
    backup_start_time_scalar = backup_start_time
    workflow_id_scalar = []
    filename_scalar = []
    for day in day_of_week:
        day_of_week_scalar.append(day_of_week_dict[day])
    for ID in workflow_id:
        workflow_id_scalar.append(workflow_id_dict[ID])

    def filename_remove_string(filename):
        # FIX: the Python-2 `string.replace(s, old, new)` function was removed
        # in Python 3; use the equivalent str method instead.
        return int(filename.replace("File_", "")) + 1

    for name in filename:
        filename_scalar.append(filename_remove_string(name))

    ## directly use linear regression ##
    ########################################
    features_scalar = np.array([
        week_scalar, day_of_week_scalar, backup_start_time_scalar,
        workflow_id_scalar, filename_scalar
    ])
    features_scalar = features_scalar.transpose()
    labels_scalar = np.array([size_of_backup])
    labels_scalar = labels_scalar.transpose()

    # question e: knn regression -- sweep k and plot train/test RMSE
    neighbors_range = range(1, 51)
    feature_train_error = []
    feature_test_error = []
    for neighbor in neighbors_range:
        rmse_test, rmse_train = knn(features=features_scalar,
                                    labels=labels_scalar,
                                    n_neighbor=neighbor)
        feature_test_error.append(rmse_test)
        feature_train_error.append(rmse_train)
    plt.plot(neighbors_range, feature_test_error, label="Test Error")
    plt.plot(neighbors_range, feature_train_error, label="Train Error")
    plt.legend(loc='lower right')
    plt.show()
    knn(features=features_scalar, labels=labels_scalar, n_neighbor=4)
def main():
    """Question (a): linear-model regression on the backup dataset.

    Visualises per-workflow backup sizes over 20 and 105 days, fits plain /
    standardized / feature-selected linear regression, sweeps all 32
    scalar-vs-one-hot feature encodings with ridge/lasso/elastic-net, and
    finally sweeps regularization strength on the scalar features.
    """
    dataset = load_data_network('network_backup_dataset.csv')
    week = dataset.week
    day_of_week = dataset.day_of_week
    backup_start_time = dataset.backup_start_time
    workflow_id = dataset.workflow_id
    filename = dataset.file_name
    size_of_backup = dataset.size_of_backup
    backup_time = dataset.backup_time
    #data_list=[week, day_of_week, backup_start_time, workflow_id,filename, size_of_backup, backup_time]
    workflow_list = [
        'work_flow_0', 'work_flow_1', 'work_flow_2', 'work_flow_3',
        'work_flow_4'
    ]
    week_list = [i + 1 for i in range(15)]
    day_of_week_list = [
        'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
        'Sunday'
    ]
    # Accumulate total backup size per workflow for the first 20 days.
    workflow_count = [[0 for i in range(20)] for i in range(5)]
    i = 0
    while (week[i] - 1) * 7 + day_of_week_list.index(day_of_week[i]) + 1 <= 20:
        workflow_count[workflow_list.index(workflow_id[i])][
            (week[i] - 1) * 7 +
            day_of_week_list.index(day_of_week[i])] += size_of_backup[i]
        i += 1
    for i in range(5):
        plt.plot([j + 1 for j in range(20)],
                 workflow_count[i],
                 label=workflow_list[i])
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
               loc=3,
               ncol=2,
               mode="expand",
               borderaxespad=0.)
    #plt.show()
    # Same plot over the whole 105-day (15-week) period.
    workflow_count = [[0 for i in range(105)] for i in range(5)]
    i = 0
    while i < len(week):
        workflow_count[workflow_list.index(workflow_id[i])][
            (week[i] - 1) * 7 +
            day_of_week_list.index(day_of_week[i])] += size_of_backup[i]
        i += 1
    for i in range(5):
        plt.plot([j + 1 for j in range(105)],
                 workflow_count[i],
                 label=workflow_list[i])
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
               loc=3,
               ncol=2,
               mode="expand",
               borderaxespad=0.)
    #plt.show()

    # question a: Linear regression
    # part 1: scalarize the variable (all the scalar variables have a "_scalar" suffix)
    #####################
    #### dictionary #####
    #####################
    day_of_week_dict = {
        "Monday": 1,
        "Tuesday": 2,
        "Wednesday": 3,
        "Thursday": 4,
        "Friday": 5,
        "Saturday": 6,
        "Sunday": 7
    }
    workflow_id_dict = {
        'work_flow_0': 1,
        'work_flow_1': 2,
        'work_flow_2': 3,
        'work_flow_3': 4,
        'work_flow_4': 5
    }
    # scalar variables ##
    #####################
    week_scalar = week
    day_of_week_scalar = []
    backup_start_time_scalar = backup_start_time
    workflow_id_scalar = []
    filename_scalar = []
    for day in day_of_week:
        day_of_week_scalar.append(day_of_week_dict[day])
    for ID in workflow_id:
        workflow_id_scalar.append(workflow_id_dict[ID])

    def filename_remove_string(filename):
        # FIX: the Python-2 `string.replace(s, old, new)` function was removed
        # in Python 3; use the equivalent str method instead.
        return int(filename.replace("File_", "")) + 1

    for name in filename:
        filename_scalar.append(filename_remove_string(name))

    week_one_hot = scalar_to_one_hot(week_scalar)
    day_of_week_one_hot = scalar_to_one_hot(day_of_week_scalar)
    backup_start_time_one_hot = scalar_to_one_hot(backup_start_time_scalar)
    workflow_id_one_hot = scalar_to_one_hot(workflow_id_scalar)
    filename_one_hot = scalar_to_one_hot(filename_scalar)

    # directly use linear regression ##
    ########################################
    linear_model = LinearRegression()
    features_scalar = np.array([
        week_scalar, day_of_week_scalar, backup_start_time_scalar,
        workflow_id_scalar, filename_scalar
    ])
    features_scalar = features_scalar.transpose()
    labels_scalar = np.array([size_of_backup])
    labels_scalar = labels_scalar.transpose()
    linear_rmse_test_score, linear_rmse_train_score, linear_predict, linear_actual = linear_regression(
        features_scalar, labels_scalar, linear_model)
    plt.figure()
    plt.scatter(linear_actual, linear_predict, alpha=0.1)
    plt.ylim(min(linear_predict), max(linear_actual))
    plt.xlabel("Actual value")
    plt.ylabel("Fitted value")
    plt.figure()
    linear_residuals = list(
        map(lambda x: x[0] - x[1], zip(linear_actual, linear_predict)))
    plt.scatter(linear_predict, linear_residuals, alpha=0.1)
    plt.xlim(min(linear_predict), max(linear_actual))
    plt.xlabel("Fitted value")
    plt.ylabel("Residual value")
    print(linear_rmse_test_score)

    # standardize ##
    #####################
    standard = StandardScaler()
    features_standard = standard.fit_transform(features_scalar)
    linear_model = LinearRegression()
    linear_rmse_test_score, linear_rmse_train_score, linear_predict, linear_actual = linear_regression(
        features_standard, labels_scalar, linear_model)
    plt.figure()
    plt.scatter(linear_actual, linear_predict, alpha=0.1)
    plt.ylim(min(linear_predict), max(linear_actual))
    plt.xlabel("Actual value")
    plt.ylabel("Fitted value")
    print(linear_rmse_test_score)

    # three features selected by F-score / mutual information ##
    #####################
    Fval, pval = f_regression(features_scalar, labels_scalar)
    mval = mutual_info_regression(features_scalar, labels_scalar)
    print(Fval)
    print(mval)
    linear_model = LinearRegression()
    features_select = np.array(
        [day_of_week_scalar, backup_start_time_scalar, workflow_id_scalar])
    features_select = features_select.transpose()
    linear_rmse_test_score, linear_rmse_train_score, linear_predict, linear_actual = linear_regression(
        features_select, labels_scalar, linear_model)
    print(linear_rmse_test_score)
    plt.figure()
    plt.scatter(linear_actual, linear_predict, alpha=0.1)
    plt.ylim(min(linear_predict), max(linear_actual))
    plt.xlabel("Actual value")
    plt.ylabel("Fitted value")
    plt.show()

    # 32 combinations of features: one bit per feature selects scalar (1) ##
    # or one-hot (0) encoding.                                            ##
    ################################
    linear_rmse_test_result = []
    linear_rmse_train_result = []
    linear_model = LinearRegression()
    min_ridge_combination = 0
    min_lasso_combination = 0
    min_elastic_combination = 0
    min_ridge_alpha = 0
    min_lasso_alpha = 0
    min_elastic_alpha = 0
    min_ridge = 1000
    min_lasso = 1000
    min_elastic = 1000
    for i in range(0, 32):
        features = []
        if (i & 1):
            features.append(np.array(week_scalar))
        else:
            features.append(np.array(week_one_hot))
        if (i >> 1 & 1):
            features.append(np.array(day_of_week_scalar))
        else:
            features.append(np.array(day_of_week_one_hot))
        if (i >> 2 & 1):
            features.append(np.array(backup_start_time_scalar))
        else:
            features.append(np.array(backup_start_time_one_hot))
        if (i >> 3 & 1):
            features.append(np.array(workflow_id_scalar))
        else:
            features.append(np.array(workflow_id_one_hot))
        if (i >> 4 & 1):
            features.append(np.array(filename_scalar))
        else:
            features.append(np.array(filename_one_hot))
        # FIX: zip() returns a one-shot iterator in Python 3; materialise it
        # so the len() and indexing below work.
        features = list(zip(*features))
        features_spilit = []
        for j in range(len(features)):
            tmp = splitlist(features[j])
            features_spilit.append(tmp)
        #print(features)
        linear_rmse_test_score, linear_rmse_train_score, linear_predict, linear_actual = linear_regression(
            features_spilit, labels_scalar, linear_model)
        linear_rmse_test_result.append(linear_rmse_test_score)
        linear_rmse_train_result.append(linear_rmse_train_score)
        for al in np.arange(0.1, 1, 0.1):
            ridge_model = Ridge(alpha=al)
            ridge_rmse_test_score, ridge_rmse_train_score, ridge_predict, ridge_actual = linear_regression(
                features_spilit, labels_scalar, ridge_model)
            if (min_ridge > ridge_rmse_test_score):
                min_ridge_combination = i
                min_ridge_alpha = al
                min_ridge = ridge_rmse_test_score
        for al in np.arange(0.01, 1, 0.01):
            lasso_model = Lasso(alpha=al)
            lasso_rmse_test_score, lasso_rmse_train_score, linear_predict, linear_actual = linear_regression(
                features_spilit, labels_scalar, lasso_model)
            if (min_lasso > lasso_rmse_test_score):
                min_lasso_combination = i
                min_lasso_alpha = al
                min_lasso = lasso_rmse_test_score
        for al in np.arange(0.01, 1, 0.01):
            elastic_model = ElasticNet(alpha=al, l1_ratio=0.5)
            elastic_rmse_test_score, elastic_rmse_train_score, linear_predict, linear_actual = linear_regression(
                features_spilit, labels_scalar, elastic_model)
            if (min_elastic > elastic_rmse_test_score):
                min_elastic_combination = i
                min_elastic_alpha = al
                min_elastic = elastic_rmse_test_score
    plt.figure()
    plt.plot(linear_rmse_test_result, label="Testset")
    plt.plot(linear_rmse_train_result, label="Trainset")
    plt.legend(loc="best")
    print("The minimum test RMSE is combination: ")
    print(linear_rmse_test_result.index(min(linear_rmse_test_result)))
    print("The minimum train RMSE is combination: ")
    print(linear_rmse_train_result.index(min(linear_rmse_train_result)))
    print(
        "The minimum ridge test RMSE is:%f and combination: %d with alpha is %f"
        % (min_ridge, min_ridge_combination, min_ridge_alpha))
    print(
        "The minimum lasso test RMSE is:%f and combination: %d with alpha is %f"
        % (min_lasso, min_lasso_combination, min_lasso_alpha))
    print(
        "The minimum elastic test RMSE is:%f and combination: %d with alpha is %f"
        % (min_elastic, min_elastic_combination, min_elastic_alpha))

    # Controlling ill-conditioning and over-fiting ##
    #################################################
    ridge_test_result = []
    lasso_test_result = []
    elastic_test_result = []
    for al in np.arange(0.1, 1, 0.1):
        ridge_model = Ridge(alpha=al)
        ridge_rmse_test_score, ridge_rmse_train_score, linear_predict, linear_actual = linear_regression(
            features_scalar, labels_scalar, ridge_model)
        lasso_model = Lasso(alpha=al)
        lasso_rmse_test_score, lasso_rmse_train_score, linear_predict, linear_actual = linear_regression(
            features_scalar, labels_scalar, lasso_model)
        elastic_model = ElasticNet(alpha=0.5, l1_ratio=al)
        elastic_rmse_test_score, elastic_rmse_train_score, linear_predict, linear_actual = linear_regression(
            features_scalar, labels_scalar, elastic_model)
        ridge_test_result.append(ridge_rmse_test_score)
        lasso_test_result.append(lasso_rmse_test_score)
        elastic_test_result.append(elastic_rmse_test_score)
    print("The minimum ridge test RMSE happens when alpha is: ")
    print(0.1 * (1 + ridge_test_result.index(min(ridge_test_result))))
    print("The minimum lasso test RMSE happens when alpha is: ")
    print(0.1 * (1 + lasso_test_result.index(min(lasso_test_result))))
    print("The minimum elasticnet test RMSE happens when l1_ratio is: ")
    print(0.1 * (1 + elastic_test_result.index(min(elastic_test_result))))
    plt.show()
def main():
    """Question (b): random-forest regression on the scalar-encoded dataset.

    Prints the baseline forest's errors, sweeps number of trees and number
    of features (plotting OOB and test RMSE), sweeps max depth, then runs
    the final chosen configuration.
    """
    dataset = load_data_network('network_backup_dataset.csv')
    week = dataset.week
    day_of_week = dataset.day_of_week
    backup_start_time = dataset.backup_start_time
    workflow_id = dataset.workflow_id
    filename = dataset.file_name
    size_of_backup = dataset.size_of_backup
    backup_time = dataset.backup_time

    #####################
    #### dictionary #####
    #####################
    day_of_week_dict = {
        "Monday": 1,
        "Tuesday": 2,
        "Wednesday": 3,
        "Thursday": 4,
        "Friday": 5,
        "Saturday": 6,
        "Sunday": 7
    }
    workflow_id_dict = {
        'work_flow_0': 1,
        'work_flow_1': 2,
        'work_flow_2': 3,
        'work_flow_3': 4,
        'work_flow_4': 5
    }

    # scalar variables ##
    #####################
    week_scalar = week
    day_of_week_scalar = []
    backup_start_time_scalar = backup_start_time
    workflow_id_scalar = []
    filename_scalar = []
    for day in day_of_week:
        day_of_week_scalar.append(day_of_week_dict[day])
    for ID in workflow_id:
        workflow_id_scalar.append(workflow_id_dict[ID])

    def filename_remove_string(filename):
        # FIX: the Python-2 `string.replace(s, old, new)` function was removed
        # in Python 3; use the equivalent str method instead.
        return int(filename.replace("File_", "")) + 1

    for name in filename:
        filename_scalar.append(filename_remove_string(name))

    ## directly use linear regression ##
    ########################################
    features_scalar = np.array([
        week_scalar, day_of_week_scalar, backup_start_time_scalar,
        workflow_id_scalar, filename_scalar
    ])
    features_scalar = features_scalar.transpose()
    labels_scalar = np.array([size_of_backup])
    labels_scalar = labels_scalar.transpose()

    # Question b: Random Forest Method
    # subquestion i:
    # FIX: Python-2 print statements converted to Python-3 print() calls.
    print('Test RMSE, Train RMSE, OOB error: ')
    print(
        random_forest(features=features_scalar,
                      labels=labels_scalar,
                      num_trees=20,
                      num_features=5,
                      max_depth=4))

    # subquestion ii:
    ## first is oob error vs number of trees, one curve per feature count
    tree_range = range(1, 201)
    feature_range = range(1, 6)
    for num_feature in feature_range:
        feature_oob_error = []
        for num_tree in tree_range:
            rmse_test, rmse_train, oob_error = random_forest(
                features=features_scalar,
                labels=labels_scalar,
                num_trees=num_tree,
                num_features=num_feature,
                max_depth=4)
            feature_oob_error.append(oob_error)
        plt.plot(tree_range, feature_oob_error, label=str(num_feature))
    plt.title("OOB error")
    plt.legend(loc='lower right')
    plt.show()

    ## second is test rmse error, same sweep
    tree_range = range(1, 201)
    feature_range = range(1, 6)
    for num_feature in feature_range:
        feature_test_rmse_error = []
        for num_tree in tree_range:
            rmse_test, rmse_train, oob_error = random_forest(
                features=features_scalar,
                labels=labels_scalar,
                num_trees=num_tree,
                num_features=num_feature,
                max_depth=4)
            feature_test_rmse_error.append(rmse_test)
        plt.plot(tree_range, feature_test_rmse_error, label=str(num_feature))
    plt.title("Test RMSE error")
    plt.legend(loc='lower right')
    plt.show()

    # subquestion iii: sweep max depth at fixed trees/features
    depth_range = range(1, 21)
    depth_oob_error = []
    depth_test_error = []
    for depth in depth_range:
        rmse_test, rmse_train, oob_error = random_forest(
            features=features_scalar,
            labels=labels_scalar,
            num_trees=25,
            num_features=4,
            max_depth=depth)
        depth_oob_error.append(oob_error)
        depth_test_error.append(rmse_test)
    plt.plot(depth_range, depth_oob_error)
    plt.title("OOB Error")
    plt.show()
    plt.plot(depth_range, depth_test_error)
    plt.title("Test RMSE Error")
    plt.show()

    # subquestion iv: final model
    random_forest(features=features_scalar,
                  labels=labels_scalar,
                  num_trees=25,
                  num_features=4,
                  max_depth=4)
def main():
    """Questions (c)/(d): neural-network and per-workflow polynomial regression.

    Visualises per-workflow backup sizes, scalar- and one-hot-encodes the
    features, then (question d) fits polynomial-feature linear regression of
    degree 2..10 per workflow with 10-fold CV, plotting RMSE curves and
    fitted/residual scatter plots.  The neural-network sweep (question c) is
    kept as commented-out code, as in the original.
    """
    dataset = load_data_network('network_backup_dataset.csv')
    week = dataset.week
    day_of_week = dataset.day_of_week
    backup_start_time = dataset.backup_start_time
    workflow_id = dataset.workflow_id
    filename = dataset.file_name
    size_of_backup = dataset.size_of_backup
    backup_time = dataset.backup_time
    #data_list=[week, day_of_week, backup_start_time, workflow_id,filename, size_of_backup, backup_time]
    workflow_list = [
        'work_flow_0', 'work_flow_1', 'work_flow_2', 'work_flow_3',
        'work_flow_4'
    ]
    week_list = [i + 1 for i in range(15)]
    day_of_week_list = [
        'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
        'Sunday'
    ]
    # Accumulate total backup size per workflow for the first 20 days.
    workflow_count = [[0 for i in range(20)] for i in range(5)]
    i = 0
    while (week[i] - 1) * 7 + day_of_week_list.index(day_of_week[i]) + 1 <= 20:
        workflow_count[workflow_list.index(workflow_id[i])][
            (week[i] - 1) * 7 +
            day_of_week_list.index(day_of_week[i])] += size_of_backup[i]
        i += 1
    for i in range(5):
        plt.plot([j + 1 for j in range(20)],
                 workflow_count[i],
                 label=workflow_list[i])
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
               loc=3,
               ncol=2,
               mode="expand",
               borderaxespad=0.)
    plt.show()
    # Same plot over the whole 105-day (15-week) period.
    workflow_count = [[0 for i in range(105)] for i in range(5)]
    i = 0
    while i < len(week):
        workflow_count[workflow_list.index(workflow_id[i])][
            (week[i] - 1) * 7 +
            day_of_week_list.index(day_of_week[i])] += size_of_backup[i]
        i += 1
    for i in range(5):
        plt.plot([j + 1 for j in range(105)],
                 workflow_count[i],
                 label=workflow_list[i])
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
               loc=3,
               ncol=2,
               mode="expand",
               borderaxespad=0.)
    plt.show()

    # question a: Linear regression
    # part 1: scalarize the variable (all the scalar variables have a "_scalar" suffix)
    #####################
    #### dictionary #####
    #####################
    day_of_week_dict = {
        "Monday": 1,
        "Tuesday": 2,
        "Wednesday": 3,
        "Thursday": 4,
        "Friday": 5,
        "Saturday": 6,
        "Sunday": 7
    }
    workflow_id_dict = {
        'work_flow_0': 1,
        'work_flow_1': 2,
        'work_flow_2': 3,
        'work_flow_3': 4,
        'work_flow_4': 5
    }
    # scalar variables ##
    #####################
    week_scalar = week
    day_of_week_scalar = []
    backup_start_time_scalar = backup_start_time
    workflow_id_scalar = []
    filename_scalar = []
    for day in day_of_week:
        day_of_week_scalar.append(day_of_week_dict[day])
    for ID in workflow_id:
        workflow_id_scalar.append(workflow_id_dict[ID])

    def filename_remove_string(filename):
        # FIX: the Python-2 `string.replace(s, old, new)` function was removed
        # in Python 3; use the equivalent str method instead.
        return int(filename.replace("File_", "")) + 1

    for name in filename:
        filename_scalar.append(filename_remove_string(name))

    week_one_hot = scalar_to_one_hot(week_scalar)
    day_of_week_one_hot = scalar_to_one_hot(day_of_week_scalar)
    backup_start_time_one_hot = scalar_to_one_hot(backup_start_time_scalar)
    workflow_id_one_hot = scalar_to_one_hot(workflow_id_scalar)
    filename_one_hot = scalar_to_one_hot(filename_scalar)

    # directly use linear regression ##
    ########################################
    features_scalar = np.array([
        week_scalar, day_of_week_scalar, backup_start_time_scalar,
        workflow_id_scalar, filename_scalar
    ])
    features_scalar = features_scalar.transpose()
    labels_scalar = np.array([size_of_backup])
    labels_scalar = labels_scalar.transpose()
    linear_rmse_test_score, linear_rmse_train_score = linear_regression(
        features_scalar, labels_scalar, "linear")

    #question c
    features_one_hot = [
        week_one_hot, day_of_week_one_hot, backup_start_time_one_hot,
        workflow_id_one_hot, filename_one_hot
    ]
    # FIX: zip() returns a one-shot iterator in Python 3; materialise it so
    # the len() and indexing below work.
    features_one_hot = list(zip(*features_one_hot))
    features = []
    for j in range(len(features_one_hot)):
        tmp = splitlist(features_one_hot[j])
        features.append(tmp)
    activity_function_list = ['relu', 'logistic', 'tanh']
    # FIX: modern scikit-learn rejects random_state when shuffle is False
    # (the default); the seed was a no-op here anyway, so it is dropped.
    kf = KFold(n_splits=10)
    rmse_relu = []
    rmse_logistic = []
    rmse_tanh = []
    label = np.array(size_of_backup)
    label = label.astype('float')
    # Commented-out neural-network sweep (question c), kept verbatim.
    """for activity_function in activity_function_list:
        for i in range(10,201,10):
            rmse=0
            for train_index, test_index in kf.split(features):
                features_train, labels_train = np.asarray(features)[train_index], label[train_index]
                features_test, labels_test = np.asarray(features)[test_index], label[test_index]
                clf = neural_network(features_train, labels_train, i, activity_function)
                labels_predict=clf.predict(features_test)
                rmse_test = np.sqrt(metrics.mean_squared_error(labels_test, labels_predict))
                rmse=rmse+rmse_test
            rmse=rmse/10
            if(activity_function=='relu'):
                rmse_relu.append(rmse)
            if (activity_function == 'logistic'):
                rmse_logistic.append(rmse)
            if (activity_function == 'tanh'):
                rmse_tanh.append(rmse)
            print('hidden units= %i activity function= %s' %(i, activity_function))
            print rmse
    plt.plot([i for i in range(10, 201, 10)], rmse_relu, color='r')
    plt.show()
    plt.plot([i for i in range(10, 201, 10)], rmse_relu, color='r')
    plt.plot([i for i in range(10, 201, 10)], rmse_logistic, color='g')
    plt.plot([i for i in range(10, 201,10)],rmse_tanh,color='b')
    print 'begin'
    fit_value = []
    true_value = []
    for train_index, test_index in kf.split(features):
        features_train, labels_train = np.asarray(features)[train_index], label[train_index]
        features_test, labels_test = np.asarray(features)[test_index], label[test_index]
        clf = neural_network(features_train, labels_train, 20, 'relu')
        labels_predict = clf.predict(features_test)
        labels_test = list(labels_test)
        labels_predict = list(labels_predict)
        fit_value.extend(labels_predict)
        true_value.extend(labels_test)
    plt.scatter(true_value, fit_value)
    plt.show()
    fit_value = np.array(fit_value)
    true_value = np.array(true_value)
    residual = true_value - fit_value
    true_value = list(true_value)
    residual = list(residual)
    plt.scatter(fit_value, residual)
    plt.show()
    plt.scatter([i+1 for i in range(len(features))],fit_value,color='b',label='fit_value')
    plt.scatter([i+1 for i in range(len(features))],true_value,color='g',label='true_value')
    plt.legend(loc='upper right')
    plt.show()
    plt.scatter([i+1 for i in range(len(features))],fit_value,color='b',label='fit_value')
    plt.scatter([i+1 for i in range(len(features))],residual,color='g',label='redidual')
    plt.legend(loc='upper right')
    plt.show()"""

    #question d: group samples by workflow id (feature column 3)
    features_scalar = np.array([
        week_scalar, day_of_week_scalar, backup_start_time_scalar,
        workflow_id_scalar, filename_scalar
    ])
    features_scalar = features_scalar.transpose()
    label = np.array(size_of_backup)
    workflow_list = [[] for i in range(5)]
    label_list = [[] for i in range(5)]
    for i in range(len(features_scalar)):
        # FIX: the feature matrix is float; cast the workflow id to int
        # before using it as a list index (float indices raise TypeError).
        wf = int(features_scalar[i][3]) - 1
        workflow_list[wf].append(features_scalar[i])
        label_list[wf].append(label[i])
    # Commented-out plain per-workflow linear regression, kept verbatim.
    """kf = KFold(n_splits=10, random_state=0)
    rmse=[0 for i in range(5)]
    for i in range(5):
        for train_index, test_index in kf.split(workflow_list[i]):
            train_feature, train_label=np.array(workflow_list[i])[train_index],np.array(label_list[i])[train_index]
            test_feature, test_label= np.array(workflow_list[i])[test_index],np.array(label_list[i])[test_index]
            lr=LinearRegression()
            lr.fit(train_feature,train_label)
            label_predict=lr.predict(test_feature)
            linear_rmse_test = np.sqrt(metrics.mean_squared_error(test_label, label_predict))
            rmse[i]=rmse[i]+linear_rmse_test
        rmse[i]=rmse[i]/10
    print rmse"""
    # Polynomial-feature regression of degree 2..10 per workflow, 10-fold CV.
    rmse = [[0 for i in range(2, 11)] for i in range(5)]
    rmse_train = [[0 for i in range(2, 11)] for i in range(5)]
    fit_value = []
    true_value = []
    for i in range(5):
        for j in range(2, 11):
            poly = PolynomialFeatures(degree=j)
            workflow_temp = poly.fit_transform(workflow_list[i])
            for train_index, test_index in kf.split(workflow_temp):
                train_feature, train_label = np.array(
                    workflow_temp)[train_index], np.array(
                        label_list[i])[train_index]
                test_feature, test_label = np.array(
                    workflow_temp)[test_index], np.array(
                        label_list[i])[test_index]
                lr = LinearRegression()
                lr.fit(train_feature, train_label)
                label_predict = lr.predict(test_feature)
                label_train_predict = lr.predict(train_feature)
                linear_rmse_train = np.sqrt(
                    metrics.mean_squared_error(train_label,
                                               label_train_predict))
                linear_rmse_test = np.sqrt(
                    metrics.mean_squared_error(test_label, label_predict))
                rmse_train[i][j - 2] = rmse_train[i][j - 2] + linear_rmse_train
                rmse[i][j - 2] = rmse[i][j - 2] + linear_rmse_test
                test_label = list(test_label)
                label_predict = list(label_predict)
                fit_value.extend(label_predict)
                true_value.extend(test_label)
            # Average the accumulated fold errors over the 10 folds.
            rmse_train[i][j - 2] = rmse_train[i][j - 2] / 10
            rmse[i][j - 2] = rmse[i][j - 2] / 10
            print('i=%i , j= %i' % (i, j))
            print('rmse_train: %f' % rmse_train[i][j - 2])
            print('rmse: %f' % rmse[i][j - 2])
    # FIX: Python-2 print statements converted to Python-3 print() calls.
    print(rmse)
    print(rmse_train)
    workflow_list_sign = [
        'work_flow_0', 'work_flow_1', 'work_flow_2', 'work_flow_3',
        'work_flow_4'
    ]
    for i in range(5):
        plt.plot([j for j in range(2, 11)],
                 rmse_train[i],
                 label=workflow_list_sign[i])
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
               loc=3,
               ncol=2,
               mode="expand",
               borderaxespad=0.)
    plt.show()
    for i in range(5):
        plt.plot([j for j in range(2, 11)],
                 rmse[i],
                 label=workflow_list_sign[i])
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
               loc=3,
               ncol=2,
               mode="expand",
               borderaxespad=0.)
    plt.show()
    plt.scatter(true_value, fit_value)
    plt.show()
    fit_value = np.array(fit_value)
    true_value = np.array(true_value)
    residual = true_value - fit_value
    true_value = list(true_value)
    residual = list(residual)
    plt.scatter(fit_value, residual)
    plt.show()
    plt.scatter([i + 1 for i in range(len(fit_value))],
                fit_value,
                color='b',
                label='fit_value')
    plt.scatter([i + 1 for i in range(len(true_value))],
                true_value,
                color='g',
                label='true_value')
    plt.legend(loc='upper right')
    plt.show()
    plt.scatter([i + 1 for i in range(len(fit_value))],
                fit_value,
                color='b',
                label='fit_value')
    plt.scatter([i + 1 for i in range(len(true_value))],
                residual,
                color='g',
                label='redidual')
    plt.legend(loc='upper right')
    plt.show()