def pre_processing():
    """
    Process settings and data
    :return: required data for execution
    """
    # Process and print settings
    settings = process_command_line()
    print 'Current settings:'
    pp.pprint(vars(settings))

    # Resetting random seed
    reset_random_seed(settings)

    # Loading data
    data = load_data(settings)
    param, cache = precompute_minimal(data, settings)

    return settings, data, param, cache
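# Hedged usage sketch: how pre_processing() would feed a forest, mirroring the
# demo scripts below. It assumes this module already imports MondrianForest and
# that load_data() provides the 'current' minibatch partition used elsewhere in
# this repo; the function name here is illustrative, not from the original source.
def demo_pre_processing():
    settings, data, param, cache = pre_processing()
    mf = MondrianForest(settings, data)
    # Batch-train on the first minibatch, as in the demo scripts below
    first_minibatch = data['train_ids_partition']['current'][0]
    mf.fit(data, first_minibatch, settings, param, cache)
    return mf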
def main():
    # Import settings from command line
    settings = process_command_line()
    print 'Current settings:'
    pp.pprint(vars(settings))

    # Resetting random seed
    reset_random_seed(settings)

    # Loading batch data
    batch_type_list = ['office', 'kitchen', 'bookstore']
    #batch_type_list = ['kitchen','office','bathroom','bedroom','bookstore','living_room']
    incremental_type_list = ['computer_room', 'home_office', 'office_kitchen', 'classroom']
    #incremental_type_list = ['study_space','classroom','computer_room','lobby','home_office','office_kitchen','playroom','reception_room','study','dining_room','cafeteria','furniture_store','conference_room','dinette','gym','storage_room','indoor_balcony','laundromat','printer_room','basement','recreation_room']
    training_perc = 0

    data, training_list, test_list = load_type_dataset(settings, batch_type_list,
                                                       incremental_type_list, training_perc)
    param, cache = precompute_minimal(data, settings)
    mf = MondrianForest(settings, data)

    # Batch training on the 'batch' partition of the training ids
    batch_train_ids = data['train_ids_partition']['batch']
    print '\nBatch training on %d elements...' % (len(batch_train_ids))
    mf.fit(data, batch_train_ids, settings, param, cache)
    print '...batch training done. \n'

    # Optional incremental (online) training
    if training_perc > 0:
        incremental_train_ids = data['train_ids_partition']['incremental']
        print 'Incremental training on %d elements...' % (len(incremental_train_ids))
        mf.partial_fit(data, incremental_train_ids, settings, param, cache)
        print '...incremental training done. \n'

    # Evaluation
    print 'Evaluation... \n'
    weights_prediction = np.ones(settings.n_mondrians) * 1.0 / settings.n_mondrians
    pred_forest_test, metrics_test = \
        mf.evaluate_predictions(data, data['x_test'], data['y_test'], \
                                settings, param, weights_prediction, False)
    name_metric = settings.name_metric  # acc or mse
    metric_test = metrics_test[name_metric]
    tree_numleaves = np.zeros(settings.n_mondrians)
    for i_t, tree in enumerate(mf.forest):
        tree_numleaves[i_t] = len(tree.leaf_nodes)
    forest_numleaves = np.mean(tree_numleaves)

    # NOTE: hardcoded output path for the statistics file
    f_stats = open('/home/alberto/tesi/mondrianforest/src/results/statistics.txt', 'w')
    print '%s\t\tnum_leaves' % (name_metric)
    f_stats.write(str(name_metric) + ' : ' + str(metric_test))
    f_stats.write('\n')
    f_stats.write('num_leaves : ' + str(forest_numleaves))
    f_stats.write('\n')
    f_stats.write('\n')
    print '%.3f\t\t%.3f' % (metric_test, forest_numleaves)

    print '\nFinal forest stats:'
    f_stats.write('Final forest stats: \n')
    tree_stats = np.zeros((settings.n_mondrians, 2))
    tree_average_depth = np.zeros(settings.n_mondrians)
    for i_t, tree in enumerate(mf.forest):
        tree_stats[i_t, -2:] = np.array([len(tree.leaf_nodes), len(tree.non_leaf_nodes)])
        tree_average_depth[i_t] = tree.get_average_depth(settings, data)[0]
    print 'mean(num_leaves) = %.1f, mean(num_non_leaves) = %.1f, mean(tree_average_depth) = %.1f' \
        % (np.mean(tree_stats[:, -2]), np.mean(tree_stats[:, -1]), np.mean(tree_average_depth))
    print 'n_train = %d, log_2(n_train) = %.1f, mean(tree_average_depth) = %.1f +- %.1f' \
        % (data['n_train'], np.log2(data['n_train']), np.mean(tree_average_depth), np.std(tree_average_depth))
    f_stats.write('mean(num_leaves) = ' + str(np.mean(tree_stats[:, -2])))
    f_stats.write(' mean(num_non_leaves) = ' + str(np.mean(tree_stats[:, -1])))
    f_stats.write(' mean(tree_average_depth) = ' + str(np.mean(tree_average_depth)) + '\n')
    f_stats.write('n_train = ' + str(data['n_train']))
    f_stats.write(' log_2(n_train) = ' + str(np.log2(data['n_train'])))
    f_stats.write(' mean(tree_average_depth) = ' + str(np.mean(tree_average_depth))
                  + ' +- ' + str(np.std(tree_average_depth)) + '\n')
    f_stats.write('\n------------------------------------------------------------------\n')
    print '\n...evaluation done.'

    # Per-file confusion matrices on the test scenes
    print 'Computing confusion matrices...'
    uf_dir = settings.data_path + '/unary_csv'
    labels_dir = settings.data_path + '/labels_csv'
    cm_res_dir = '../results/cm'
    for file_name in test_list:
        curr_uf_csv = uf_dir + '/' + file_name
        curr_labels_csv = labels_dir + '/' + file_name
        x_df = pd.read_csv(curr_uf_csv, usecols=unary_features)
        y_df = pd.read_csv(curr_labels_csv, dtype=int)
        x_test = x_df.to_numpy()
        y_test = y_df.to_numpy()
        y_test.shape = (y_test.shape[0], )

        # Apply the same min-max scaling used for the training data
        if settings.normalize_features == 1:
            min_d = np.minimum(np.min(data['x_train'], 0), np.min(data['x_test'], 0))
            max_d = np.maximum(np.max(data['x_train'], 0), np.max(data['x_test'], 0))
            range_d = max_d - min_d
            idx_range_d_small = range_d <= 0.  # find columns where all features are identical
            if data['n_dim'] > 1:
                range_d[idx_range_d_small] = 1e-3  # non-zero value just to prevent division by 0
            elif idx_range_d_small:
                range_d = 1e-3
            x_test -= min_d + 0.
            x_test /= range_d

        cm_weights_prediction = np.ones(settings.n_mondrians) * 1.0 / settings.n_mondrians
        cm_pred_forest_test, cm_metrics_test = \
            mf.evaluate_predictions(data, x_test, y_test, \
                                    settings, param, cm_weights_prediction, False)
        #y_test_pred = get_y_pred(cm_pred_forest_test['pred_prob'])
        y_test_pred = get_label_predictions(cm_pred_forest_test['pred_prob'])

        f_stats.write(str(file_name) + '\n')
        print 'y_test'
        print y_test[:25]
        print 'y_test_pred'
        print y_test_pred[:25]
        f_stats.write('\n y_test y_mf_pred: \n')
        for x in range(25):
            f_stats.write('# ' + str(y_test[x]) + ' # ' + str(y_test_pred[x]) + '\n')
        f_stats.write('\n---------------------------------------\n')

        cm = compute_confusion_matrix(y_test, y_test_pred, print_cm=False)
        cm_path = cm_res_dir + '/' + file_name
        #np.savetxt(cm_path, cm, delimiter=",")

        # SAVE PREDICTIONS ON CORRESPONDING PCD
        '''end_idx = file_name.rfind('.')
        input_path = '/home/alberto/tesi/dataset/NYUDV2/trained_semseg_data/clustering/'
        pcd_name = file_name[0:end_idx] + '.pcd'
        print pcd_name
        pcd_path = input_path + pcd_name
        input_cloud = pypcd.PointCloud.from_path(pcd_path)
        point_x_list = input_cloud.pc_data['x']
        point_y_list = input_cloud.pc_data['y']
        point_z_list = input_cloud.pc_data['z']
        cluster_idx_list = input_cloud.pc_data['label']
        new_cloud = input_cloud.pc_data.view(np.float32).reshape(input_cloud.pc_data.shape + (-1,))
        print 'Cluster cloud shape:'
        print new_cloud.shape
        print 'Cluster label length: %d' % (len(cluster_idx_list))
        for n in range(new_cloud.shape[0]):
            new_cloud[n][0] = point_x_list[n]
            new_cloud[n][1] = point_y_list[n]
            new_cloud[n][2] = point_z_list[n]
            if cluster_idx_list[n] > 4000:
                new_cloud[n][3] = 0
            else:
                new_cloud[n][3] = y_test_pred[cluster_idx_list[n]]
        #res_pcd = pypcd.make_xyz_rgb_point_cloud(new_cloud)
        res_pcd = pypcd.make_xyz_label_point_cloud(new_cloud)
        output_path = '/home/alberto/tesi/mondrianforest/src/results/pcd/' + pcd_name
        res_pcd.save(output_path)'''

    print '...computation done.\n END.'
    f_stats.close()
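# Hedged refactoring sketch: the inline min-max scaling in the loop above could
# be pulled into one helper so every test CSV shares the same code path. The
# logic mirrors the block above; the function and argument names here are
# illustrative, not from the original source (np is numpy, as imported elsewhere).
def normalize_like_training(x, x_train, x_test_ref, n_dim):
    """Scale x with the per-column min/max ranges of the reference data."""
    min_d = np.minimum(np.min(x_train, 0), np.min(x_test_ref, 0))
    max_d = np.maximum(np.max(x_train, 0), np.max(x_test_ref, 0))
    range_d = max_d - min_d
    idx_small = range_d <= 0.  # columns where all features are identical
    if n_dim > 1:
        range_d[idx_small] = 1e-3  # non-zero value just to prevent division by 0
    elif idx_small:
        range_d = 1e-3
    return (x - min_d) / range_d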
#!/usr/bin/env python
import numpy as np
import pprint as pp     # pretty printing module
from matplotlib import pyplot as plt    # required only for plotting results
from mondrianforest_utils import load_data, reset_random_seed, precompute_minimal
from mondrianforest import process_command_line, MondrianForest

settings = process_command_line()
print 'Current settings:'
pp.pprint(vars(settings))

# Resetting random seed
reset_random_seed(settings)

# Loading data
data = load_data(settings)
print "Data: ", data
print type(settings)
param, cache = precompute_minimal(data, settings)

mf = MondrianForest(settings, data)
train_ids_current_minibatch = data['train_ids_partition']['current'][0]
print train_ids_current_minibatch.shape

print "First batch train on 5 data points"
mf.fit(data, train_ids_current_minibatch[0:5], settings, param, cache)
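# Hedged continuation of the scratch script above: after the initial fit on
# five points, the remaining ids of the minibatch could be added online with
# partial_fit, the same extend-then-update pattern used in the demo loop below.
# This is an illustration, not part of the original script.
print "Online update on the remaining data points"
mf.partial_fit(data, train_ids_current_minibatch[5:], settings, param, cache)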
#!/usr/bin/env python
import numpy as np
import pprint as pp     # pretty printing module
from matplotlib import pyplot as plt    # required only for plotting results
from mondrianforest_utils import load_data, reset_random_seed, precompute_minimal
from mondrianforest import process_command_line, MondrianForest

PLOT = False

settings = process_command_line()
print 'Current settings:'
pp.pprint(vars(settings))

# Resetting random seed
reset_random_seed(settings)

# Loading data
data = load_data(settings)
param, cache = precompute_minimal(data, settings)

mf = MondrianForest(settings, data)

print '\nminibatch\tmetric_train\tmetric_test\tnum_leaves'
for idx_minibatch in range(settings.n_minibatches):
    train_ids_current_minibatch = data['train_ids_partition']['current'][idx_minibatch]
    if idx_minibatch == 0:
        # Batch training for first minibatch
        mf.fit(data, train_ids_current_minibatch, settings, param, cache)
    else:
        # Online update on each subsequent minibatch
        mf.partial_fit(data, train_ids_current_minibatch, settings, param, cache)
def main():
    # Import settings from command line
    settings = process_command_line()
    print 'Current settings:'
    pp.pprint(vars(settings))

    # Resetting random seed
    reset_random_seed(settings)

    # Loading data
    data = load_dataset(settings)
    param, cache = precompute_minimal(data, settings)

    mf = MondrianForest(settings, data)

    print '\nminibatch\tmetric_train\tmetric_test\tnum_leaves'
    for idx_minibatch in range(settings.n_minibatches):
        train_ids_current_minibatch = data['train_ids_partition']['current'][idx_minibatch]
        if idx_minibatch == 0:
            # Batch training for first minibatch
            mf.fit(data, train_ids_current_minibatch, settings, param, cache)
        else:
            # Online update
            mf.partial_fit(data, train_ids_current_minibatch, settings, param, cache)

        # Evaluate
        weights_prediction = np.ones(settings.n_mondrians) * 1.0 / settings.n_mondrians
        train_ids_cumulative = data['train_ids_partition']['cumulative'][idx_minibatch]
        pred_forest_train, metrics_train = \
            mf.evaluate_predictions(data, data['x_train'][train_ids_cumulative, :], \
                                    data['y_train'][train_ids_cumulative], \
                                    settings, param, weights_prediction, False)
        pred_forest_test, metrics_test = \
            mf.evaluate_predictions(data, data['x_test'], data['y_test'], \
                                    settings, param, weights_prediction, False)
        name_metric = settings.name_metric  # acc or mse
        metric_train = metrics_train[name_metric]
        metric_test = metrics_test[name_metric]
        tree_numleaves = np.zeros(settings.n_mondrians)
        for i_t, tree in enumerate(mf.forest):
            tree_numleaves[i_t] = len(tree.leaf_nodes)
        forest_numleaves = np.mean(tree_numleaves)
        print '%9d\t%.3f\t\t%.3f\t\t%.3f' % (idx_minibatch, metric_train, metric_test, forest_numleaves)

    print 'length of y_test:'
    print data['y_test'].shape
    y_test_pred = get_y_pred(pred_forest_test['pred_prob'])
    print 'length of y_test_pred:'
    print y_test_pred.shape
    for x in range(len(y_test_pred)):
        print 'label: %d mf prediction: %d' % (data['y_test'][x], y_test_pred[x])

    # Show confusion matrix in a separate window
    # (confusion_matrix is assumed to be imported from sklearn.metrics)
    cm = confusion_matrix(data['y_test'], y_test_pred)
    plt.matshow(cm)
    plt.title('Confusion matrix')
    plt.colorbar()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

    print '\nFinal forest stats:'
    tree_stats = np.zeros((settings.n_mondrians, 2))
    tree_average_depth = np.zeros(settings.n_mondrians)
    for i_t, tree in enumerate(mf.forest):
        tree_stats[i_t, -2:] = np.array([len(tree.leaf_nodes), len(tree.non_leaf_nodes)])
        tree_average_depth[i_t] = tree.get_average_depth(settings, data)[0]
    print 'mean(num_leaves) = %.1f, mean(num_non_leaves) = %.1f, mean(tree_average_depth) = %.1f' \
        % (np.mean(tree_stats[:, -2]), np.mean(tree_stats[:, -1]), np.mean(tree_average_depth))
    print 'n_train = %d, log_2(n_train) = %.1f, mean(tree_average_depth) = %.1f +- %.1f' \
        % (data['n_train'], np.log2(data['n_train']), np.mean(tree_average_depth), np.std(tree_average_depth))
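# Hedged sketch of what get_y_pred() is assumed to do above: collapse the
# forest's predicted class probabilities (shape n_samples x n_classes) to hard
# label predictions. The actual helper in this repo may differ; this is only
# an illustration of the argmax convention used for classification.
def get_y_pred(pred_prob):
    return np.argmax(pred_prob, axis=1)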