def get_tools_sorted_indexes(tools_predictions_list, labels_list, metric=None):
    '''
    Returns the indexes of the tools, sorted by increasing performance
    (worst tool first, best tool last).

    example: if tool 0 is better than tool 1 which is better than tool 2
    then return [2, 1, 0]

    args:
        tools_predictions_list: a list containing the tools predictions for
            a set of systems. Each item is indexed by tool first; the items
            are concatenated along axis 1 (presumably the entities of each
            system -- confirm against the callers).
        labels_list: a list containing the labels for this same set of
            systems. The items are concatenated along axis 0.
        metric: optional callable `metric(prediction, labels)` returning a
            score for one tool, where a higher score means a better tool.
            Defaults to detection_utils.mcc.

    returns:
        a numpy array holding the tools' indexes, ordered from the lowest
        to the highest score.

    raises:
        ValueError: if the two lists have different lengths or are empty.
    '''
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if len(tools_predictions_list) != len(labels_list):
        raise ValueError(
            'tools_predictions_list and labels_list must have the same '
            'length (got {0} and {1})'.format(
                len(tools_predictions_list), len(labels_list)))
    if len(labels_list) == 0:
        raise ValueError('at least one system is required to rank the tools')

    if metric is None:
        metric = detection_utils.mcc

    # Get overall tools' predictions and labels. np.concatenate accepts the
    # whole sequence at once, which avoids re-copying the accumulated array
    # for every system (and does not depend on `reduce` being in scope).
    overall_labels = np.concatenate(labels_list, axis=0)
    overall_tools_predictions = np.concatenate(tools_predictions_list, axis=1)

    # Compute overall performances for each tool (one row per tool)
    overall_tools_performances = [
        metric(pred, overall_labels) for pred in overall_tools_predictions
    ]

    # argsort is ascending: the best tool's index comes last
    return np.argsort(np.array(overall_tools_performances))
else: core_metrics[system] = hist_cm.getFECoreMetrics(system) params = np.arange(0.0, 20.0, 0.5)/100. if args.antipattern == 'god_class' else np.arange(100., 300., 5)/100. # Initialize progressbar bar = progressbar.ProgressBar(maxval=len(params), \ widgets=['Tuning HIST parameters for ' + args.test_system + ': ' ,progressbar.Percentage()]) bar.start() perfs = [] for i, alpha in enumerate(params): bar.update(i) overall_prediction = np.empty(shape=[0, 1]) overall_labels = np.empty(shape=[0, 1]) for system in systems: smells = [entityName for entityName, ratio in core_metrics[system].items() if ratio[0]>alpha] prediction = detection_utils.predictFromDetect(args.antipattern, system, smells) overall_prediction = np.concatenate((overall_prediction, prediction), axis=0) overall_labels = np.concatenate((overall_labels, detection_utils.getLabels(args.antipattern, system)), axis=0) perfs.append(detection_utils.mcc(overall_prediction, overall_labels)) bar.finish() output_file_path = os.path.join(ROOT_DIR, 'experiments', 'tuning', 'results', 'hist', args.antipattern, args.test_system + '.csv') indexes = np.argsort(np.array(perfs)) with open(output_file_path, 'w') as file: file.write("Alpha;MCC\n") for i in reversed(indexes): file.write("{0:.3f};{1}\n".format(params[i], perfs[i]))
y_train=y_train, x_test=x_test, y_test=y_test, num_step=args.n_step, start_lr=args.learning_rate, beta=args.beta, gamma=args.gamma, decay_step=args.decay_step, lr_decay=args.lr_decay) all_losses_train.append(losses_train) all_losses_test.append(losses_test) # Save the model saver.save(sess=session, save_path=smad_utils.get_save_path(args.antipattern, args.test_system, i)) # Compute the ensemble prediction on the test system ensemble_prediction = smad_utils.ensemble_prediction( model=model, save_paths=[smad_utils.get_save_path(args.antipattern, args.test_system, i) for i in range(args.n_net)], input_x=x_test) # Print Ensemble performances print("\nPerformances on " + args.test_system + ": ") print('Precision: ' + str(detection_utils.precision(ensemble_prediction, y_test))) print('Recall : ' + str(detection_utils.recall(ensemble_prediction, y_test))) print('MCC : ' + str(detection_utils.mcc(ensemble_prediction, y_test))) # Plot learning curves smad_utils.plot_learning_curves(all_losses_train, all_losses_test)
shape=dense_sizes, input_size=x_train.shape[-1]) with tf.Session() as session: # Initialize the variables of the TensorFlow graph. session.run(tf.global_variables_initializer()) # Train the model train( session=session, model=model, x_train=x_train, y_train=y_train, num_step=args.n_step, lr=learning_rate, beta=beta, gamma=gamma) pred_overall = np.concatenate((pred_overall, session.run(model.inference, feed_dict={model.input_x: x_valid})), axis=0) labels_overall = np.concatenate((labels_overall, y_valid), axis=0) perfs.append(detection_utils.mcc(pred_overall, labels_overall)) indexes = np.argsort(np.array(perfs)) with open(output_file_path, 'w') as file: file.write("Learning rate;Beta;Gamma;Dense sizes;MCC\n") for j in reversed(indexes): for k in range(len(params[j])): file.write(str(params[j][k]) + ';') file.write(str(perfs[j]) + '\n') bar.update(i+1) bar.finish()
# Append the current system's ASCI predictions to the overall ones
overall_prediction_asci = np.concatenate(
    (overall_prediction_asci, prediction_asci), axis=0)

# Predict with SMAD for the current system and accumulate the result as well
prediction_smad = smad.predict('feature_envy', system)
overall_prediction_smad = np.concatenate(
    (overall_prediction_smad, prediction_smad), axis=0)

# Print a precision/recall/MCC table for the considered system:
# a header, then one separator line followed by one row per tool.
row_separator = '-------------------------------------------'
print(system)
print(' |precision |recall |mcc')
for row_template, tool_prediction in (
        ('InCode |{0:.3f} |{1:.3f} |{2:.3f}', prediction_incode),
        ('HIST |{0:.3f} |{1:.3f} |{2:.3f}', prediction_hist),
        ('JDeodorant |{0:.3f} |{1:.3f} |{2:.3f}', prediction_jd),
        ('Vote |{0:.3f} |{1:.3f} |{2:.3f}', prediction_vote)):
    print(row_separator)
    print(row_template.format(
        detection_utils.precision(tool_prediction, labels),
        detection_utils.recall(tool_prediction, labels),
        detection_utils.mcc(tool_prediction, labels)))
# One slice of predictions per tree, one entry per test sample
predictions = np.zeros((args.n_tree, x_test.shape[0], 1))
for i in range(args.n_tree):
    # Train the i-th tree to predict, for each sample, the index of a tool
    clf = tree.DecisionTreeClassifier(
        min_samples_split=args.min_samples_split,
        max_features=args.max_features,
        max_depth=args.max_depth,
        min_samples_leaf=args.min_samples_leaf).fit(x_train, y_train_asci)

    # Persist the trained tree
    tree_save_path = asci_utils.get_save_path(
        args.antipattern, args.test_system, i)
    with open(tree_save_path, 'wb') as save_file:
        pickle.dump(clf, save_file)

    # The tree's prediction for sample j is the prediction made on j by the
    # tool that the tree selected for j.
    predicted_tool_indexes = clf.predict(x_test)
    for j, tool_index in enumerate(predicted_tool_indexes):
        predictions[i, j, 0] = tools_predictions_test[tool_index][j]

# Average the predictions of all the trees
ensemble_prediction = predictions.mean(axis=0)

# Print the ensemble's performances on the test system
print("\nPerformances on " + args.test_system + ": ")
for score_label, score_function in (
        ('Precision: ', detection_utils.precision),
        ('Recall : ', detection_utils.recall),
        ('MCC : ', detection_utils.mcc)):
    print(score_label + str(score_function(ensemble_prediction, y_test)))