def rf_predict_actual(data, n_estimators):
    """Return cross-validated random forest predictions and the true classes.

    Features and targets are derived from ``data`` with
    ``generate_features_targets``. A ``RandomForestClassifier`` built with
    ``n_estimators`` trees is then run through ``cross_val_predict`` using
    10 folds.

    Returns:
        A ``(predicted, targets)`` pair: the cross-validated predictions and
        the actual classes they should be compared against.
    """
    inputs, labels = generate_features_targets(data)
    forest = RandomForestClassifier(n_estimators=n_estimators)
    # 10-fold cross validation yields one prediction per input row.
    return cross_val_predict(forest, inputs, labels, cv=10), labels


if __name__ == "__main__":
    data = np.load('galaxy_catalogue.npy')

    # Show the array shapes as a quick check of their dimensions.
    features, targets = generate_features_targets(data)
    print("Features shape:", features.shape)
    print("Targets shape:", targets.shape)

    # Split the data, putting this fraction of it in the training set.
    fraction_training = 0.7
    training, testing = splitdata_train_test(data, fraction_training)

    # Report the key values of the split.
    print('Number data galaxies:', len(data))
    print('Train fraction:', fraction_training)
    print('Number of galaxies in training set:', len(training))
    print('Number of galaxies in testing set:', len(testing))

    # Show the first ten predicted/actual pairs.
    predicted_class, actual_class = dtc_predict_actual(data)
    print("Some initial results...\n predicted, actual")
    row_template = "{}. {}, {}"
    for row in range(10):
        print(row_template.format(row, predicted_class[row], actual_class[row]))

    # Random forest run: predicted and actual classes, then the model score.
    number_estimators = 50  # Number of trees
    predicted, actual = rf_predict_actual(data, number_estimators)
    accuracy = calculate_accuracy(predicted, actual)
    print("Accuracy score:", accuracy)
# NOTE(review): the ``def`` line for this body was not present in the source;
# the signature below is reconstructed from the names the body reads (``data``,
# ``n_estimators``) and from the call in the ``__main__`` block - confirm.
def rf_predict_actual(data, n_estimators):
    """Return cross-validated random forest predictions and the true classes.

    Args:
        data: Input passed unchanged to ``generate_features_targets``.
        n_estimators: Number of trees given to ``RandomForestClassifier``.

    Returns:
        A ``(predictions, targets)`` pair: the predictions produced by
        ``cross_val_predict`` with 10 folds, and the actual classes.
    """
    # generate the features and targets
    features, targets = generate_features_targets(data)

    # instantiate a random forest classifier using n estimators
    rfc = RandomForestClassifier(n_estimators=n_estimators)

    # get predictions using 10-fold cross validation with cross_val_predict
    predictions = cross_val_predict(rfc, features, targets, cv=10)

    # return the predictions and their actual classes
    return predictions, targets


if __name__ == "__main__":
    data = np.load('galaxy_data.npy')

    # get the predicted and actual classes
    number_estimators = 50  # Number of trees
    predicted, actual = rf_predict_actual(data, number_estimators)

    # calculate the model score
    accuracy = calculate_accuracy(predicted, actual)
    print("Accuracy score:", accuracy)

    # Sort the distinct labels: iterating a bare set gives an arbitrary order
    # (for strings it can differ between interpreter runs), which would make
    # the matrix rows/columns and the plot axes move around from run to run.
    # Assumes the class labels are mutually comparable - TODO confirm.
    class_labels = sorted(set(actual))

    # calculate the model's confusion matrix with the labels in that order
    model_cm = confusion_matrix(y_true=actual, y_pred=predicted, labels=class_labels)

    # plot the confusion matrix
    plt.figure()
    plot_confusion_matrix(model_cm, classes=class_labels, normalize=False)
    plt.show()