def draw_distribution_graph(column, name, mean, median, mode):
    """Draw a density histogram of ``column`` with mean/median/mode overlays.

    For each of the three averages a normal curve centred on that average is
    plotted (its spread is the standard deviation of the data measured around
    that same average), followed by a vertical line at the average itself.
    The title lists the variance and standard deviation around each average.

    Args:
        column: The values to plot. Not modified by this function.
        name: Label for the x axis.
        mean: Mean of ``column``, computed by the caller.
        median: Median of ``column``, computed by the caller.
        mode: Mode of ``column``, computed by the caller.
    """
    # Sort a copy rather than calling column.sort(): an in-place sort would
    # reorder the caller's own sequence and, if that sequence is one column
    # of a table, break its row alignment with the other columns.
    values = sorted(column)

    graph.hist(
        values,
        bins=100,
        density=True,
        alpha=constant.SCATTER_ALPHA,
        color=constant.SCATTER_COLOUR,
    )
    # Sample the curves across whatever x range the histogram ended up with.
    x_min, x_max = graph.xlim()[0], graph.xlim()[1]
    x = np.linspace(x_min, x_max, 100)
    graph.xlabel(name)

    averages = (
        ("mean", mean,
         constant.DISTRIBUTION_MEAN_COLOUR,
         constant.DISTRIBUTION_MEAN_LINE_STYLE),
        ("median", median,
         constant.DISTRIBUTION_MEDIAN_COLOUR,
         constant.DISTRIBUTION_MEDIAN_LINE_STYLE),
        ("mode", mode,
         constant.DISTRIBUTION_MODE_COLOUR,
         constant.DISTRIBUTION_MODE_LINE_STYLE),
    )

    # Compute each spread once; both the curve and the title need it.
    spreads = {}
    for label, centre, _colour, _line_style in averages:
        spreads[label] = (
            MathsUtil.arr_variance(values, centre),
            MathsUtil.arr_standard_deviation(values, centre),
        )

    # distributions -- all three curves are drawn before any vertical line so
    # that the drawing order of the original implementation is preserved.
    for label, centre, colour, _line_style in averages:
        graph.plot(x, norm.pdf(x, centre, spreads[label][1]), color=colour)

    # averages
    for _label, centre, colour, line_style in averages:
        graph.axvline(centre, color=colour, linestyle=line_style)

    # title
    # NOTE(review): assuming ``graph`` is matplotlib.pyplot, a labels-only
    # legend pairs these names with the first plotted artists in order.
    graph.legend(["Mean", "Median", "Mode"])
    title_lines = [
        f"{label}={centre:.2f}, variance={spreads[label][0]:.2f} "
        f"std={spreads[label][1]:.2f}"
        for label, centre, _colour, _line_style in averages
    ]
    graph.title("Distribution: " + "\n".join(title_lines))
def export_normal_distribution(self, column, date):
    """Export the distribution graph of one column and print a confirmation.

    Args:
        column: Index into ``self.headers`` selecting the column to export.
        date: Passed to ``DataFrame.get_export_path`` to obtain the export
            path.
    """
    export_path = DataFrame.get_export_path(date)

    header = self.headers[column]
    values = self.data[header]

    # The three averages are computed here and handed to the graph helper.
    mean = MathsUtil.arr_mean(values)
    median = MathsUtil.arr_median(values)
    mode = MathsUtil.arr_mode(values)

    GraphUtils.export_distribution_graph(
        values, header, mean, median, mode, export_path
    )
    print(f"exported {export_path}dist_{header}")
def print_deviation_calculations(self, column):
    """Print the deviation figures of one column around each average.

    Calls ``DataFrame.print_deviation_calculation`` three times, in the
    order mean, median, mode.

    Args:
        column: Index into ``self.headers`` selecting the column.
    """
    values = self.data[self.headers[column]]

    averages = (
        ("Mean", MathsUtil.arr_mean),
        ("Median", MathsUtil.arr_median),
        ("Mode", MathsUtil.arr_mode),
    )
    for label, average_of in averages:
        DataFrame.print_deviation_calculation(label, values, average_of(values))
def run_linear_regression(self, x_axis, y_axis):
    """Estimate a line through two columns and store the result on ``self``.

    Sets ``self.x_axis`` and ``self.y_axis`` (the selected header names),
    ``self.slope``, ``self.y_intercept`` and ``self.predicted_y``
    (``y_intercept + slope * x`` for every x value).

    Args:
        x_axis: Index into ``self.headers`` for the x column.
        y_axis: Index into ``self.headers`` for the y column.
    """
    self.x_axis = self.headers[x_axis]
    self.y_axis = self.headers[y_axis]

    xs = self.data[self.x_axis]
    ys = self.data[self.y_axis]

    self.slope = MathsUtil.estimate_slope(xs, ys)
    self.y_intercept = MathsUtil.estimate_y_intercept(xs, ys, self.slope)

    # predicted_y = y_intercept + slope * x, element by element.
    scaled_xs = MathsUtil.num_by_arr(self.slope, xs)
    self.predicted_y = MathsUtil.num_plus_arr(self.y_intercept, scaled_xs)
def test_sum_of_array(self):
    """arr_sum totals positive, negative and fractional elements."""
    self.assertEqual(MathsUtil.arr_sum([1, 2, 3]), 6)
    self.assertEqual(MathsUtil.arr_sum([1, -2, -3]), -4)
    # 2.6 and 3.07 have no exact binary representation, so the sum may differ
    # from 6.67 in the last bits; compare with a tolerance instead of ``==``.
    self.assertAlmostEqual(MathsUtil.arr_sum([1, 2.6, 3.07]), 6.67)
def test_estimate_y_intercept(self):
    """Identical x and y values with slope 1 give a y-intercept of 0."""
    xs = [1, 2, 3]
    ys = [1, 2, 3]
    intercept = MathsUtil.estimate_y_intercept(xs, ys, 1)
    self.assertEqual(intercept, 0)
def test_arr_standard_deviation(self):
    """The standard deviation of [3, 9] around its mean is 3."""
    mean = MathsUtil.arr_mean([3, 9])
    deviation = MathsUtil.arr_standard_deviation([3, 9], mean)
    self.assertEqual(deviation, 3)
def test_array_by_array(self):
    """arr_by_arr is expected to return the element-wise products.

    NOTE(review): another ``test_array_by_array`` is defined elsewhere in
    this file. If both sit in the same TestCase class, the later definition
    replaces the earlier one and only one of them runs -- confirm and rename.
    """
    cases = (
        ([1, 2, 3], [1, 0, 3], [1, 0, 9]),
        ([-1, -2, -3], [-1, 2, 3], [1, -4, -9]),
        ([2, 2.6, 3.02], [1.5, 2, 3], [3, 5.2, 9.06]),
    )
    for left, right, expected in cases:
        self.assertEqual(MathsUtil.arr_by_arr(left, right), expected)
def test_num_sqrt(self):
    """num_sqrt returns the root of a few perfect squares."""
    for square, root in ((25, 5), (100, 10), (9, 3)):
        self.assertEqual(MathsUtil.num_sqrt(square), root)
def test_arr_variance(self):
    """arr_variance around the mean matches hand-computed values."""
    cases = (
        ([1, 2, 3, 4], 1.25),
        ([-10, 1, 2, 3, 4], 26),
    )
    for values, expected in cases:
        mean = MathsUtil.arr_mean(values)
        self.assertEqual(MathsUtil.arr_variance(values, mean), expected)
def test_number_plus_array(self):
    """num_plus_arr is expected to add the number to every element."""
    cases = (
        (0, [1, 2, 3], [1, 2, 3]),
        (2, [-1, -2, -3], [1, 0, -1]),
        (-1, [-1, -2, -3], [-2, -3, -4]),
        (2.5, [1.5, 2.4, 3.05], [4, 4.9, 5.55]),
    )
    for number, values, expected in cases:
        self.assertEqual(MathsUtil.num_plus_arr(number, values), expected)
def test_number_by_array(self):
    """num_by_arr is expected to multiply every element by the number."""
    cases = (
        (0, [1, 2, 3], [0, 0, 0]),
        (2, [-1, -2, -3], [-2, -4, -6]),
        (-1, [-1, -2, -3], [1, 2, 3]),
        (2.5, [1.5, 2.4, 3.05], [3.75, 6, 7.625]),
    )
    for number, values, expected in cases:
        self.assertEqual(MathsUtil.num_by_arr(number, values), expected)
def test_array_by_array(self):
    """arr_by_arr is expected to return [] when the lengths differ.

    NOTE(review): another ``test_array_by_array`` is defined elsewhere in
    this file. If both sit in the same TestCase class, the later definition
    replaces the earlier one and only one of them runs -- confirm and rename.
    """
    mismatched_pairs = (
        ([1, 2, 3], [1, 0, 3, 5]),
        ([-1, -2, -3, 4], [-1, 2, 3]),
    )
    for left, right in mismatched_pairs:
        self.assertEqual(MathsUtil.arr_by_arr(left, right), [])
def print_deviation_calculation(average_type, rows, average):
    """Print the standard deviation and variance of ``rows`` around an average.

    Args:
        average_type: Display name of the average (for example "Mean"); it
            appears twice in the printed line.
        rows: The values whose spread is measured.
        average: The centre value the spread is measured around.
    """
    variance = MathsUtil.arr_variance(rows, average)
    standard_deviation = MathsUtil.arr_standard_deviation(rows, average)
    # The original message ended "... and the {average_type} in {value}";
    # "in" was a typo for "is".
    print(
        f"For {average_type}: The Standard Deviation is {standard_deviation:.2f}, "
        f"The Variance is {variance:.2f} and the {average_type} is {average:.2f}"
    )
def test_estimate_slope(self):
    """Identical x and y values give an estimated slope of 1."""
    xs = [1, 2, 3]
    ys = [1, 2, 3]
    slope = MathsUtil.estimate_slope(xs, ys)
    self.assertEqual(slope, 1)
def plot_normal_distribution(self, column):
    """Show the distribution graph of one column.

    Args:
        column: Index into ``self.headers`` selecting the column to plot.
    """
    header = self.headers[column]
    values = self.data[header]

    # The three averages are computed here and handed to the graph helper.
    mean = MathsUtil.arr_mean(values)
    median = MathsUtil.arr_median(values)
    mode = MathsUtil.arr_mode(values)

    GraphUtils.show_distribution_graph(values, header, mean, median, mode)
def test_mean_of_array(self):
    """arr_mean averages positive, negative and fractional elements."""
    self.assertEqual(MathsUtil.arr_mean([1, 2, 3]), 2)
    self.assertEqual(MathsUtil.arr_mean([14, -2, -3]), 3)
    # The fractional operands have no exact binary representation, so the
    # mean may differ from 2.2556 in the last bits; compare with a tolerance.
    self.assertAlmostEqual(MathsUtil.arr_mean([1, 2.6, 3.07, 2.3524]), 2.2556)