def helper_extract_leading_digits_payments(num_payments, col, num_digits):
    '''
    Check extract_leading_digits_from_list against a stored payments data set.

    Inputs:
      num_payments: value embedded in the test file names
        (TEST_DATA_DIR/payments_<num_payments>_...)
      col: column to read from the input CSV file
      num_digits: number of leading digits to extract; also embedded in
        the name of the file holding the expected results

    Fails (via assert) if the extracted digits differ from the expected
    values or if the function under test modified its input list.
    '''
    base = TEST_DATA_DIR + "/payments_" + str(num_payments) + "_"

    payments = read_column_from_csv(base + "input.csv", col, True)
    # Keep a shallow copy so that in-place modification can be detected.
    snapshot = payments[:]

    expected_path = base + str(num_digits) + "_leading_digits.txt"
    expected = [int(value)
                for value in read_column_from_csv(expected_path, 0, True)]

    result = extract_leading_digits_from_list(payments, num_digits)

    assert result == expected
    assert payments == snapshot, "Do not change the list that is passed to your function!"
def go():
    '''
    Process the command-line arguments and do the work.

    Expects sys.argv to hold, after the program name:
      <input filename> <column number> <num digits>

    If the argument count is wrong, the input file does not exist, or
    one of the numeric arguments is not an integer, prints the usage
    string (followed by a specific error message where one applies) and
    returns without reading any data.  Otherwise reads the requested
    column, plots the Benford distribution and prints the MAD.

    Returns: None
    '''
    usage = "usage: python benford.py <input filename> <column number> <num digits>"

    if len(sys.argv) != 4:
        print(usage)
        return

    input_filename = sys.argv[1]
    if not os.path.isfile(input_filename):
        print(usage)
        print("error: file not found: {}".format(input_filename))
        return

    # convert column number argument to an integer
    try:
        col_num = int(sys.argv[2])
    except ValueError:
        print(usage)
        print("error: column number must be an integer: {}".format(sys.argv[2]))
        return

    # convert number of digits argument to an integer
    try:
        num_digits = int(sys.argv[3])
    except ValueError:
        # The message is formatted exactly once.  Formatting it a second
        # time would treat any braces in the user's argument as
        # replacement fields and raise instead of reporting the error.
        print(usage)
        print("error: number of digits must be an integer: {}".format(sys.argv[3]))
        return

    # All arguments are validated before the file is read, so a bad
    # argument never triggers a pointless read.
    data = util.read_column_from_csv(input_filename, col_num, True)

    plot_benford_dist(data, num_digits)

    # "{:.4}" prints the value with four significant digits
    print("MAD: {:.4}".format(compute_benford_MAD(data, num_digits)))
def compare_actual_expected_from_file(actual, expected_filename):
    '''
    Compare a list of floats against the expected values stored in a file.

    Inputs:
      actual: value returned by the function under test; must be a flat
        list of floats
      expected_filename: file whose column 0 holds the expected values

    Calls pytest.fail if actual is not a list, if it contains a list, if
    its length differs from the expected length, or if any element
    differs from its expected value by more than EPS.
    '''
    # get expected list of values from the file
    expected = [float(entry)
                for entry in read_column_from_csv(expected_filename, 0, True)]

    if not isinstance(actual, list):
        pytest.fail(
            "Actual value returned from the function must be a list of floats.")

    if actual and isinstance(actual[0], list):
        pytest.fail(
            "Actual value returned from the function must be a list of floats,"
            " not a list containing a list.")

    if len(actual) != len(expected):
        length_msg = "Length of expected ({0}) and actual results ({1}) do not match"
        pytest.fail(length_msg.format(len(expected), len(actual)))

    for index, (wanted, got) in enumerate(zip(expected, actual)):
        # stored and computed representations may not be identical,
        # so compare within a tolerance rather than exactly
        if abs(wanted - got) > EPS:
            mismatch_msg = "actual and expected values do not match at element {0}"
            pytest.fail(mismatch_msg.format(index))
def helper_test_compute_benford_MAD(prefix, col, num_digits):
    '''
    Check compute_benford_MAD against the expected value stored in a file.

    Inputs:
      prefix: leading part of the test file names; "input.csv" and
        "computed_benford_mad_<num_digits>_output.txt" are appended to it
      col: column to read from the input CSV file
      num_digits: number of leading digits passed to compute_benford_MAD
    '''
    data = read_column_from_csv(prefix + "input.csv", col, True)
    mad = compute_benford_MAD(data, num_digits)

    output_name = "computed_benford_mad_{0}_output.txt".format(num_digits)
    expected_filename = prefix + output_name

    # The comparison helper works on lists, so wrap the single value.
    compare_actual_expected_from_file([mad], expected_filename)
def compare_actual_expected_from_file(actual, expected_filename):
    '''
    Load the expected values from a file and compare them with actual.

    Inputs:
      actual: value returned by the function under test
      expected_filename: file whose column 0 holds the expected values;
        each entry is converted to a float

    The comparison itself is delegated to compare_actual_expected.
    '''
    # get expected list of values from the file
    expected = [float(text)
                for text in read_column_from_csv(expected_filename, 0, True)]
    compare_actual_expected(actual, expected)
def helper_test_compute_benford_dist(prefix, col, num_digits):
    '''
    Check compute_benford_dist against the expected values stored in a file.

    Inputs:
      prefix: leading part of the test file names; "input.csv" and
        "computed_benford_dist_<num_digits>_output.txt" are appended to it
      col: column to read from the input CSV file
      num_digits: number of leading digits passed to compute_benford_dist

    Also fails (via assert) if the function under test modified the list
    it was given.
    '''
    output_name = "computed_benford_dist_{0}_output.txt".format(num_digits)
    expected_filename = prefix + output_name

    data = read_column_from_csv(prefix + "input.csv", col, True)
    # Keep a shallow copy so that in-place modification can be detected.
    untouched = data[:]

    distribution = compute_benford_dist(data, num_digits)
    compare_actual_expected_from_file(distribution, expected_filename)

    assert data == untouched, "Do not change the list that is passed to your function!"
def helper_test_compute_benford_dist(prefix, col, num_digits):
    '''
    Check compute_benford_dist against the expected values stored in a file.

    Inputs:
      prefix: leading part of the test file names; "input.csv" and
        "computed_benford_dist_<num_digits>_output.txt" are appended to it
      col: column to read from the input CSV file
      num_digits: number of leading digits passed to compute_benford_dist

    Calls pytest.fail if the function under test modified the list it
    was given.
    '''
    data = read_column_from_csv(prefix + "input.csv", col, True)
    # Keep a shallow copy so that in-place modification can be detected.
    untouched = data[:]

    output_name = "computed_benford_dist_{0}_output.txt".format(num_digits)
    expected_filename = prefix + output_name

    distribution = compute_benford_dist(data, num_digits)
    compare_actual_expected_from_file(distribution, expected_filename)

    if data != untouched:
        pytest.fail("Do not change the list that is passed to your function!")
def helper_test_compute_benford_dist(prefix, col, currency_symbol, num_digits):
    '''
    Check benford.compute_benford_dist against expected values stored in
    a file under TEST_DATA_DIR.

    Inputs:
      prefix: leading part of the test file names; "input.csv" and
        "computed_benford_dist_<num_digits>_output.txt" are appended to it
      col: column to read from the input CSV file
      currency_symbol: passed through to benford.compute_benford_dist
      num_digits: number of leading digits passed to the function

    Calls pytest.fail if the function under test modified the list it
    was given.
    '''
    input_path = os.path.join(TEST_DATA_DIR, prefix + "input.csv")
    data = read_column_from_csv(input_path, col, True)
    # Keep a shallow copy so that in-place modification can be detected.
    untouched = data[:]

    output_name = "computed_benford_dist_{0}_output.txt".format(num_digits)
    expected_filename = os.path.join(TEST_DATA_DIR, prefix + output_name)

    distribution = benford.compute_benford_dist(currency_symbol, data, num_digits)
    compare_actual_expected_from_file(distribution, expected_filename)

    if data != untouched:
        pytest.fail("Do not change the list that is passed to your function!")
def helper_test_compute_benford_MAD(prefix, col, currency_symbol, num_digits):
    '''
    Check benford.compute_benford_MAD against the expected value stored
    in a file.

    Inputs:
      prefix: leading part of the test file names; "input.csv" and
        "computed_benford_mad_<num_digits>_output.txt" are appended to it
      col: column to read from the input CSV file
      currency_symbol: passed through to benford.compute_benford_MAD
      num_digits: number of leading digits passed to the function

    Calls pytest.fail if the function under test modified its input
    list, did not return a float, or returned a value that differs from
    the expected one by more than EPS.
    '''
    data = read_column_from_csv(prefix + "input.csv", col, True)
    # Keep a shallow copy so that in-place modification can be detected.
    untouched = data[:]

    actual = benford.compute_benford_MAD(currency_symbol, data, num_digits)

    if data != untouched:
        pytest.fail("Do not change the list that is passed to your function!")

    # get the expected value from the first entry of the file
    output_name = "computed_benford_mad_{0}_output.txt".format(num_digits)
    expected_filename = prefix + output_name
    expected_column = read_column_from_csv(expected_filename, 0, True)
    expected = float(expected_column[0])

    if not isinstance(actual, float):
        pytest.fail("Actual value returned from the function must be a float.")

    if abs(expected - actual) > EPS:
        mismatch_msg = "actual ({:f}) and expected ({:f}) values do not match"
        pytest.fail(mismatch_msg.format(actual, expected))

    # Run the same value through the shared list-based comparison as well.
    compare_actual_expected_from_file([actual], expected_filename)
def go():
    '''
    Process the arguments and do the work.

    Expects sys.argv to hold, after the program name:
      <input filename> <column number> <currency symbol> <num digits>
    optionally followed by an output filename that is passed on to
    plot_benford_dist (None is passed when it is omitted).

    If the argument count is wrong, the input file does not exist, or
    one of the numeric arguments is not an integer, prints the usage
    string (followed by a specific error message where one applies) and
    returns without reading any data.  Otherwise reads the requested
    column, plots the Benford distribution and prints the MAD.

    Returns: None
    '''
    # Each adjacent literal carries its own separating space; without it
    # the pieces run together in the printed usage message.
    usage = ("usage: python benford.py <input filename> <column number> "
             "<currency symbol> <num digits> [<output filename>]")

    if len(sys.argv) < 5 or len(sys.argv) > 6:
        print(usage)
        return

    input_filename = sys.argv[1]
    if not os.path.isfile(input_filename):
        print(usage)
        print("error: file not found: {}".format(input_filename))
        return

    # convert column number argument to an integer
    try:
        col_num = int(sys.argv[2])
    except ValueError:
        print(usage)
        print("error: column number must be an integer: {}".format(sys.argv[2]))
        return

    currency_symbol = sys.argv[3]

    # convert number of digits argument to an integer
    try:
        num_digits = int(sys.argv[4])
    except ValueError:
        # The message is formatted exactly once.  Formatting it a second
        # time would treat any braces in the user's argument as
        # replacement fields and raise instead of reporting the error.
        print(usage)
        print("error: number of digits must be an integer: {}".format(sys.argv[4]))
        return

    # grab the name for the PNG file, if it was supplied
    output_filename = sys.argv[5] if len(sys.argv) == 6 else None

    # All arguments are validated before the file is read, so a bad
    # argument never triggers a pointless read.
    data = util.read_column_from_csv(input_filename, col_num, True)

    plot_benford_dist(currency_symbol, data, num_digits, output_filename)

    # "{:.4}" prints the value with four significant digits
    print("MAD: {:.4}".format(compute_benford_MAD(currency_symbol, data, num_digits)))
plt.xticks(range(lb, ub, 10**(num_digits-1))) # compute limits for the y axis max_val = max(max(expected), max(actual)) y_ub = max_val + max_val * .1 plt.ylim(0,y_ub) # add labels plt.title("Actual (blue) and expected (red) Benford distributions") if num_digits ==1: plt.xlabel("Leading digit") else: plt.xlabel("Leading digits") plt.ylabel("Proportion") plt.savefig(output_filename) if __name__=="__main__": if len(sys.argv) != 5: print("usage: python benford.py <input filename> <column number> <num digits> <output filename>") else: input_filename = sys.argv[1] data = util.read_column_from_csv(input_filename, int(sys.argv[2]), True) num_digits = int(sys.argv[3]) output_filename = sys.argv[4] plot_benford_dist(data, num_digits, output_filename) # print only four digits after the decimal point print("MAD: {0:.4}".format(benford.compute_benford_MAD(data, num_digits)))