def test_csv_required_fieldname_missing(): # required file name test contents = "A,B,C\n1,2,3\n4,5" fieldnames = ["A", "B", "C"] missing_fieldnames = set(["D"]) required_fieldnames = ['A', 'B', 'C', 'D'] with open(test_filename, "w") as f: f.write(contents) f = StringIO() with redirect_stdout(f): try: read_csv_file(test_filename, required_fieldnames=required_fieldnames) except SystemExit: pass s = f.getvalue() lines = [ "FATAL ERROR: File {} has fieldnames {}, while {} are required. Missing {}.\n" .format(test_filename, fieldnames, required_fieldnames, missing_fieldnames) ] assert s == "\n".join(lines) os.remove(test_filename)
def test_csv_duplicate_field(): contents = 'A,B,B\n1,2,3\n4,5' duplicate_fieldnames = ['A', 'B', 'B'] with open(test_filename, 'w') as f: f.write(contents) try: read_csv_file(test_filename) except ValueError as e: assert str(e) == 'Duplicate field name: {}'.format( duplicate_fieldnames) os.remove(test_filename)
def read_election_spec_contests(e): """ Read file election-spec-contests.csv, put results into Election e. """ election_pathname = os.path.join(OpenAuditTool.ELECTIONS_ROOT, e.election_dirname) spec_pathname = os.path.join(election_pathname, "1-election-spec") filename = utils.greatest_name(spec_pathname, "election-spec-contests", ".csv") file_pathname = os.path.join(spec_pathname, filename) fieldnames = [ "Contest", "Contest type", "Params", "Write-ins", "Selections" ] rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=True) for row in rows: cid = row["Contest"] e.cids.append(cid) e.contest_type_c[cid] = row["Contest type"].lower() e.params_c[cid] = row["Params"] e.write_ins_c[cid] = row["Write-ins"].lower() e.selids_c[cid] = {} for selid in row["Selections"]: e.selids_c[cid][selid] = True
def read_reported_cvrs(e): """ Read reported votes 22-reported-cvrs/reported-cvrs-PBCID.csv. """ election_pathname = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname) specification_pathname = os.path.join(election_pathname, "2-reported", "22-reported-cvrs") fieldnames = [ "Collection", "Scanner", "Ballot id", "Contest", "Selections" ] for pbcid in e.pbcids: safe_pbcid = ids.filename_safe(pbcid) filename = utils.greatest_name(specification_pathname, "reported-cvrs-" + safe_pbcid, ".csv") file_pathname = os.path.join(specification_pathname, filename) rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=True) for row in rows: pbcid = row["Collection"] scanner = row["Scanner"] bid = row["Ballot id"] cid = row["Contest"] vote = row["Selections"] vote = tuple(sorted(vote)) # put vote selids into canonical order utils.nested_set(e.rv_cpb, [cid, pbcid, bid], vote) utils.nested_set(e.votes_c, [cid, vote], True)
def test_csv_duplicate_field(): contents = "A,B,B\n1,2,3\n4,5" with open(test_filename, 'w') as f: f.write(contents) f = StringIO() with redirect_stdout(f): try: read_csv_file(test_filename) except SystemExit: pass s = f.getvalue() lines = ["FATAL ERROR: Duplicate field name:{}\n".format(['A', 'B', 'B'])] assert s == "\n".join(lines) os.remove(test_filename)
def test_csv_extra_fieldnames2(self): to_in = [["A", "B", "C", "D"], [1, 2, 3, 4], [ 6, 7, 8, ], [3, 4, 5, 6, 2, 4]] create_csv(to_in) self.assertWarnsRegex(UserWarning, "extra fieldnames", csv_readers.read_csv_file, "example.csv", varlen=True, required_fieldnames=["C", "B"]) self.assertEqual( csv_readers.read_csv_file("example.csv", varlen=True, required_fieldnames=["C", "B"]), [{ 'A': '1', 'B': '2', 'C': '3', 'D': ('4', ) }, { 'A': '6', 'B': '7', 'C': '8', 'D': () }, { 'A': '3', 'B': '4', 'C': '5', 'D': ('6', '2', '4') }])
def read_audit_spec_contest(e, args): """ Read 3-audit/31-audit-spec/audit-spec-contest.csv """ election_pathname = os.path.join(OpenAuditTool.ELECTIONS_ROOT, e.election_dirname) audit_spec_pathname = os.path.join(election_pathname, "3-audit", "31-audit-spec") filename = utils.greatest_name(audit_spec_pathname, "audit-spec-contest", ".csv") file_pathname = os.path.join(audit_spec_pathname, filename) fieldnames = [ "Measurement id", "Contest", "Risk Measurement Method", "Risk Limit", "Risk Upset Threshold", "Sampling Mode", "Initial Status", "Param 1", "Param 2" ] rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=False) logger.info("read_audit_spec_contest: e.mid: %s", e.mids) for row in rows: mid = row["Measurement id"] e.mids.append(mid) e.cid_m[mid] = row["Contest"] e.risk_method_m[mid] = row["Risk Measurement Method"] e.risk_limit_m[mid] = float(row["Risk Limit"]) e.risk_upset_m[mid] = float(row["Risk Upset Threshold"]) e.sampling_mode_m[mid] = row["Sampling Mode"] e.initial_status_m[mid] = row["Initial Status"] e.risk_measurement_parameters_m[mid] = (row["Param 1"], row["Param 2"])
def test_csv_one_entry_len(): # One header with one entry contents = 'A\n\n1' expected = [{'A': ''}, {'A': '1'}] with open(test_filename, 'w') as f: f.write(contents) assert read_csv_file(test_filename) == expected os.remove(test_filename)
def test_csv_blank_fields_len(): # Empty headers and empty entries contents = 'A,B,C,,,\n1,2,,,,' expected = [{'A': '1', 'B': '2', 'C': ''}] with open(test_filename, 'w') as f: f.write(contents) assert read_csv_file(test_filename) == expected os.remove(test_filename)
def test_csv_len(): # First sample given in csv_readers.py contents = 'A,B,C\n1,2,3\n4,5' expected = [{'A': '1', 'B': '2', 'C': '3'}, {'A': '4', 'B': '5', 'C': ''}] with open(test_filename, 'w') as f: f.write(contents) assert read_csv_file(test_filename) == expected os.remove(test_filename)
def test_csv_short_row_varlen(): contents = "A,B,C\n1\n1,2,3" expected = [{'A': '1', 'B': '2', 'C': '3'}] with open(test_filename, "w") as f: f.write(contents) f = StringIO() with redirect_stdout(f): try: read_csv_file(test_filename, varlen=True) except SystemExit: pass s = f.getvalue() lines = ["WARNING: Ignoring too-short row:{}\n".format(['1'])] assert s == "\n".join(lines) os.remove(test_filename)
def test_csv_base(self): to_in = [["A", "B", "C"], [1, 2, 3]] create_csv(to_in) self.assertEqual(csv_readers.read_csv_file("example.csv"), [{ 'A': '1', 'B': '2', 'C': '3' }])
def test_csv_extra_entries_len(): # non-varlen error when more entries than fields contents = "A,B\n1,2,3" expected = [{'A': '1', 'B': '2'}] # expected warning message - "Ignoring extra values in row:0" with open(test_filename, "w") as f: f.write(contents) assert read_csv_file(test_filename) == expected os.remove(test_filename)
def test_csv_required_fieldname_missing(): # required file name test contents = 'A,B,C\n1,2,3\n4,5' fieldnames = ['A', 'B', 'C'] missing_fieldnames = set(['D']) required_fieldnames = ['A', 'B', 'C', 'D'] with open(test_filename, 'w') as f: f.write(contents) try: read_csv_file(test_filename, required_fieldnames=required_fieldnames) except ValueError as e: assert str(e) == ( 'File {} has fieldnames {}, while {} are required. Missing {}.'. format(test_filename, fieldnames, required_fieldnames, missing_fieldnames)) os.remove(test_filename)
def read_reported_ballot_manifests(e): """ Read ballot manifest file 21-reported-ballot-manifests and expand rows if needed. """ election_pathname = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname) specification_pathname = os.path.join(election_pathname, "2-reported", "21-reported-ballot-manifests") fieldnames = [ "Collection", "Box", "Position", "Stamp", "Ballot id", "Number of ballots", "Required Contests", "Possible Contests", "Comments" ] for pbcid in e.pbcids: safe_pbcid = ids.filename_safe(pbcid) filename = utils.greatest_name(specification_pathname, "manifest-" + safe_pbcid, ".csv") file_pathname = os.path.join(specification_pathname, filename) rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=False) for row in rows: pbcid = row["Collection"] boxid = row["Box"] position = row["Position"] stamp = row["Stamp"] bid = row["Ballot id"] try: num = int(row["Number of ballots"]) except ValueError: utils.myerror( "Number {} of ballots not an integer.".format(num)) if num <= 0: utils.mywarning( "Number {} of ballots not positive.".format(num)) req = row["Required Contests"] poss = row["Possible Contests"] comments = row["Comments"] bids = utils.count_on(bid, num) stamps = utils.count_on(stamp, num) positions = utils.count_on(position, num) for i in range(num): # utils.nested_set(e.bids_p, [pbcid, bids[i]], True) if pbcid not in e.bids_p: e.bids_p[pbcid] = [] e.bids_p[pbcid].append(bids[i]) utils.nested_set(e.boxid_pb, [pbcid, bids[i]], boxid) utils.nested_set(e.position_pb, [pbcid, bids[i]], position[i]) utils.nested_set(e.stamp_pb, [pbcid, bids[i]], stamps[i]) utils.nested_set(e.required_gid_pb, [pbcid, bids[i]], req) utils.nested_set(e.possible_gid_pb, [pbcid, bids[i]], poss) utils.nested_set(e.comments_pb, [pbcid, bids[i]], comments)
def test_csv_blank(self): to_in = [["A", "B", "C"], [1, 2, 3], [4, 5, '']] create_csv(to_in) self.assertEqual(csv_readers.read_csv_file("example.csv"), [{ 'A': '1', 'B': '2', 'C': '3' }, { 'A': '4', 'B': '5', 'C': '' }])
def test_csv_varlen_true_blank_row(self): to_in = [["A", "B", "C"], [1, 2, 3], [], [4, 5, 6]] create_csv(to_in) self.assertEqual(csv_readers.read_csv_file("example.csv", varlen=True), [{ 'A': '1', 'B': '2', 'C': ('3', ) }, { 'A': '4', 'B': '5', 'C': ('6', ) }])
def read_reported_outcomes(e): election_pathname = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname) specification_pathname = os.path.join(election_pathname, "2-reported") fieldnames = ["Contest", "Winner(s)"] filename = utils.greatest_name(specification_pathname, "23-reported-outcomes", ".csv") file_pathname = os.path.join(specification_pathname, filename) rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=True) for row in rows: cid = row["Contest"] winners = row["Winner(s)"] utils.nested_set(e.ro_c, [cid], winners)
def test_csv_short_row_varlen(): contents = 'A,B,C\n1\n1,2,3' expected = [{'A': '1', 'B': '2', 'C': ('3', )}] short_row = ['1'] with open(test_filename, 'w') as f: f.write(contents) with warnings.catch_warnings(record=True) as w: assert expected == read_csv_file(test_filename, varlen=True) assert str( w[0].message) == "Ignoring too-short row: {}".format(short_row) os.remove(test_filename)
def read_audit_spec_collection(e, args): """ Read 3-audit/31-audit-spec/audit-spec-collection.csv """ election_pathname = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname) audit_spec_pathname = os.path.join(election_pathname, "3-audit", "31-audit-spec") filename = utils.greatest_name(audit_spec_pathname, "audit-spec-collection", ".csv") file_pathname = os.path.join(audit_spec_pathname, filename) fieldnames = ["Collection", "Max audit rate"] rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=False) for row in rows: pbcid = row["Collection"] e.max_audit_rate_p[pbcid] = int(row["Max audit rate"])
def test_csv_extra_required_fieldname(): # required file name test contents = "A,B,C\n1,2,3\n4,5" fieldnames = ["A", "B", "C"] extra_fieldnames = set(["C"]) with open(test_filename, 'w') as f: f.write(contents) f = StringIO() with redirect_stdout(f): try: read_csv_file(test_filename, required_fieldnames=['A', 'B']) except SystemExit: pass s = f.getvalue() lines = [ "WARNING: File {} has extra fieldnames (ignored): {}\n".format( test_filename, extra_fieldnames) ] assert s == "\n".join(lines) os.remove(test_filename)
def test_csv_extra_entries_len(): # non-varlen error when more entries than fields contents = 'A,B\n1,2,3' expected = [{'A': '1', 'B': '2'}] extra_entries_row = ['1', '2', '3'] with open(test_filename, 'w') as f: f.write(contents) with warnings.catch_warnings(record=True) as w: assert read_csv_file(test_filename) == expected assert str(w[0].message) == 'Ignoring extra values in row: {}'.format( extra_entries_row) os.remove(test_filename)
def read_syn2_csv(e, synpar): """ Read file defining syn2 synthetic election spec. """ syn2_pathname = os.path.join(multi.ELECTIONS_ROOT, "syn2_specs") filename = utils.greatest_name(syn2_pathname, synpar.election_dirname, ".csv") file_pathname = os.path.join(syn2_pathname, filename) fieldnames = [ "Contest", "Collection", "Reported Vote", "Actual Vote", "Number" ] rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=False) return [(row["Contest"], row["Collection"], (row["Reported Vote"], ), (row["Actual Vote"], ), row["Number"]) for row in rows]
def read_audit_spec_global(e, args): """ Read 3-audit/31-audit-spec/audit-spec-global.csv """ election_pathname = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname) audit_spec_pathname = os.path.join(election_pathname, "3-audit", "31-audit-spec") filename = utils.greatest_name(audit_spec_pathname, "audit-spec-global", ".csv") file_pathname = os.path.join(audit_spec_pathname, filename) fieldnames = ["Global Audit Parameter", "Value"] rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=False) for row in rows: parameter = row["Global Audit Parameter"] value = row["Value"] if parameter == "Max audit stage time": e.max_stage_time = value
def test_csv_varlen(self): to_in = [["A", "B", "C"], [1, 2, 3], [4, 5, ''], [6, 7, 8, 9]] create_csv(to_in) self.assertEqual(csv_readers.read_csv_file("example.csv", varlen=True), [{ 'A': '1', 'B': '2', 'C': ('3', ) }, { 'A': '4', 'B': '5', 'C': () }, { 'A': '6', 'B': '7', 'C': ('8', '9') }])
def test_csv_extra_required_fieldname(): # required file name test contents = 'A,B,C\n1,2,3\n4,5' extra_fieldnames = set(['C']) expected = [{'A': '1', 'B': '2', 'C': '3'}, {'A': '4', 'B': '5', 'C': ''}] with open(test_filename, 'w') as f: f.write(contents) with warnings.catch_warnings(record=True) as w: assert expected == read_csv_file(test_filename, required_fieldnames=['A', 'B']) assert (len(w) > 0) and (str(w[0].message) == 'File {} has extra fieldnames (ignored): {}'.format( test_filename, extra_fieldnames)) os.remove(test_filename)
def test_csv_varlen_unspecified(self): to_in = [["A", "B", "C"], [1, 2, 3], [4, 5, ''], [6, 7, 8, 9, 10, 11, 12, 13, 17, 18, 20, 21, 243, 80]] create_csv(to_in) self.assertWarnsRegex(UserWarning, 'extra values', csv_readers.read_csv_file, "example.csv") self.assertEqual(csv_readers.read_csv_file("example.csv"), [{ 'A': '1', 'B': '2', 'C': '3' }, { 'A': '4', 'B': '5', 'C': '' }, { 'A': '6', 'B': '7', 'C': '8' }])
def counting(prefix, dirpath="./elections"): results = dd(int) output_prefix = "audit-output-contest-status" output_postfix = ".csv" output_inner_dir = path.join("3-audit", "34-audit-output") output_fields = [ "Measurement id", "Contest", "Risk Measurement Method", "Risk Limit", "Risk Upset Threshold", "Sampling Mode", "Status", "Param 1", "Param 2" ] for election_dir in os.listdir(dirpath): # Find the correct directory if election_dir.startswith(prefix): election_dir = path.join(dirpath, election_dir) output_dir = path.join(election_dir, output_inner_dir) if path.exists(output_dir): file = greatest_name(output_dir, output_prefix, output_postfix) file = path.join(output_dir, file) field_dict = read_csv_file(file, required_fieldnames=output_fields) print(file) print(field_dict) for row in field_dict: results[row["Status"]] += 1 result_path = path.join(".", "results") if not path.exists(result_path): os.makedirs(result_path) all_status = ["Upset", "Passed", "Open"] result_csv = path.join(result_path, prefix + ".csv") try: with open(result_csv, 'w') as csv_file: writer = csv.DictWriter(csv_file, fieldnames=all_status) writer.writeheader() for data in [ results, ]: writer.writerow(data) except IOError: print("I/O error") return results
def test_csv_varlen(): # Second sample given in csv_readers.py contents = 'A,B,C\n1,2,3\n4,5\n6,7,8,9' with open(test_filename, 'w') as f: f.write(contents) expected = [{ 'A': '1', 'B': '2', 'C': ('3', ) }, { 'A': '4', 'B': '5', 'C': () }, { 'A': '6', 'B': '7', 'C': ('8', '9') }] assert read_csv_file(test_filename, varlen=True) == expected os.remove(test_filename)
def test_csv_empty_line_len(): # 5 headers with no entries contents = 'A,B,C,D,E\n\n1,2,3,4,5' expected = [{ 'A': '', 'B': '', 'C': '', 'D': '', 'E': '' }, { 'A': '1', 'B': '2', 'C': '3', 'D': '4', 'E': '5' }] with open(test_filename, 'w') as f: f.write(contents) assert read_csv_file(test_filename) == expected os.remove(test_filename)