Python read_csv_file示例，csv_readers.read_csv_file Python示例

示例#1

0

显示文件

def test_csv_required_fieldname_missing():
    # required file name test
    contents = "A,B,C\n1,2,3\n4,5"
    fieldnames = ["A", "B", "C"]
    missing_fieldnames = set(["D"])
    required_fieldnames = ['A', 'B', 'C', 'D']

    with open(test_filename, "w") as f:
        f.write(contents)

    f = StringIO()
    with redirect_stdout(f):
        try:
            read_csv_file(test_filename,
                          required_fieldnames=required_fieldnames)
        except SystemExit:
            pass
    s = f.getvalue()

    lines = [
        "FATAL ERROR: File {} has fieldnames {}, while {} are required. Missing {}.\n"
        .format(test_filename, fieldnames, required_fieldnames,
                missing_fieldnames)
    ]
    assert s == "\n".join(lines)
    os.remove(test_filename)

示例#2

0

显示文件

def test_csv_duplicate_field():
    contents = 'A,B,B\n1,2,3\n4,5'
    duplicate_fieldnames = ['A', 'B', 'B']

    with open(test_filename, 'w') as f:
        f.write(contents)

    try:
        read_csv_file(test_filename)
    except ValueError as e:
        assert str(e) == 'Duplicate field name: {}'.format(
            duplicate_fieldnames)

    os.remove(test_filename)

示例#3

0

显示文件

文件： election_spec.py 项目： guoyu07/audit-lab

def read_election_spec_contests(e):
    """
    Read file election-spec-contests.csv, put results into Election e.
    """

    election_pathname = os.path.join(OpenAuditTool.ELECTIONS_ROOT,
                                     e.election_dirname)
    spec_pathname = os.path.join(election_pathname, "1-election-spec")
    filename = utils.greatest_name(spec_pathname, "election-spec-contests",
                                   ".csv")
    file_pathname = os.path.join(spec_pathname, filename)
    fieldnames = [
        "Contest", "Contest type", "Params", "Write-ins", "Selections"
    ]
    rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=True)
    for row in rows:

        cid = row["Contest"]

        e.cids.append(cid)

        e.contest_type_c[cid] = row["Contest type"].lower()

        e.params_c[cid] = row["Params"]

        e.write_ins_c[cid] = row["Write-ins"].lower()

        e.selids_c[cid] = {}
        for selid in row["Selections"]:
            e.selids_c[cid][selid] = True

示例#4

0

显示文件

文件： reported.py 项目： ron-rivest/2017-bayes-audit

def read_reported_cvrs(e):
    """
    Read reported votes 22-reported-cvrs/reported-cvrs-PBCID.csv.
    """

    election_pathname = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname)
    specification_pathname = os.path.join(election_pathname, "2-reported",
                                          "22-reported-cvrs")
    fieldnames = [
        "Collection", "Scanner", "Ballot id", "Contest", "Selections"
    ]
    for pbcid in e.pbcids:
        safe_pbcid = ids.filename_safe(pbcid)
        filename = utils.greatest_name(specification_pathname,
                                       "reported-cvrs-" + safe_pbcid, ".csv")
        file_pathname = os.path.join(specification_pathname, filename)
        rows = csv_readers.read_csv_file(file_pathname,
                                         fieldnames,
                                         varlen=True)
        for row in rows:
            pbcid = row["Collection"]
            scanner = row["Scanner"]
            bid = row["Ballot id"]
            cid = row["Contest"]
            vote = row["Selections"]
            vote = tuple(sorted(vote))  # put vote selids into canonical order
            utils.nested_set(e.rv_cpb, [cid, pbcid, bid], vote)
            utils.nested_set(e.votes_c, [cid, vote], True)

示例#5

0

显示文件

def test_csv_duplicate_field():
    contents = "A,B,B\n1,2,3\n4,5"
    with open(test_filename, 'w') as f:
        f.write(contents)

    f = StringIO()
    with redirect_stdout(f):
        try:
            read_csv_file(test_filename)
        except SystemExit:
            pass
    s = f.getvalue()

    lines = ["FATAL ERROR: Duplicate field name:{}\n".format(['A', 'B', 'B'])]
    assert s == "\n".join(lines)
    os.remove(test_filename)

示例#6

0

显示文件

文件： csv_readers_tests.py 项目： michaeljets/audit-lab

 def test_csv_extra_fieldnames2(self):
     to_in = [["A", "B", "C", "D"], [1, 2, 3, 4], [
         6,
         7,
         8,
     ], [3, 4, 5, 6, 2, 4]]
     create_csv(to_in)
     self.assertWarnsRegex(UserWarning,
                           "extra fieldnames",
                           csv_readers.read_csv_file,
                           "example.csv",
                           varlen=True,
                           required_fieldnames=["C", "B"])
     self.assertEqual(
         csv_readers.read_csv_file("example.csv",
                                   varlen=True,
                                   required_fieldnames=["C", "B"]),
         [{
             'A': '1',
             'B': '2',
             'C': '3',
             'D': ('4', )
         }, {
             'A': '6',
             'B': '7',
             'C': '8',
             'D': ()
         }, {
             'A': '3',
             'B': '4',
             'C': '5',
             'D': ('6', '2', '4')
         }])

示例#7

0

显示文件

文件： audit.py 项目： guoyu07/audit-lab

def read_audit_spec_contest(e, args):
    """ Read 3-audit/31-audit-spec/audit-spec-contest.csv """

    election_pathname = os.path.join(OpenAuditTool.ELECTIONS_ROOT,
                                     e.election_dirname)
    audit_spec_pathname = os.path.join(election_pathname, "3-audit",
                                       "31-audit-spec")
    filename = utils.greatest_name(audit_spec_pathname, "audit-spec-contest",
                                   ".csv")
    file_pathname = os.path.join(audit_spec_pathname, filename)
    fieldnames = [
        "Measurement id", "Contest", "Risk Measurement Method", "Risk Limit",
        "Risk Upset Threshold", "Sampling Mode", "Initial Status", "Param 1",
        "Param 2"
    ]
    rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=False)
    logger.info("read_audit_spec_contest: e.mid: %s", e.mids)
    for row in rows:
        mid = row["Measurement id"]
        e.mids.append(mid)
        e.cid_m[mid] = row["Contest"]
        e.risk_method_m[mid] = row["Risk Measurement Method"]
        e.risk_limit_m[mid] = float(row["Risk Limit"])
        e.risk_upset_m[mid] = float(row["Risk Upset Threshold"])
        e.sampling_mode_m[mid] = row["Sampling Mode"]
        e.initial_status_m[mid] = row["Initial Status"]
        e.risk_measurement_parameters_m[mid] = (row["Param 1"], row["Param 2"])

示例#8

0

显示文件

def test_csv_one_entry_len():
    # One header with one entry
    contents = 'A\n\n1'
    expected = [{'A': ''}, {'A': '1'}]
    with open(test_filename, 'w') as f:
        f.write(contents)
    assert read_csv_file(test_filename) == expected
    os.remove(test_filename)

示例#9

0

显示文件

def test_csv_blank_fields_len():
    # Empty headers and empty entries
    contents = 'A,B,C,,,\n1,2,,,,'
    expected = [{'A': '1', 'B': '2', 'C': ''}]
    with open(test_filename, 'w') as f:
        f.write(contents)
    assert read_csv_file(test_filename) == expected
    os.remove(test_filename)

示例#10

0

显示文件

def test_csv_len():
    # First sample given in csv_readers.py
    contents = 'A,B,C\n1,2,3\n4,5'
    expected = [{'A': '1', 'B': '2', 'C': '3'}, {'A': '4', 'B': '5', 'C': ''}]
    with open(test_filename, 'w') as f:
        f.write(contents)
    assert read_csv_file(test_filename) == expected
    os.remove(test_filename)

示例#11

0

显示文件

def test_csv_short_row_varlen():
    contents = "A,B,C\n1\n1,2,3"
    expected = [{'A': '1', 'B': '2', 'C': '3'}]
    with open(test_filename, "w") as f:
        f.write(contents)

    f = StringIO()
    with redirect_stdout(f):
        try:
            read_csv_file(test_filename, varlen=True)
        except SystemExit:
            pass
    s = f.getvalue()

    lines = ["WARNING: Ignoring too-short row:{}\n".format(['1'])]
    assert s == "\n".join(lines)
    os.remove(test_filename)

示例#12

0

显示文件

文件： csv_readers_tests.py 项目： michaeljets/audit-lab

 def test_csv_base(self):
     to_in = [["A", "B", "C"], [1, 2, 3]]
     create_csv(to_in)
     self.assertEqual(csv_readers.read_csv_file("example.csv"), [{
         'A': '1',
         'B': '2',
         'C': '3'
     }])

示例#13

0

显示文件

def test_csv_extra_entries_len():
    # non-varlen error when more entries than fields
    contents = "A,B\n1,2,3"
    expected = [{'A': '1', 'B': '2'}]
    # expected warning message - "Ignoring extra values in row:0"
    with open(test_filename, "w") as f:
        f.write(contents)
    assert read_csv_file(test_filename) == expected
    os.remove(test_filename)

示例#14

0

显示文件

def test_csv_required_fieldname_missing():
    # required file name test
    contents = 'A,B,C\n1,2,3\n4,5'
    fieldnames = ['A', 'B', 'C']
    missing_fieldnames = set(['D'])
    required_fieldnames = ['A', 'B', 'C', 'D']

    with open(test_filename, 'w') as f:
        f.write(contents)

    try:
        read_csv_file(test_filename, required_fieldnames=required_fieldnames)
    except ValueError as e:
        assert str(e) == (
            'File {} has fieldnames {}, while {} are required. Missing {}.'.
            format(test_filename, fieldnames, required_fieldnames,
                   missing_fieldnames))
    os.remove(test_filename)

示例#15

0

显示文件

文件： reported.py 项目： ron-rivest/2017-bayes-audit

def read_reported_ballot_manifests(e):
    """
    Read ballot manifest file 21-reported-ballot-manifests and expand rows if needed.
    """

    election_pathname = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname)
    specification_pathname = os.path.join(election_pathname, "2-reported",
                                          "21-reported-ballot-manifests")
    fieldnames = [
        "Collection", "Box", "Position", "Stamp", "Ballot id",
        "Number of ballots", "Required Contests", "Possible Contests",
        "Comments"
    ]
    for pbcid in e.pbcids:
        safe_pbcid = ids.filename_safe(pbcid)
        filename = utils.greatest_name(specification_pathname,
                                       "manifest-" + safe_pbcid, ".csv")
        file_pathname = os.path.join(specification_pathname, filename)
        rows = csv_readers.read_csv_file(file_pathname,
                                         fieldnames,
                                         varlen=False)
        for row in rows:
            pbcid = row["Collection"]
            boxid = row["Box"]
            position = row["Position"]
            stamp = row["Stamp"]
            bid = row["Ballot id"]
            try:
                num = int(row["Number of ballots"])
            except ValueError:
                utils.myerror(
                    "Number {} of ballots not an integer.".format(num))
            if num <= 0:
                utils.mywarning(
                    "Number {} of ballots not positive.".format(num))
            req = row["Required Contests"]
            poss = row["Possible Contests"]
            comments = row["Comments"]

            bids = utils.count_on(bid, num)
            stamps = utils.count_on(stamp, num)
            positions = utils.count_on(position, num)

            for i in range(num):
                # utils.nested_set(e.bids_p, [pbcid, bids[i]], True)
                if pbcid not in e.bids_p:
                    e.bids_p[pbcid] = []
                e.bids_p[pbcid].append(bids[i])
                utils.nested_set(e.boxid_pb, [pbcid, bids[i]], boxid)
                utils.nested_set(e.position_pb, [pbcid, bids[i]], position[i])
                utils.nested_set(e.stamp_pb, [pbcid, bids[i]], stamps[i])
                utils.nested_set(e.required_gid_pb, [pbcid, bids[i]], req)
                utils.nested_set(e.possible_gid_pb, [pbcid, bids[i]], poss)
                utils.nested_set(e.comments_pb, [pbcid, bids[i]], comments)

示例#16

0

显示文件

文件： csv_readers_tests.py 项目： michaeljets/audit-lab

 def test_csv_blank(self):
     to_in = [["A", "B", "C"], [1, 2, 3], [4, 5, '']]
     create_csv(to_in)
     self.assertEqual(csv_readers.read_csv_file("example.csv"), [{
         'A': '1',
         'B': '2',
         'C': '3'
     }, {
         'A': '4',
         'B': '5',
         'C': ''
     }])

示例#17

0

显示文件

文件： csv_readers_tests.py 项目： michaeljets/audit-lab

 def test_csv_varlen_true_blank_row(self):
     to_in = [["A", "B", "C"], [1, 2, 3], [], [4, 5, 6]]
     create_csv(to_in)
     self.assertEqual(csv_readers.read_csv_file("example.csv", varlen=True),
                      [{
                          'A': '1',
                          'B': '2',
                          'C': ('3', )
                      }, {
                          'A': '4',
                          'B': '5',
                          'C': ('6', )
                      }])

示例#18

0

显示文件

文件： reported.py 项目： ron-rivest/2017-bayes-audit

def read_reported_outcomes(e):

    election_pathname = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname)
    specification_pathname = os.path.join(election_pathname, "2-reported")
    fieldnames = ["Contest", "Winner(s)"]
    filename = utils.greatest_name(specification_pathname,
                                   "23-reported-outcomes", ".csv")
    file_pathname = os.path.join(specification_pathname, filename)
    rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=True)
    for row in rows:
        cid = row["Contest"]
        winners = row["Winner(s)"]
        utils.nested_set(e.ro_c, [cid], winners)

示例#19

0

显示文件

def test_csv_short_row_varlen():
    contents = 'A,B,C\n1\n1,2,3'
    expected = [{'A': '1', 'B': '2', 'C': ('3', )}]
    short_row = ['1']
    with open(test_filename, 'w') as f:
        f.write(contents)

    with warnings.catch_warnings(record=True) as w:
        assert expected == read_csv_file(test_filename, varlen=True)
        assert str(
            w[0].message) == "Ignoring too-short row: {}".format(short_row)

    os.remove(test_filename)

示例#20

0

显示文件

def read_audit_spec_collection(e, args):
    """ Read 3-audit/31-audit-spec/audit-spec-collection.csv """

    election_pathname = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname)
    audit_spec_pathname = os.path.join(election_pathname, "3-audit",
                                       "31-audit-spec")
    filename = utils.greatest_name(audit_spec_pathname,
                                   "audit-spec-collection", ".csv")
    file_pathname = os.path.join(audit_spec_pathname, filename)
    fieldnames = ["Collection", "Max audit rate"]
    rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=False)
    for row in rows:
        pbcid = row["Collection"]
        e.max_audit_rate_p[pbcid] = int(row["Max audit rate"])

示例#21

0

显示文件

def test_csv_extra_required_fieldname():
    # required file name test
    contents = "A,B,C\n1,2,3\n4,5"
    fieldnames = ["A", "B", "C"]
    extra_fieldnames = set(["C"])

    with open(test_filename, 'w') as f:
        f.write(contents)

    f = StringIO()
    with redirect_stdout(f):
        try:
            read_csv_file(test_filename, required_fieldnames=['A', 'B'])
        except SystemExit:
            pass
    s = f.getvalue()

    lines = [
        "WARNING: File {} has extra fieldnames (ignored): {}\n".format(
            test_filename, extra_fieldnames)
    ]
    assert s == "\n".join(lines)
    os.remove(test_filename)

示例#22

0

显示文件

def test_csv_extra_entries_len():
    # non-varlen error when more entries than fields
    contents = 'A,B\n1,2,3'
    expected = [{'A': '1', 'B': '2'}]
    extra_entries_row = ['1', '2', '3']

    with open(test_filename, 'w') as f:
        f.write(contents)

    with warnings.catch_warnings(record=True) as w:
        assert read_csv_file(test_filename) == expected
        assert str(w[0].message) == 'Ignoring extra values in row: {}'.format(
            extra_entries_row)
    os.remove(test_filename)

示例#23

0

显示文件

文件： syn2.py 项目： ron-rivest/2017-bayes-audit

def read_syn2_csv(e, synpar):
    """ 
    Read file defining syn2 synthetic election spec. 
    """

    syn2_pathname = os.path.join(multi.ELECTIONS_ROOT, "syn2_specs")
    filename = utils.greatest_name(syn2_pathname, synpar.election_dirname,
                                   ".csv")
    file_pathname = os.path.join(syn2_pathname, filename)
    fieldnames = [
        "Contest", "Collection", "Reported Vote", "Actual Vote", "Number"
    ]
    rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=False)
    return [(row["Contest"], row["Collection"], (row["Reported Vote"], ),
             (row["Actual Vote"], ), row["Number"]) for row in rows]

示例#24

0

显示文件

def read_audit_spec_global(e, args):
    """ Read 3-audit/31-audit-spec/audit-spec-global.csv """

    election_pathname = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname)
    audit_spec_pathname = os.path.join(election_pathname, "3-audit",
                                       "31-audit-spec")
    filename = utils.greatest_name(audit_spec_pathname, "audit-spec-global",
                                   ".csv")
    file_pathname = os.path.join(audit_spec_pathname, filename)
    fieldnames = ["Global Audit Parameter", "Value"]
    rows = csv_readers.read_csv_file(file_pathname, fieldnames, varlen=False)
    for row in rows:
        parameter = row["Global Audit Parameter"]
        value = row["Value"]
        if parameter == "Max audit stage time":
            e.max_stage_time = value

示例#25

0

显示文件

文件： csv_readers_tests.py 项目： michaeljets/audit-lab

 def test_csv_varlen(self):
     to_in = [["A", "B", "C"], [1, 2, 3], [4, 5, ''], [6, 7, 8, 9]]
     create_csv(to_in)
     self.assertEqual(csv_readers.read_csv_file("example.csv", varlen=True),
                      [{
                          'A': '1',
                          'B': '2',
                          'C': ('3', )
                      }, {
                          'A': '4',
                          'B': '5',
                          'C': ()
                      }, {
                          'A': '6',
                          'B': '7',
                          'C': ('8', '9')
                      }])

示例#26

0

显示文件

def test_csv_extra_required_fieldname():
    # required file name test
    contents = 'A,B,C\n1,2,3\n4,5'
    extra_fieldnames = set(['C'])
    expected = [{'A': '1', 'B': '2', 'C': '3'}, {'A': '4', 'B': '5', 'C': ''}]

    with open(test_filename, 'w') as f:
        f.write(contents)

    with warnings.catch_warnings(record=True) as w:
        assert expected == read_csv_file(test_filename,
                                         required_fieldnames=['A', 'B'])
        assert (len(w) >
                0) and (str(w[0].message)
                        == 'File {} has extra fieldnames (ignored): {}'.format(
                            test_filename, extra_fieldnames))
    os.remove(test_filename)

示例#27

0

显示文件

文件： csv_readers_tests.py 项目： michaeljets/audit-lab

 def test_csv_varlen_unspecified(self):
     to_in = [["A", "B", "C"], [1, 2, 3], [4, 5, ''],
              [6, 7, 8, 9, 10, 11, 12, 13, 17, 18, 20, 21, 243, 80]]
     create_csv(to_in)
     self.assertWarnsRegex(UserWarning, 'extra values',
                           csv_readers.read_csv_file, "example.csv")
     self.assertEqual(csv_readers.read_csv_file("example.csv"), [{
         'A': '1',
         'B': '2',
         'C': '3'
     }, {
         'A': '4',
         'B': '5',
         'C': ''
     }, {
         'A': '6',
         'B': '7',
         'C': '8'
     }])

示例#28

0

显示文件

文件： parallel_edge.py 项目： Dovermore/audit-lab

def counting(prefix, dirpath="./elections"):
    results = dd(int)
    output_prefix = "audit-output-contest-status"
    output_postfix = ".csv"
    output_inner_dir = path.join("3-audit", "34-audit-output")

    output_fields = [
        "Measurement id", "Contest", "Risk Measurement Method", "Risk Limit",
        "Risk Upset Threshold", "Sampling Mode", "Status", "Param 1", "Param 2"
    ]

    for election_dir in os.listdir(dirpath):
        # Find the correct directory
        if election_dir.startswith(prefix):
            election_dir = path.join(dirpath, election_dir)
            output_dir = path.join(election_dir, output_inner_dir)
            if path.exists(output_dir):
                file = greatest_name(output_dir, output_prefix, output_postfix)
                file = path.join(output_dir, file)
                field_dict = read_csv_file(file,
                                           required_fieldnames=output_fields)
                print(file)
                print(field_dict)
                for row in field_dict:
                    results[row["Status"]] += 1

    result_path = path.join(".", "results")
    if not path.exists(result_path):
        os.makedirs(result_path)

    all_status = ["Upset", "Passed", "Open"]
    result_csv = path.join(result_path, prefix + ".csv")
    try:
        with open(result_csv, 'w') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=all_status)
            writer.writeheader()
            for data in [
                    results,
            ]:
                writer.writerow(data)
    except IOError:
        print("I/O error")
    return results

示例#29

0

显示文件

def test_csv_varlen():
    # Second sample given in csv_readers.py
    contents = 'A,B,C\n1,2,3\n4,5\n6,7,8,9'
    with open(test_filename, 'w') as f:
        f.write(contents)
    expected = [{
        'A': '1',
        'B': '2',
        'C': ('3', )
    }, {
        'A': '4',
        'B': '5',
        'C': ()
    }, {
        'A': '6',
        'B': '7',
        'C': ('8', '9')
    }]
    assert read_csv_file(test_filename, varlen=True) == expected
    os.remove(test_filename)

示例#30

0

显示文件

def test_csv_empty_line_len():
    # 5 headers with no entries
    contents = 'A,B,C,D,E\n\n1,2,3,4,5'
    expected = [{
        'A': '',
        'B': '',
        'C': '',
        'D': '',
        'E': ''
    }, {
        'A': '1',
        'B': '2',
        'C': '3',
        'D': '4',
        'E': '5'
    }]
    with open(test_filename, 'w') as f:
        f.write(contents)
    assert read_csv_file(test_filename) == expected
    os.remove(test_filename)