def speed_test(filepath):
    """Parse every line of a filing and print how long it took.

    The first line is handed to ``fecfile.parse_header`` to obtain the
    format version; every later line is handed to ``fecfile.parse_line``.
    Parsed rows that carry a ``form_type`` are tallied per upper-cased
    form type.  Timing and counts are printed to stdout; nothing is
    returned.

    Args:
        filepath: Path of the filing to read.
    """
    print("+++++\nRunning speed test on %s" % filepath)
    formtypecount = Counter()

    start = datetime.now()
    linecount = 0
    version = None
    # ISO-8859-1 maps every byte to a code point, so decoding cannot fail
    # on bytes that are not valid in the platform's default encoding.
    with open(filepath, encoding="ISO-8859-1") as file:
        for linecount, line in enumerate(file, start=1):
            if version is None:
                # Header line: only the version (second element) is needed.
                version = fecfile.parse_header(line)[1]
                continue

            parsed = fecfile.parse_line(line, version)
            if not parsed:
                print("** not parsed %s" % line)
                continue

            # Count the form type, if given; rows without one are skipped.
            try:
                formtypecount[parsed['form_type'].upper()] += 1
            except KeyError:
                pass

    end = datetime.now()
    print("+++++\nResults:")
    print("\tRan %s rows in %s" % (sum(formtypecount.values()), end - start))
    # Report the number of lines read (header included) as the row total,
    # and the per-form-type tally on its own line.
    print("\tTotal rows processed = %s" % linecount)
    print("\tForm type counts = %s" % formtypecount)
def readfile(filepath, writer):
    """Summarise one ``.fec`` filing as a single row and pass it to *writer*.

    The filing number is taken from the file name (``<number>.fec``).  The
    first line is parsed with ``fecfile.parse_header`` and the second with
    ``fecfile.parse_line``; selected fields of the second line are copied
    onto the header dict together with the file size and line count, and
    the resulting dict is written with ``writer.writerow``.

    Args:
        filepath: Path to a file whose base name is ``<integer>.fec``.
        writer: Object exposing ``writerow(dict)`` (presumably a
            ``csv.DictWriter`` -- confirm against the caller).

    Raises:
        ValueError: If the file name without ``.fec``, or the header's
            ``report_id`` without ``FEC-``, is not an integer.
    """
    file_number = int(os.path.basename(filepath).replace(".fec", ""))

    with open(filepath, encoding="ISO-8859-1") as file:
        firstline = file.readline()
        secondline = file.readline()
        # The header and form line are always counted as two lines, even
        # when the file is shorter; the remaining lines are counted lazily.
        linecount = 2 + sum(1 for _ in file)

    file_size = os.path.getsize(filepath)

    raw_results = fecfile.parse_header(firstline.replace("\n", ""))
    results = raw_results[0]
    results["filing_number"] = file_number
    version = raw_results[1]

    # A non-empty report_id marks an amendment; record the amended filing.
    original_report = results.get('report_id', None)
    if original_report:
        results["amends"] = int(original_report.replace("FEC-", ""))

    # NOTE(review): assumes parse_line may return None (or another falsy
    # value) for a line it cannot parse; fall back to an empty mapping so
    # the summary row is still written with blank fields.
    secondlineparsed = fecfile.parse_line(secondline, version) or {}

    copied_fields = (
        'form_type',
        'filer_committee_id_number',
        'committee_name',
        'date_signed',
        'coverage_through_date',
        'coverage_from_date',
    )
    for field in copied_fields:
        results[field] = secondlineparsed.get(field, '')
    results["file_size"] = file_size
    results["file_linecount"] = linecount

    # hack for F7 / F5 / F9: fall back to organization_name when the form
    # line has no committee_name.
    if not results["committee_name"]:
        results["committee_name"] = secondlineparsed.get(
            'organization_name', '')

    writer.writerow(results)
def readfile(path_to_file, schedule_writer, year):
    """Route the schedule rows of one ``.fec`` filing to their writers.

    The first line is parsed with ``fecfile.parse_header`` to get the
    format version; each later line is parsed with ``fecfile.parse_line``.
    Every parsed row with a ``form_type`` is tagged with the filing number
    and its 1-based line number, then written according to its upper-cased
    form type prefix:

    * ``SA``   -> ``schedule_writer['A'][year]['writer']``
    * ``SB``   -> ``schedule_writer['B'][year]['writer']``
    * ``F132`` -> ``schedule_writer['F132']['writer']`` after
      ``remap_132_to_a``

    Lines that raise ``FecParserMissingMappingError`` or parse to a falsy
    value are reported on stdout and skipped.

    Args:
        path_to_file: Path to a file whose base name is ``<integer>.fec``.
        schedule_writer: Nested mapping of writers, indexed as shown above.
        year: Key selecting the per-year writer for schedules A and B.

    Returns:
        A ``Counter`` mapping each upper-cased form type to its row count.
    """
    filenumber = int(os.path.basename(path_to_file).replace(".fec", ""))
    tally = Counter()
    version = None

    with open(path_to_file, encoding="ISO-8859-1") as handle:
        for line_no, raw in enumerate(handle, start=1):
            # Until a version is known, treat the line as the header.
            if version is None:
                version = fecfile.parse_header(raw)[1]
                continue

            try:
                row = fecfile.parse_line(raw, version)
            except fecfile.cache.FecParserMissingMappingError as err:
                print("error in %s line %s: %s" %
                      (filenumber, line_no, err))
                continue

            if not row:
                print("** not parsed %s" % raw)
                continue

            # Rows without a form type are neither counted nor written.
            try:
                kind = row['form_type'].upper()
            except KeyError:
                continue
            tally[kind] += 1

            row['filing_number'] = filenumber
            row['line_sequence'] = line_no

            if kind.startswith("SA"):
                schedule_writer['A'][year]['writer'].writerow(row)
            elif kind.startswith("SB"):
                schedule_writer['B'][year]['writer'].writerow(row)
            elif kind.startswith("F132"):
                schedule_writer['F132']['writer'].writerow(
                    remap_132_to_a(row))

    return tally
示例#4
0
def readfile(filepath, writer):
    """Summarise one ``.fec`` filing as a single row and pass it to *writer*.

    The filing number is taken from the file name (``<number>.fec``).  The
    first line is parsed with ``fecfile.parse_header`` and the second with
    ``fecfile.parse_line``; selected fields of the second line are copied
    onto the header dict, which is then written with ``writer.writerow``.

    Args:
        filepath: Path to a file whose base name is ``<integer>.fec``.
        writer: Object exposing ``writerow(dict)`` (presumably a
            ``csv.DictWriter`` -- confirm against the caller).

    Raises:
        ValueError: If the file name without ``.fec``, or the header's
            ``report_id`` without ``FEC-``, is not an integer.
    """
    file_number = int(os.path.basename(filepath).replace(".fec", ""))

    # Only the first two lines are needed; the context manager makes sure
    # the handle is closed instead of being left for garbage collection.
    with open(filepath, encoding="ISO-8859-1") as file:
        firstline = file.readline()
        secondline = file.readline()

    raw_results = fecfile.parse_header(firstline.replace("\n", ""))
    results = raw_results[0]
    results["filing_number"] = file_number
    version = raw_results[1]

    # A non-empty report_id marks an amendment; record the amended filing.
    original_report = results.get('report_id', None)
    if original_report:
        results["amends"] = int(original_report.replace("FEC-", ""))

    # NOTE(review): assumes parse_line may return None (or another falsy
    # value) for a line it cannot parse; fall back to an empty mapping so
    # the summary row is still written with blank fields.
    secondlineparsed = fecfile.parse_line(secondline, version) or {}

    copied_fields = (
        'form_type',
        'filer_committee_id_number',
        'committee_name',
        'date_signed',
        'coverage_from_date',
        'coverage_through_date',
    )
    for field in copied_fields:
        results[field] = secondlineparsed.get(field, '')

    writer.writerow(results)