def process_filing_body(filingnum, fp=None, logger=None): # It's useful to pass the form parser in when running in bulk so we don't have to keep creating new ones. if not fp: fp = form_parser() if not logger: logger = fec_logger() msg = "process_filing_body: Starting # %s" % (filingnum) # print msg logger.info(msg) connection = get_connection() cursor = connection.cursor() cmd = "select fec_id, superseded_by_amendment, data_is_processed from efilings_filing where filing_number=%s" % ( filingnum ) cursor.execute(cmd) cd = CSV_dumper(connection) result = cursor.fetchone() if not result: msg = "process_filing_body: Couldn't find a new_filing for filing %s" % (filingnum) logger.error(msg) raise FilingHeaderDoesNotExist(msg) # will throw a TypeError if it's missing. line_sequence = 1 is_amended = result[1] is_already_processed = result[2] if is_already_processed == "1": msg = "process_filing_body: This filing has already been entered." print msg logger.error(msg) raise FilingHeaderAlreadyProcessed(msg) # print "Processing filing %s" % (filingnum) f1 = filing(filingnum) form = f1.get_form_type() version = f1.get_version() filer_id = f1.get_filer_id() # only parse forms that we're set up to read if not fp.is_allowed_form(form): if verbose: msg = "process_filing_body: Not a parseable form: %s - %s" % (form, filingnum) print msg logger.info(msg) return None linenum = 0 while True: linenum += 1 row = f1.get_body_row() if not row: break # print "row is %s" % (row) # print "\n\n\nForm is %s" % form try: linedict = fp.parse_form_line(row, version) # print "\n\n\nform is %s" % form process_body_row(linedict, filingnum, line_sequence, is_amended, cd, filer_id) except ParserMissingError: msg = "process_filing_body: Unknown line type in filing %s line %s: type=%s Skipping." 
% ( filingnum, linenum, row[0], ) print msg logger.warn(msg) continue # commit all the leftovers cd.commit_all() cd.close() counter = cd.get_counter() total_rows = 0 for i in counter: total_rows += counter[i] msg = "process_filing_body: Filing # %s Total rows: %s Tally is: %s" % (filingnum, total_rows, counter) print msg logger.info(msg) ######## DIRECT DB UPDATES. PROBABLY A BETTER APPROACH, BUT... header_data = dict_to_hstore(counter) cmd = "update efilings_filing set lines_present='%s'::hstore where filing_number=%s" % (header_data, filingnum) cursor.execute(cmd) # mark file as having been entered. cmd = "update efilings_filing set data_is_processed='1' where filing_number=%s" % (filingnum) cursor.execute(cmd) # flag this filer as one who has changed. cmd = "update efilings_committee set is_dirty=True where fec_id='%s' and cycle='%s'" % (filer_id, CURRENT_CYCLE) cursor.execute(cmd) # should also update the candidate is dirty flag too by joining w/ ccl table. # these tables aren't indexed, so do as two separate queries. cmd = "select cand_id from ftpdata_candcomlink where cmte_id = '%s' and cmte_dsgn in ('A', 'P')" % (filer_id) cursor.execute(cmd) result = cursor.fetchone() if result: cand_id = result[0] cmd = "update efilings_candidate set is_dirty=True where fec_id = '%s' and cycle='%s'" % ( cand_id, CURRENT_CYCLE, ) cursor.execute(cmd) connection.close()
def process_filing_body(filingnum, fp=None, logger=None):
    """Parse the body rows of a filing and write them to the database.

    Reads the filing header from efilings_filing, feeds each body row through
    the form parser into a CSV_dumper, then issues direct SQL updates to store
    the line tally and set dirty/processed flags.

    Args:
        filingnum: FEC filing number (efilings_filing.filing_number).
        fp: optional form_parser; it's useful to pass the form parser in when
            running in bulk so we don't have to keep creating new ones.
        logger: optional fec_logger; created if not supplied.

    Raises:
        FilingHeaderDoesNotExist: no header row found for this filing number.
        FilingHeaderAlreadyProcessed: data_is_processed is already '1'.

    NOTE(review): every SQL statement here is built by %-interpolating values
    into the query text rather than DB-API parameter binding -- an
    SQL-injection risk if filing numbers/ids can come from untrusted input.
    """
    if not fp:
        fp = form_parser()
    if not logger:
        logger=fec_logger()
    msg = "process_filing_body: Starting # %s" % (filingnum)
    #print msg
    logger.info(msg)

    connection = get_connection()
    cursor = connection.cursor()
    cmd = "select fec_id, superseded_by_amendment, data_is_processed from efilings_filing where filing_number=%s" % (filingnum)
    cursor.execute(cmd)
    cd = CSV_dumper(connection)

    result = cursor.fetchone()
    if not result:
        msg = 'process_filing_body: Couldn\'t find a new_filing for filing %s' % (filingnum)
        logger.error(msg)
        raise FilingHeaderDoesNotExist(msg)

    # will throw a TypeError if it's missing.
    line_sequence = 1
    is_amended = result[1]
    is_already_processed = result[2]
    # data_is_processed is stored as the string '1', not a boolean.
    if is_already_processed == "1":
        msg = 'process_filing_body: This filing has already been entered.'
        print msg
        logger.error(msg)
        raise FilingHeaderAlreadyProcessed(msg)

    #print "Processing filing %s" % (filingnum)
    f1 = filing(filingnum)
    form = f1.get_form_type()
    version = f1.get_version()
    filer_id = f1.get_filer_id()

    # only parse forms that we're set up to read
    # NOTE(review): `verbose` is not defined in this function -- presumably a
    # module-level global; confirm it exists before relying on this branch.
    if not fp.is_allowed_form(form):
        if verbose:
            msg = "process_filing_body: Not a parseable form: %s - %s" % (form, filingnum)
            print msg
            logger.info(msg)
        return None

    linenum = 0
    while True:
        linenum += 1
        row = f1.get_body_row()
        # get_body_row returns a falsy value at end of file.
        if not row:
            break

        #print "row is %s" % (row)
        #print "\n\n\nForm is %s" % form

        try:
            linedict = fp.parse_form_line(row, version)
            #print "\n\n\nform is %s" % form
            process_body_row(linedict, filingnum, line_sequence, is_amended, cd, filer_id)
        except ParserMissingError:
            # Unknown line types are logged and skipped; the rest of the
            # filing is still processed.
            msg = 'process_filing_body: Unknown line type in filing %s line %s: type=%s Skipping.' % (filingnum, linenum, row[0])
            print msg
            logger.warn(msg)
            continue

    # commit all the leftovers
    cd.commit_all()
    cd.close()
    counter = cd.get_counter()
    total_rows = 0
    for i in counter:
        total_rows += counter[i]
    msg = "process_filing_body: Filing # %s Total rows: %s Tally is: %s" % (filingnum, total_rows, counter)
    print msg
    logger.info(msg)

    ######## DIRECT DB UPDATES. PROBABLY A BETTER APPROACH, BUT...
    # Store the per-line-type tally as a Postgres hstore on the filing row.
    header_data = dict_to_hstore(counter)
    cmd = "update efilings_filing set lines_present='%s'::hstore where filing_number=%s" % (header_data, filingnum)
    cursor.execute(cmd)

    # mark file as having been entered.
    cmd = "update efilings_filing set data_is_processed='1' where filing_number=%s" % (filingnum)
    cursor.execute(cmd)

    # flag this filer as one who has changed.
    cmd = "update efilings_committee set is_dirty=True where fec_id='%s' and cycle='%s'" % (filer_id, CURRENT_CYCLE)
    cursor.execute(cmd)

    # should also update the candidate is dirty flag too by joining w/ ccl table.
    # these tables aren't indexed, so do as two separate queries.
    cmd = "select cand_id from ftpdata_candcomlink where cmte_id = '%s' and cmte_dsgn in ('A', 'P')" % (filer_id)
    cursor.execute(cmd)
    result = cursor.fetchone()
    if result:
        cand_id = result[0]
        cmd = "update efilings_candidate set is_dirty=True where fec_id = '%s' and cycle='%s'" % (cand_id, CURRENT_CYCLE)
        cursor.execute(cmd)

    connection.close()
# Bulk-load script: walk the filing cache and process each .fec file.
from parsing.utils.db_utils import get_connection
from parsing.utils.write_csv_to_db import CSV_dumper
from parsing.utils.filing_body_processor import process_body_row
from parsing.utils.fec_import_logging import fec_logger

# NOTE(review): `os`, `sys`, `re`, `form_parser`, `filing`, and
# FILECACHE_DIRECTORY are used below but not imported/defined in this visible
# span -- presumably imported earlier in the file; confirm.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(BASE_DIR)

# One shared parser/connection/logger for the whole run.
fp = form_parser()
fec_format_file = re.compile(r"\d+\.fec")
connection = get_connection()
cursor = connection.cursor()
logger = fec_logger()

# Process all .fec files in the FILECACHE_DIRECTORY
for d, _, files in os.walk(FILECACHE_DIRECTORY):
    for this_file in files:
        # Ignore it if it isn't a numeric fec file, e.g. \d+\.fec
        if not fec_format_file.match(this_file):
            continue
        filingnum = this_file.replace(".fec", "")
        cd = CSV_dumper(connection)
        f1 = filing(filingnum)
        # NOTE(review): the loop body appears truncated at this chunk
        # boundary -- the rest of the per-file processing is not visible here.
        formtype = f1.get_form_type()
# Bulk-load script (duplicate copy): walk the filing cache and process
# each .fec file found there.
from parsing.utils.db_utils import get_connection
from parsing.utils.write_csv_to_db import CSV_dumper
from parsing.utils.filing_body_processor import process_body_row
from parsing.utils.fec_import_logging import fec_logger

# NOTE(review): `os`, `sys`, `re`, `form_parser`, `filing`, and
# FILECACHE_DIRECTORY are used below but not imported/defined in this visible
# span -- presumably imported earlier in the file; confirm.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(BASE_DIR)

# One shared parser/connection/logger for the whole run.
fp = form_parser()
fec_format_file = re.compile(r'\d+\.fec')
connection = get_connection()
cursor = connection.cursor()
logger = fec_logger()

# Process all .fec files in the FILECACHE_DIRECTORY
for d, _, files in os.walk(FILECACHE_DIRECTORY):
    for this_file in files:
        # Ignore it if it isn't a numeric fec file, e.g. \d+\.fec
        if not fec_format_file.match(this_file):
            continue
        filingnum = this_file.replace(".fec", "")
        cd = CSV_dumper(connection)
        f1 = filing(filingnum)
        formtype = f1.get_form_type()
        # NOTE(review): accesses the attribute `f1.version` directly here,
        # whereas the other copy of this script calls f1.get_version() -- and
        # the loop body appears truncated at this chunk boundary.
        version = f1.version