def test_accent_insensitive_sched_a(self):
    """Verify that schedule A employer search returns both accented and
    unaccented rows, whether or not the search term carries accents.
    """
    connection = db.engine.connect()
    # Variants of the same employer name, accented and unaccented.
    names = {
        'Tést.com', 'Test com', 'Test .com',
        'test.com', 'TEST.COM', 'Test.com',
    }
    insert = (
        "INSERT INTO disclosure.fec_fitem_sched_a "
        "(contbr_employer, sub_id, filing_form) "
        " VALUES (%(contbr_employer)s, %(sub_id)s, %(filing_form)s)"
    )
    for offset, employer in enumerate(names, start=1):
        connection.execute(insert, {
            'contbr_employer': employer,
            'sub_id': 9999999959999999980 + offset,
            'filing_form': 'F3',
        })
    manage.refresh_materialized(concurrent=False)
    # The unaccented query should match every stored variant...
    results = self._results(
        api.url_for(ScheduleAView, contributor_employer='Test.com'))
    employers = {row['contributor_employer'] for row in results}
    assert names.issubset(employers)
    # ...and so should the accented query.
    results = self._results(
        api.url_for(ScheduleAView, contributor_employer='Tést.com'))
    employers = {row['contributor_employer'] for row in results}
    assert names.issubset(employers)
    connection.close()
def test_simple_amended_filing(self):
    """After an amendment, only the amended F1 should be returned with
    most_recent=True, from both the list and the view endpoints.
    """
    expected_filing = self.STOCK_SECOND_F1
    self.create_filing(1, self.STOCK_FIRST_F1)
    self.create_filing(2, expected_filing)
    # Refresh downstream `ofec_amendments_mv` and `ofec_filings_all_mv`
    manage.refresh_materialized(concurrent=False)

    # /filings/ list endpoint
    results = self._results(rest.api.url_for(
        FilingsList,
        committee_id=expected_filing['committee_id'],
        most_recent=True,
    ))
    assert len(results) == 1
    self.assert_filings_equal(results[0], expected_filing)

    # FilingsView:
    # /committee/<committee_id>/filings/ and /candidate/<candidate_id>/filings/
    results = self._results(rest.api.url_for(
        FilingsView,
        committee_id=expected_filing['committee_id'],
        most_recent=True,
    ))
    assert len(results) == 1
    self.assert_filings_equal(results[0], expected_filing)
def test_schedule_f_payee_name_text(self):
    """Check that pye_nm is parsed into the payee_name_text tsvector
    column: punctuation variants of the name match, lookalikes do not.
    """
    connection = db.engine.connect()
    name = "O'Reilly"
    names_good = {'O Reilly', "O'Reilly", 'O.Reilly', 'O-Reilly'}
    names_bad = {'O', "O'Hare", "Reilly"}
    insert = (
        "INSERT INTO disclosure.fec_fitem_sched_f "
        "(pye_nm, sub_id, filing_form) "
        " VALUES (%(pye_nm)s, %(sub_id)s, %(filing_form)s)"
    )
    for offset, payee in enumerate(names_good.union(names_bad), start=1):
        connection.execute(insert, {
            'pye_nm': payee,
            'sub_id': 9999999999999999970 + offset,
            'filing_form': 'F3',
        })
    manage.refresh_materialized(concurrent=False)
    select = (
        "SELECT * from disclosure.fec_fitem_sched_f "
        "WHERE payee_name_text @@ to_tsquery('" + parse_fulltext(name) + "');"
    )
    rows = connection.execute(select).fetchall()
    # Index 14 is the pye_nm column in the raw result rows.
    payees = {row[14] for row in rows}
    # Every punctuation variant of the name is matched...
    assert names_good.issubset(payees)
    # ...and none of the lookalike names are.
    assert names_bad.isdisjoint(payees)
    connection.close()
def test_candidate_committee_wrong_form_totals(self):
    """Committee 'C001' filed the wrong form: F3P plus F3X (wrong form).
    Committee totals should sum both; candidate totals should also pick
    up the F3 filing.
    """
    self.create_cmte_valid(self.STOCK_CMTE_VALID_FEC_YR)
    self.create_cand_cmte_linkage(self.STOCK_CAND_CMTE_LINKAGE)
    self.insert_vsum(self.STOCK_FILING_F3P_Q1)
    self.insert_vsum(self.STOCK_FILING_F3X_Q2)
    self.insert_vsum(self.STOCK_FILING_F3_Q1)
    manage.refresh_materialized(concurrent=False)

    committee_totals = self._results(rest.api.url_for(
        TotalsCommitteeView, committee_id='C001'))
    assert len(committee_totals) == 1
    assert committee_totals[0]['receipts'] == 300
    assert committee_totals[0]['disbursements'] == 80

    candidate_totals = self._results(rest.api.url_for(
        CandidateTotalsView, candidate_id='P01', election_full=True))
    assert len(candidate_totals) == 1
    assert candidate_totals[0]['receipts'] == 311
    assert candidate_totals[0]['disbursements'] == 85
def migrate_db(request):
    """Reset the test schema, run migrations, and refresh materialized
    views inside a fresh application context.
    """
    with rest.app.app_context():
        rest.app.config['TESTING'] = True
        rest.app.config['SQLALCHEMY_DATABASE_URI'] = common.TEST_CONN
        reset_schema()
        run_migrations()
        manage.refresh_materialized(concurrent=False)
def real_efile_sa7(self):
    """
    Test tsvector trigger for real_efile.sa7
    --contributor_name_text
    --contributor_employer_text
    --contributor_occupation_text
    """
    connection = db.engine.connect()
    # One dict per row. The original used a single dict of *sets*, which
    # silently collapsed duplicate values (e.g. 'The Street' appeared
    # twice) and did not keep column values aligned per row.
    rows = [
        {
            'repid': 111111111111, 'tran_id': '4',
            'fname': 'Oscar', 'mname': 'The', 'name': 'Grouch',
            'indemp': 'The Street', 'indocc': 'Lead Grouch',
        },
        {
            'repid': 222222222222, 'tran_id': '5',
            'fname': 'The', 'mname': '', 'name': 'Count',
            'indemp': 'The Street', 'indocc': 'Vampire/Educator',
        },
        {
            'repid': 333333333333, 'tran_id': '6',
            'fname': 'Mr.', 'mname': '', 'name': 'Rogers',
            'indemp': 'The Neighborhood', 'indocc': 'Neighbor',
        },
    ]
    # NOTE(review): docstring says real_efile.sa7 but the insert targets
    # real_efile_sa7 -- confirm which name is correct for this schema.
    insert = (
        "INSERT INTO real_efile_sa7 "
        "(repid, tran_id, fname, mname, name, indemp, indocc) "
        "VALUES (%(repid)s, %(tran_id)s, %(fname)s, %(mname)s, "
        "%(name)s, %(indemp)s, %(indocc)s)"
    )
    # SQLAlchemy Connection has no `executemany`; passing a list of
    # parameter dicts to execute() performs the multi-row insert.
    connection.execute(insert, rows)
    manage.refresh_materialized(concurrent=False)
    results = self._results(
        api.url_for(ScheduleAEfileView, contributor_employer='Neighborhood'))
    employer_set = {r['contributor_employer'] for r in results}
    assert {'The Neighborhood'}.issubset(employer_set)
    name_set = {r['contributor_name'] for r in results}
    assert {'Mr.'}.issubset(name_set)
    occupation_set = {r['contributor_occupation'] for r in results}
    assert {'Educator'}.issubset(occupation_set)
    connection.close()
def test_negative_filing_chain(self):
    """Amendment chains must never surface negative file numbers."""
    negative_f1 = {
        'committee_id': 'C006',
        'report_year': 2018,
        'amendment_indicator': 'N',
        'receipt_date': datetime.date(2018, 1, 31),
        'file_number': -1180841,
        'amendment_chain': None,
        'previous_file_number': -1180841,
        'most_recent_file_number': None,
        'most_recent': None,
        'report_type': None,
        'form_type': 'F1',
    }
    non_negative_f1 = copy.deepcopy(self.STOCK_FIRST_F1)
    self.create_filing(1, negative_f1)
    self.create_filing(2, non_negative_f1)
    # Refresh downstream `ofec_amendments_mv` and `ofec_filings_all_mv`
    manage.refresh_materialized(concurrent=False)
    results = self._results(rest.api.url_for(
        FilingsList,
        committee_id=non_negative_f1['committee_id'],
        most_recent=True,
    ))
    assert len(results) == 1
    self.assert_filings_equal(results[0], non_negative_f1)
def test_schedule_f_payee_name_text_accent(self):
    """Accent-insensitivity of the payee_name_text tsvector column:
    accented and unaccented queries each match both stored spellings.
    """
    connection = db.engine.connect()
    names = {'ÁCCENTED NAME', 'ACCENTED NAME'}
    insert = (
        "INSERT INTO disclosure.fec_fitem_sched_f "
        "(pye_nm, sub_id, filing_form) "
        " VALUES (%(pye_nm)s, %(sub_id)s, %(filing_form)s)"
    )
    for offset, payee in enumerate(names, start=1):
        connection.execute(insert, {
            'pye_nm': payee,
            'sub_id': 9999999998999999970 + offset,
            'filing_form': 'F3',
        })
    manage.refresh_materialized(concurrent=False)
    # Both query spellings must return both stored rows.
    for query in ('ÁCCENTED NAME', 'ACCENTED NAME'):
        select = (
            "SELECT * from disclosure.fec_fitem_sched_f "
            "WHERE payee_name_text @@ to_tsquery('"
            + parse_fulltext(query) + "');"
        )
        rows = connection.execute(select).fetchall()
        # Index 14 is the pye_nm column in the raw result rows.
        assert names.issubset({row[14] for row in rows})
    connection.close()
def test_accent_insensitive_sched_b(self):
    """Verify that schedule B recipient search returns both accented
    and unaccented rows, whether or not the search term has accents.
    """
    connection = db.engine.connect()
    # Variants of the same recipient name, accented and unaccented.
    names = {
        'ést-lou', 'Est lou', 'ést lóu',
        'EST LOU', 'est lou', '@@@est lou---@',
    }
    insert = (
        "INSERT INTO disclosure.fec_fitem_sched_b "
        "(recipient_nm, sub_id, filing_form) "
        " VALUES (%(recipient_nm)s, %(sub_id)s, %(filing_form)s)"
    )
    for offset, recipient in enumerate(names, start=1):
        connection.execute(insert, {
            'recipient_nm': recipient,
            'sub_id': 9999999999995699990 + offset,
            'filing_form': 'F3',
        })
    manage.refresh_materialized(concurrent=False)
    # Accented search term matches every stored variant...
    # (leftover debug print() removed)
    results = self._results(
        api.url_for(ScheduleBView, contributor_employer='ést-lou'))
    recipient_names = {r['recipient_name'] for r in results}
    assert names.issubset(recipient_names)
    # ...and so does the unaccented term.
    results = self._results(
        api.url_for(ScheduleBView, contributor_employer='est lou'))
    recipient_names = {r['recipient_name'] for r in results}
    assert names.issubset(recipient_names)
    connection.close()
def test_multiple_form_1s(self):
    """Three F1 filings for one committee, where the middle amendment's
    data-entered previous_file_number unusually points at itself; that
    value must be preserved untouched in the API output.
    """
    shared = {
        'committee_id': 'C006',
        'report_year': 2018,
        'most_recent_file_number': 3333,
        'report_type': None,
        'form_type': 'F1',
    }
    first_f1 = dict(shared, **{
        'amendment_indicator': 'N',
        'receipt_date': datetime.date(2018, 1, 31),
        'file_number': 1111,
        'amendment_chain': [1111],
        'previous_file_number': 1111,
        'most_recent': False,
    })
    second_f1 = dict(shared, **{
        'amendment_indicator': 'A',
        'receipt_date': datetime.date(2018, 2, 28),
        'file_number': 2222,
        'amendment_chain': [1111, 2222],
        'previous_file_number': 1111,
        'most_recent': False,
    })
    third_f1 = dict(shared, **{
        'amendment_indicator': 'A',
        'receipt_date': datetime.date(2018, 3, 28),
        'file_number': 3333,
        'amendment_chain': [1111, 2222, 3333],
        'previous_file_number': 2222,
        'most_recent': True,
    })
    unusual_entry_for_second_f1 = copy.deepcopy(second_f1)
    # Self-referencing previous_file_number -- intentionally unusual.
    unusual_entry_for_second_f1['previous_file_number'] = (
        unusual_entry_for_second_f1['file_number'])
    self.create_filing(1, first_f1)
    self.create_filing(2, unusual_entry_for_second_f1)
    self.create_filing(3, third_f1)
    # Refresh downstream `ofec_amendments_mv` and `ofec_filings_all_mv`
    manage.refresh_materialized(concurrent=False)
    results = self._results(rest.api.url_for(
        FilingsList, committee_id=first_f1['committee_id']))
    fixtures = (first_f1, unusual_entry_for_second_f1, third_f1)
    for result in sorted(results, key=lambda x: x['file_number']):
        for filing in fixtures:
            if result['file_number'] == filing['file_number']:
                self.assert_filings_equal(result, filing)
                # Note: we're leaving data-entered previous_file_number alone
                assert (result['previous_file_number']
                        == filing['previous_file_number'])
def setUpClass(cls):
    """Configure the Flask test app, push an application context, and
    build a fresh migrated schema with refreshed materialized views.
    """
    rest.app.config['TESTING'] = True
    rest.app.config['SQLALCHEMY_DATABASE_URI'] = TEST_CONN
    rest.app.config['PRESERVE_CONTEXT_ON_EXCEPTION'] = False
    cls.app = rest.app.test_client()
    cls.client = TestApp(rest.app)
    # Keep the context pushed for the lifetime of the test class.
    cls.app_context = rest.app.app_context()
    cls.app_context.push()
    _reset_schema_for_integration()
    run_migrations()
    manage.refresh_materialized(concurrent=False)
def refresh():
    """Update incremental aggregates and materialized views, then email
    logs to the development team.
    """
    buffer = io.StringIO()
    # Capture refresh logs so they can be mailed out afterwards.
    with mail.CaptureLogs(manage.logger, buffer):
        try:
            manage.update_aggregates()
            manage.refresh_materialized()
            download.clear_bucket()
        except Exception as error:
            manage.logger.exception(error)
    try:
        mail.send_mail(buffer)
    except Exception as error:
        logger.exception(error)
def refresh():
    """Update incremental aggregates, itemized schedules, materialized
    views, then slack a notification to the development team.
    """
    manage.logger.info('Starting nightly refresh...')
    try:
        manage.refresh_itemized()
        manage.refresh_materialized()
        download.clear_bucket()
        slack_message = '*Success* nightly updates for {0} completed'.format(
            env.get_credential('NEW_RELIC_APP_NAME'))
        utils.post_to_slack(slack_message, '#bots')
        manage.logger.info(slack_message)
    except Exception as error:
        # Log the traceback once; the original logged the same
        # exception twice (before and after posting to slack).
        manage.logger.exception(error)
        slack_message = '*ERROR* nightly update failed for {0}. Check logs.'.format(
            env.get_credential('NEW_RELIC_APP_NAME'))
        utils.post_to_slack(slack_message, '#bots')
def test_nulls_in_committee_history(self):
    """Committee history array columns must not contain NULL entries."""
    self.insert_cmte_valid(self.committee_data)
    self.insert_cand_cmte_linkage(self.cand_cmte_linkage)
    self.insert_f_rpt_or_form_sub(self.f_rpt_or_form_sub_data)
    manage.refresh_materialized(concurrent=False)
    committee_api = self._results(
        rest.api.url_for(CommitteeHistoryView, committee_id='C001'))
    for column in ('cycles', 'cycles_has_activity', 'cycles_has_financial'):
        self.check_nulls_in_array_column(committee_api, array_column=column)
def refresh_materialized_views():
    """Refresh materialized views and clear the download bucket, then
    slack a success or failure notification to the development team.
    (Docstring corrected: this job does not touch aggregates or
    itemized schedules.)
    """
    manage.logger.info('Starting nightly refresh...')
    try:
        manage.refresh_materialized()
        download.clear_bucket()
        message = '*Success* nightly updates for {0} completed'.format(
            get_app_name())
        utils.post_to_slack(message, '#bots')
        manage.logger.info(message)
    except Exception as error:
        manage.logger.exception(error)
        message = '*ERROR* nightly update failed for {0}. Check logs.'.format(
            get_app_name())
        utils.post_to_slack(message, '#bots')
def refresh():
    """Update incremental aggregates, materialized views, and legal
    documents, then email the captured logs to the development team.
    """
    buffer = io.StringIO()
    # Capture refresh logs so they can be mailed out afterwards.
    with mail.CaptureLogs(manage.logger, buffer):
        try:
            manage.update_aggregates()
            manage.refresh_materialized()
            download.clear_bucket()
            legal_docs.index_advisory_opinions()
            legal_docs.load_advisory_opinions_into_s3()
            # TODO: needs to work with celery
            # legal_docs.load_current_murs()
        except Exception as error:
            manage.logger.exception(error)
    try:
        mail.send_mail(buffer)
    except Exception as error:
        logger.exception(error)
def test_schedule_a_contributor_occupation_text(self):
    """
    Test to see that contbr_occupation insert is parsed correctly
    and retrieved as expected from ts_vector column
    contributor_occupation_text.

    Fix: replaced deprecated unittest alias assertEquals (removed in
    Python 3.12) with assertEqual.
    """
    connection = db.engine.connect()
    # Each value holds three "good" variants plus one "bad" final entry
    # that must NOT be matched when searching by the key.
    names = {
        "Test.com": ['Test.com', 'Test com', 'Test .com', 'Test'],
        "Steven O'Reilly": [
            "Steven O'Reilly", "Steven O' Reilly", "Steven O Reilly",
            "O'Reilly"
        ]
    }
    insert = (
        "INSERT INTO disclosure.fec_fitem_sched_a "
        "(contbr_occupation, sub_id, filing_form) "
        " VALUES (%(contbr_occupation)s, %(sub_id)s, %(filing_form)s)"
    )
    i = 0
    for key in names:
        for occupation in names[key]:
            i += 1
            connection.execute(insert, {
                'contbr_occupation': occupation,
                'sub_id': 9999999999999999970 + i,
                'filing_form': 'F3',
            })
        manage.refresh_materialized(concurrent=False)
        select = (
            "SELECT * from disclosure.fec_fitem_sched_a "
            "WHERE contributor_occupation_text @@ to_tsquery('"
            + parse_fulltext(key) + "');"
        )
        results = connection.execute(select).fetchall()
        # Index 17 is the contbr_occupation column in the raw rows.
        contbr_occupation_list = [row[17] for row in results]
        # The only variant not returned is the "bad" last element.
        self.assertEqual(
            set(names[key]) - set(contbr_occupation_list),
            {names[key][-1]})
    connection.close()
def test_schedule_b_exclude(self):
    """
    Test that for each set of names, searching by the parsed key
    returns all but the last result. This is a test of adding extra
    information to reduce undesired returns.

    Fix: replaced deprecated unittest alias assertEquals (removed in
    Python 3.12) with assertEqual.
    """
    connection = db.engine.connect()
    # Each value holds three "good" variants plus one "bad" final entry
    # that must NOT be matched when searching by the key.
    names = {
        "Test.com": ['Test.com', 'Test com', 'Test .com', 'Test'],
        "Steven O'Reilly": [
            "Steven O'Reilly", "Steven O' Reilly", "Steven O Reilly",
            "O'Reilly"
        ]
    }
    insert = (
        "INSERT INTO disclosure.fec_fitem_sched_b "
        "(recipient_nm, sub_id, filing_form) "
        " VALUES (%(recipient_nm)s, %(sub_id)s, %(filing_form)s)"
    )
    i = 0
    for key in names:
        for recipient in names[key]:
            i += 1
            connection.execute(insert, {
                'recipient_nm': recipient,
                'sub_id': 9999999999999999990 + i,
                'filing_form': 'F3',
            })
        manage.refresh_materialized(concurrent=False)
        select = (
            "SELECT * from disclosure.fec_fitem_sched_b "
            "WHERE recipient_name_text @@ to_tsquery('"
            + parse_fulltext(key) + "');"
        )
        results = connection.execute(select).fetchall()
        # Index 2 is the recipient_nm column in the raw rows.
        recipient_nm_list = [row[2] for row in results]
        # The only variant not returned is the "bad" last element.
        self.assertEqual(
            set(names[key]) - set(recipient_nm_list),
            {names[key][-1]})
    connection.close()
def setUpClass(cls):
    """Build the schedule A/B factories and refresh materialized views."""
    super(TestViews, cls).setUpClass()
    (cls.NmlSchedAFactory,
     cls.NmlSchedBFactory,
     cls.FItemReceiptOrExp) = make_factory()
    manage.refresh_materialized(concurrent=False)
def test_multiple_form_3s(self):
    """Interleaved Q2 and Q3 F3 filings with amendments: each row from
    the filings endpoint must match the fixture that shares its file
    number, for both report types.
    """
    shared = {
        'committee_id': 'C006',
        'report_year': 2018,
        'form_type': 'F3',
    }
    form_3_q2_new = dict(shared, **{
        'amendment_indicator': 'N',
        'receipt_date': datetime.date(2018, 7, 15),
        'file_number': 20001,
        'amendment_chain': [20001],
        'previous_file_number': 20001,
        'most_recent_file_number': 20003,
        'most_recent': False,
        'report_type': 'Q2',
    })
    form_3_q2_amend_1 = dict(shared, **{
        'amendment_indicator': 'A',
        'receipt_date': datetime.date(2018, 8, 15),
        'file_number': 20002,
        'amendment_chain': [20001, 20002],
        'previous_file_number': 20001,
        'most_recent_file_number': 20003,
        'most_recent': False,
        'report_type': 'Q2',
    })
    form_3_q3_new = dict(shared, **{
        'amendment_indicator': 'N',
        'receipt_date': datetime.date(2018, 10, 15),
        'file_number': 30001,
        'amendment_chain': [30001],
        'previous_file_number': 30001,
        'most_recent_file_number': 30002,
        'most_recent': False,
        'report_type': 'Q3',
    })
    form_3_q3_amend_1 = dict(shared, **{
        'amendment_indicator': 'A',
        'receipt_date': datetime.date(2018, 11, 15),
        'file_number': 30002,
        'amendment_chain': [30001, 30002],
        'previous_file_number': 30001,
        'most_recent_file_number': 30002,
        'most_recent': True,
        'report_type': 'Q3',
    })
    form_3_q2_amend_2 = dict(shared, **{
        'amendment_indicator': 'A',
        'receipt_date': datetime.date(2018, 12, 15),
        'file_number': 20003,
        'amendment_chain': [20001, 20002, 20003],
        'previous_file_number': 20002,
        'most_recent_file_number': 20003,
        'most_recent': True,
        'report_type': 'Q2',
    })
    # Filings are created in receipt order, Q2's last amendment coming
    # in after Q3's.
    filings_in_order = (
        form_3_q2_new, form_3_q2_amend_1, form_3_q3_new,
        form_3_q3_amend_1, form_3_q2_amend_2,
    )
    for seq, filing in enumerate(filings_in_order, start=1):
        self.create_filing(seq, filing)
    # Refresh downstream `ofec_amendments_mv` and `ofec_filings_all_mv`
    manage.refresh_materialized(concurrent=False)

    q2_results = self._results(rest.api.url_for(
        FilingsList,
        committee_id=form_3_q2_new['committee_id'],
        report_type='Q2',
    ))
    for result in q2_results:
        for filing in (form_3_q2_new, form_3_q2_amend_1, form_3_q2_amend_2):
            if result['file_number'] == filing['file_number']:
                self.assert_filings_equal(result, filing)

    q3_results = self._results(rest.api.url_for(
        FilingsList,
        committee_id=form_3_q3_new['committee_id'],
        report_type='Q3',
    ))
    for result in q3_results:
        for filing in (form_3_q3_new, form_3_q3_amend_1):
            if result['file_number'] == filing['file_number']:
                self.assert_filings_equal(result, filing)
def refresh():
    """Refresh all materialized views inside a test request context."""
    with manage.app.test_request_context():
        manage.refresh_materialized()
def refresh():
    """Update aggregates, then refresh materialized views, inside a
    test request context.
    """
    with manage.app.test_request_context():
        manage.update_aggregates()
        manage.refresh_materialized()
def setUpClass(cls):
    """Build the schedule A/B factories and refresh materialized views."""
    super(TestViews, cls).setUpClass()
    (cls.NmlSchedAFactory,
     cls.NmlSchedBFactory,
     cls.FItemReceiptOrExp) = make_factory()
    manage.refresh_materialized(concurrent=False)
def migrate_db(request):
    """Rebuild the schema, apply migrations, and refresh all
    materialized views.
    """
    reset_schema()
    run_migrations()
    manage.refresh_materialized(concurrent=False)
def test_candidate_counts_house(self):
    """
    Given base table candidate data, the list of candidates should be
    the same for /candidates/, /candidates/totals/, and /elections/.
    """
    election_year = 2020

    def house_candidate(row_id, cand_id):
        # All three candidates share office/state/district; only the
        # primary key and candidate id differ.
        return {
            'cand_valid_yr_id': row_id,
            'cand_id': cand_id,
            'fec_election_yr': 2020,
            'cand_election_yr': 2020,
            'cand_status': 'A',
            'cand_office': 'H',
            'cand_office_st': 'MD',
            'cand_office_district': '01',
            'date_entered': 'now()',
        }

    def principal_linkage(linkage_id, cand_id, cmte_id):
        # Principal-committee linkage rows differing only in ids.
        return {
            'linkage_id': linkage_id,
            'cand_id': cand_id,
            'fec_election_yr': 2020,
            'cand_election_yr': 2020,
            'cmte_id': cmte_id,
            'cmte_count_cand_yr': 1,
            'cmte_tp': 'H',
            'cmte_dsgn': 'P',
            'linkage_type': 'P',
            'date_entered': 'now()',
        }

    cand_valid_fec_yr_data = [
        house_candidate(1, 'H1'),
        house_candidate(2, 'H2'),
        house_candidate(3, 'H3'),
    ]
    self.create_cand_valid(cand_valid_fec_yr_data)

    cand_cmte_linkage_data = [
        principal_linkage(2, 'H1', '2'),
        principal_linkage(4, 'H2', '3'),
        principal_linkage(6, 'H3', '3'),
    ]
    self.create_cand_cmte_linkage(cand_cmte_linkage_data)
    manage.refresh_materialized(concurrent=False)

    sql_extract = (
        "SELECT * from disclosure.cand_valid_fec_yr "
        + "WHERE cand_election_yr in ({}, {})".format(
            election_year - 1, election_year)
    )
    results_tab = self.connection.execute(sql_extract).fetchall()

    params = {
        'election_year': election_year,
        'cycle': election_year,
        'election_full': True,
        'district': '01',
        'state': 'MD',
    }
    candidates_api = self._results(
        rest.api.url_for(CandidateList, **params))
    candidates_totals_api = self._results(
        rest.api.url_for(TotalsCandidateView, **params))
    elections_api = self._results(
        rest.api.url_for(ElectionView, office='house', **params))
    # All three endpoints must agree with the base-table row count.
    assert (len(results_tab) == len(candidates_api)
            == len(candidates_totals_api) == len(elections_api))