def test_schedule_f_payee_name_text_accent(self): ''' Test to see that pye_nm is parsed correctly and retrieved as expected from ts_vector column payee_name_text ''' connection = db.engine.connect() names = {'ÁCCENTED NAME', 'ACCENTED NAME'} i = 0 for n in names: i += 1 data = { 'pye_nm': n, 'sub_id': 9999999998999999970 + i, 'filing_form': 'F3' } insert = "INSERT INTO disclosure.fec_fitem_sched_f " + \ "(pye_nm, sub_id, filing_form) " + \ " VALUES (%(pye_nm)s, %(sub_id)s, %(filing_form)s)" connection.execute(insert, data) manage.refresh_materialized(concurrent=False) select = "SELECT * from disclosure.fec_fitem_sched_f " + \ "WHERE payee_name_text @@ to_tsquery('" + parse_fulltext('ÁCCENTED NAME') + "');" results = connection.execute(select).fetchall() pye_nm_list = {na[14] for na in results} assert (names.issubset(pye_nm_list)) select = "SELECT * from disclosure.fec_fitem_sched_f " + \ "WHERE payee_name_text @@ to_tsquery('" + parse_fulltext('ACCENTED NAME') + "');" results = connection.execute(select).fetchall() pye_nm_list = {na[14] for na in results} assert (names.issubset(pye_nm_list)) connection.close()
def test_schedule_f_payee_name_text(self): ''' Test to see that pye_nm is parsed correctly and retrieved as expected from ts_vector column payee_name_text ''' connection = db.engine.connect() name = "O'Reilly" names_good = {'O Reilly', "O'Reilly", 'O.Reilly', 'O-Reilly'} names_bad = {'O', "O'Hare", "Reilly"} i = 0 for n in names_good.union(names_bad): i += 1 data = { 'pye_nm': n, 'sub_id': 9999999999999999970 + i, 'filing_form': 'F3' } insert = "INSERT INTO disclosure.fec_fitem_sched_f " + \ "(pye_nm, sub_id, filing_form) " + \ " VALUES (%(pye_nm)s, %(sub_id)s, %(filing_form)s)" connection.execute(insert, data) manage.refresh_materialized(concurrent=False) select = "SELECT * from disclosure.fec_fitem_sched_f " + \ "WHERE payee_name_text @@ to_tsquery('" + parse_fulltext(name) + "');" results = connection.execute(select).fetchall() pye_nm_list = {na[14] for na in results} #assert all good names in result set assert (names_good.issubset(pye_nm_list)) #assert no bad names in result set assert (names_bad.isdisjoint(pye_nm_list)) connection.close()
def filter_fulltext(query, kwargs, fields): for key, column in fields: if kwargs.get(key): filters = [ column.match(utils.parse_fulltext(value)) for value in kwargs[key] ] query = query.filter(sa.or_(*filters)) return query
def filter_fulltext(query, kwargs, fields): for key, column in fields: if kwargs.get(key): exclude_list = build_exclude_list(kwargs.get(key)) include_list = build_include_list(kwargs.get(key)) if exclude_list: filters = [ sa.not_(column.match(utils.parse_fulltext(value))) for value in exclude_list ] query = query.filter(sa.and_(*filters)) if include_list: filters = [ column.match(utils.parse_fulltext(value)) for value in include_list ] query = query.filter(sa.or_(*filters)) return query
def filter_fulltext(query, kwargs, fields): for key, column in fields: if kwargs.get(key): exclude_list = [parse_exclude_arg(value) for value in kwargs[key] if is_exclude_arg(value)] include_list = [value for value in kwargs[key] if not is_exclude_arg(value)] if exclude_list: filters = [ sa.not_(column.match(utils.parse_fulltext(value))) for value in exclude_list ] query = query.filter(sa.and_(*filters)) if include_list: filters = [ column.match(utils.parse_fulltext(value)) for value in include_list ] query = query.filter(sa.or_(*filters)) return query
def test_schedule_a_contributor_occupation_text(self): ''' Test to see that contbr_occupation insert is parsed correctly and retrieved as expected from ts_vector column contributor_occupation_text ''' connection = db.engine.connect() # each list value in the dict below has 3 "good" names, one "bad" name names = { "Test.com": ['Test.com', 'Test com', 'Test .com', 'Test'], "Steven O'Reilly": [ "Steven O'Reilly", "Steven O' Reilly", "Steven O Reilly", "O'Reilly" ] } i = 0 for key in names: for n in names[key]: i += 1 data = { 'contbr_occupation': n, 'sub_id': 9999999999999999970 + i, 'filing_form': 'F3' } insert = "INSERT INTO disclosure.fec_fitem_sched_a " + \ "(contbr_occupation, sub_id, filing_form) " + \ " VALUES (%(contbr_occupation)s, %(sub_id)s, %(filing_form)s)" connection.execute(insert, data) manage.refresh_materialized(concurrent=False) select = "SELECT * from disclosure.fec_fitem_sched_a " + \ "WHERE contributor_occupation_text @@ to_tsquery('" + parse_fulltext(key) + "');" results = connection.execute(select).fetchall() contbr_occupation_list = [name[17] for name in results] #the only result not returned is the "bad" last element self.assertEquals( set(names[key]) - set(contbr_occupation_list), {names[key][-1]}) connection.close()
def test_schedule_b_exclude(self): ''' Test that for each set of names, searching by the parsed key returns all but the last result. This is a test of adding extra information to reduce undesired returns ''' connection = db.engine.connect() # each list value in the dict below has 3 "good" names, one "bad" name names = { "Test.com": ['Test.com', 'Test com', 'Test .com', 'Test'], "Steven O'Reilly": [ "Steven O'Reilly", "Steven O' Reilly", "Steven O Reilly", "O'Reilly" ] } i = 0 for key in names: for n in names[key]: i += 1 data = { 'recipient_nm': n, 'sub_id': 9999999999999999990 + i, 'filing_form': 'F3' } insert = "INSERT INTO disclosure.fec_fitem_sched_b " + \ "(recipient_nm, sub_id, filing_form) " + \ " VALUES (%(recipient_nm)s, %(sub_id)s, %(filing_form)s)" connection.execute(insert, data) manage.refresh_materialized(concurrent=False) select = "SELECT * from disclosure.fec_fitem_sched_b " + \ "WHERE recipient_name_text @@ to_tsquery('" + parse_fulltext(key) + "');" results = connection.execute(select).fetchall() recipient_nm_list = [name[2] for name in results] #the only result not returned is the "bad" last element self.assertEquals( set(names[key]) - set(recipient_nm_list), {names[key][-1]}) connection.close()