def test3_7_multifield(first_names):
    """Similar headers can be gathered into one multifield alongside a single field."""
    # GIVEN a table containing three headers similar to 'name'...
    # WHEN user extracts these columns into a single multifield...
    min_ratio = 0.3
    name_pattern = FieldPattern('name', multifield=True, min_ratio=min_ratio)
    table = FuzzyTable(
        path=first_names.path,
        approximate_match=True,
        fields=['id', name_pattern],
        header_row_seek=True,
    )

    # THEN both fields are extracted.
    assert list(table.keys()) == ['id', 'name']

    # THEN the 'name' field contains three subfields...
    names: datamodel.MultiField = table.get_field('name')
    assert len(names.subfields) == 3

    # ...and its last column is 4.
    assert names.col_num_last == 4

    # THEN the 'name' multifield's first row is reachable both by indexing
    # the field and through its `data` attribute.
    first_row_names = names[0]
    assert first_row_names == ('frank', 'susan', 'james')
    assert first_row_names == names.data[0]

    # THEN the 'id' single field's first row is reachable by indexing.
    ids = table.get_field('id')
    assert ids[0] == 0

    # THEN both fields have the same length.
    assert len(ids) == len(names) == 3

    # THEN the multifield reports its matched headers and a ratio that is
    # at least the requested minimum.
    assert names.header == ('name 2', 'name 1', 'name 3')
    assert names.ratio >= min_ratio
def test_user_generated_fieldpatterns(firstlastnames):
    """User-built FieldPatterns yield as many fields as the fixture declares."""
    # GIVEN a set of user-generated fieldpatterns...
    first_name_pattern = FieldPattern(
        name='something totally different',
        alias='first name',
        approximate_match=True,
    )
    last_name_pattern = FieldPattern(
        name='last_name',
        alias=['last name', 'LastName'],
    )

    # WHEN they are passed to FuzzyTable...
    table = FuzzyTable(
        path=firstlastnames.path,
        fields=[first_name_pattern, last_name_pattern],
        header_row_seek=True,
        name='names',
    )

    # THEN the table holds as many fields as the fixture lists.
    assert len(table) == len(firstlastnames.fields)
def test3_6_compare_fieldnames(first_names):
    """Seeking the header row extracts exactly the requested field names."""
    # GIVEN a table whose headers are NOT in row 1...
    # WHEN user seeks header row...
    table = FuzzyTable(
        path=first_names.path,
        header_row_seek=True,
        fields=first_names.fieldnames,
    )

    # THEN all desired field_names are extracted, in order.
    assert list(table.keys()) == first_names.fieldnames
def ft_dr_who(field_names):
    """Return a header-seeking FuzzyTable for the path given by `_get_test_path('csv')`.

    `field_names` is forwarded unchanged as the table's `fields` argument.
    """
    return FuzzyTable(
        path=_get_test_path('csv'),
        header_row_seek=True,
        fields=field_names,
    )
def test3_9_fuzzytable_invalidmode(first_names):
    """An unrecognised `mode` argument makes FuzzyTable raise ModeError."""
    # WHEN FuzzyTable receives an invalid mode argument...
    invalid_mode = 'this is an invalid mode!'

    # THEN ModeError is raised.
    with pytest.raises(exceptions.ModeError):
        FuzzyTable(path=first_names.path, mode=invalid_mode)
def test_10_1_fieldpatternerror():
    """A `fields` value that is neither a string nor a FieldPattern is rejected."""
    # This raises an error since it's neither string nor FieldPattern.
    bad_fields = 42

    with pytest.raises(exceptions.InvalidFieldError):
        FuzzyTable(
            name='does not matter',
            path='also does not matter',
            fields=bad_fields,
        )
def test3_8_fuzzytableproperties(first_names):
    """A FuzzyTable built with only a path exposes the expected default settings."""
    # GIVEN a default fuzzytable call...
    table = FuzzyTable(path=first_names.path)

    # THEN the min_ratio, mode and case_sensitive properties return the
    # expected defaults.
    assert table.min_ratio == 0.6
    assert table.mode == 'exact'
    assert table.case_sensitive == True
def test_missingfielderror(firstlastnames, kwargs):
    """With `missingfieldserror_active=True`, the requested fields raise MissingFieldError."""
    # Presumably 'middle_name' is the field the fixture table lacks -- verify
    # against the fixture.
    requested_fields = ['first_name', 'last_name', 'middle_name']

    with pytest.raises(MissingFieldError):
        FuzzyTable(
            path=firstlastnames.path,
            fields=requested_fields,
            missingfieldserror_active=True,
            **kwargs,
        )
def test_10_2_seek_but_no_fields(get_test_path, header_row_seek, fields):
    """The supplied header_row_seek/fields combination raises InvalidFieldError."""
    # GIVEN a table whose headers are NOT in row 1...
    csv_path = get_test_path('csv')

    # WHEN user seeks header row without supplying needed or correct
    # field_names...
    # THEN InvalidFieldError is raised.
    with pytest.raises(exceptions.InvalidFieldError):
        FuzzyTable(
            path=csv_path,
            header_row_seek=header_row_seek,
            fields=fields,
        )
def test_3_4_seek_single_field(get_test_path, kwargs):
    """Header seeking with the supplied kwargs yields 'first_name' as the first field."""
    # GIVEN a table whose headers are NOT in row 1...
    # WHEN user seeks header row and supplies single field_names...
    table = FuzzyTable(
        path=get_test_path('csv'),
        header_row_seek=True,
        **kwargs,
    )

    # THEN nothing breaks and the first field is 'first_name'.
    first_field = table.fields[0]
    assert first_field.name == 'first_name'
def test_seek_too_few_rows(firstlastnames_startrow4, kwargs, expected_fieldcount):
    """The number of extracted fields matches the expectation for the given kwargs."""
    # WHEN user seeks table in too few rows...
    fixture = firstlastnames_startrow4
    table = FuzzyTable(
        path=fixture.path,
        fields=fixture.fields.keys(),
        **kwargs,
    )

    # THEN the expected number of fields is extracted.
    assert len(table) == expected_fieldcount

    # ALSO the table can be printed without raising.
    print(table)
def test_10_4_casesensitive(firstlastnames):
    """Upper-case field names are found when `case_sensitive=False`."""
    # GIVEN a table with headers 'first_name' and 'last_name'...
    # WHEN doing exact, but case-insensitive header search...
    wanted_names = ['FIRST_NAME', 'LAST_NAME']
    table = FuzzyTable(
        path=firstlastnames.path,
        fields=wanted_names,
        case_sensitive=False,
    )

    # THEN those fields are successfully found, under the requested names.
    found_names = [field.name for field in table.fields]
    assert found_names == wanted_names
def test_7_1_approx_names(firstlastnames, min_ratio, expected_fieldcount):
    """Approximate matching finds the expected number of fields for each min_ratio."""
    # GIVEN a table with headers 'first_name' and 'last_name'...
    # WHEN the user desires the following slightly different fields...
    candidate_fields = [
        'first_name',
        'given name',
        'twas the night before christmas',
    ]
    table = FuzzyTable(
        path=firstlastnames.path,
        fields=candidate_fields,
        header_row_seek=True,
        name='names',
        approximate_match=True,
        min_ratio=min_ratio,
    )

    # THEN the number of matched fields depends on min_ratio.
    assert len(table.fields) == expected_fieldcount
def test_header_row_errors(get_test_path, dr_who_fields, header_row):
    """An invalid `header_row` value makes FuzzyTable raise InvalidRowError."""
    # GIVEN a table whose headers are NOT in row 1...
    path = get_test_path('csv')

    # WHEN user gives an invalid header_row value...
    fields = dr_who_fields.keys()

    # THEN InvalidRowError is raised.
    # pytest.raises (rather than try/except with `assert False`) keeps the
    # check effective even when assertions are stripped with `python -O`.
    with pytest.raises(exceptions.InvalidRowError):
        FuzzyTable(
            path=path,
            fields=fields,
            header_row=header_row,
        )
def test_10_3_searchterms_excludename(
        searchterms_excludename, expected_matchedheader, firstlastnames):
    """`searchterms_excludename` determines which header a name/alias pattern matches."""
    # GIVEN a table with headers 'first_name' and 'last_name'...
    # WHEN the user searches with a pattern named 'first_name' whose alias is
    # 'last_name', using the given searchterms_excludename setting...
    pattern = FieldPattern(
        name='first_name',
        alias='last_name',
        searchterms_excludename=searchterms_excludename,
    )
    table = FuzzyTable(
        path=firstlastnames.path,
        fields=pattern,
    )

    # THEN the first field's matched header is the expected one.
    assert table.fields[0].header == expected_matchedheader
def execute(self) -> Dict[utils.ApplicantType, List[Dict]]:
    """Read all applications from the Excel workbook at ``self._path``.

    For this to work, there needs to be one excel workbook with the
    following worksheets: ``mentor``, ``mentee`` and ``favor``.

    Each record's location/gender answers are regrouped into one list per
    preference key, and every mentee gets a ``'favor'`` value taken from the
    ``favor`` worksheet (0 when the mentee's wwid is not listed there).

    Returns:
        A dict mapping each applicant type to its list of application dicts.

    Raises:
        MentormatchError: if FuzzyTable cannot read a worksheet or a
            required field is missing.
    """
    # --- get applications from excel -------------------------------------
    all_applications: Dict[utils.ApplicantType, List[Dict]] = {}
    for applicant_type, fieldpatterns in fieldschemas.items():
        try:
            applications = FuzzyTable(
                path=self._path,
                sheetname=applicant_type.name.lower(),
                fields=fieldpatterns,
                header_row=1,
                name=applicant_type.name,
                missingfieldserror_active=True,
            )
        except fe.MissingFieldError as e:  # pragma: no cover
            # "\n" (not "/n") so the hint appears on its own line.
            msg = str(e) + "\nMake sure your headers are in row 1."
            raise MentormatchError(msg) from e
        except fe.FuzzyTableError as e:  # pragma: no cover
            raise MentormatchError(str(e)) from e

        application_list = []
        locs_and_genders = utils.ApplicationSchema.get_locations_and_genders()
        for record in applications.records:
            application = dict(record)
            # Start every preference key with an empty list...
            application.update(
                {val.get_preference_key(): [] for val in utils.YesNoMaybe})
            # ...then move each location/gender answer into the list that
            # corresponds to the applicant's stated preference.
            for loc_or_gender in locs_and_genders:  # e.g. 'horsham'
                pref_str = application.pop(loc_or_gender)  # e.g. 'no'
                pref_key = utils.YesNoMaybe.get_enum(
                    pref_str).get_preference_key()  # e.g. 'preference_no'
                application[pref_key].append(loc_or_gender)
            application_list.append(application)
        all_applications[applicant_type] = application_list

    # --- get "favored" status for mentees --------------------------------
    try:
        favor_table = FuzzyTable(
            path=self._path,
            sheetname='favor',
            fields=favor,
            name='favored_mentees',
            approximate_match=False,
            missingfieldserror_active=True,
        )
    except fe.FuzzyTableError as e:  # pragma: no cover
        raise MentormatchError(str(e)) from e

    favor_by_wwid = {
        mentee['wwid']: mentee['favor']
        for mentee in favor_table.records
    }
    for mentee in all_applications[utils.ApplicantType.MENTEE]:
        # Mentees absent from the 'favor' sheet default to 0.
        mentee['favor'] = favor_by_wwid.get(mentee['wwid'], 0)

    # --- return applications ---------------------------------------------
    return all_applications