Python FuzzyTable示例，fuzzytable.FuzzyTable Python示例

示例#1

0

显示文件

文件： test_3_csv.py 项目： jonathanchukinas/fuzzytable

def test3_7_multifield(first_names):
    """Check that several 'name'-like headers collapse into one multifield."""

    # GIVEN the fixture table and a deliberately permissive match threshold...
    source = first_names.path
    threshold = 0.3
    name_pattern = FieldPattern('name', multifield=True, min_ratio=threshold)

    # WHEN 'id' plus the 'name' multifield pattern are requested...
    table = FuzzyTable(
        path=source,
        approximate_match=True,
        fields=['id', name_pattern],
        header_row_seek=True,
    )

    # THEN exactly the two requested fields come back, in order.
    assert list(table.keys()) == 'id name'.split()

    # THEN the 'name' field is made of three subfields.
    names: datamodel.MultiField = table.get_field('name')
    assert len(names.subfields) == 3

    # THEN the last column covered by the 'name' field is column 4.
    assert names.col_num_last == 4

    # THEN indexing the multifield yields one tuple per row, and that tuple
    # is the same value exposed through ``.data``.
    first_row_names = names[0]
    assert first_row_names == tuple('frank susan james'.split())
    assert first_row_names == names.data[0]

    # THEN the single 'id' field can be indexed by row as well.
    ids = table.get_field('id')
    assert ids[0] == 0

    # THEN both fields have the same length: three rows.
    assert len(ids) == len(names) == 3

    # THEN the matched headers are reported in this order.
    assert names.header == ('name 2', 'name 1', 'name 3')

    # THEN the reported match ratio respects the requested minimum.
    assert names.ratio >= threshold

示例#2

0

显示文件

文件： test_3_csv.py 项目： jonathanchukinas/fuzzytable

def test_user_generated_fieldpatterns(firstlastnames):
    """Check that hand-built FieldPatterns with aliases are accepted."""

    # GIVEN one pattern found through a single alias (approximate matching)
    # and another pattern that carries a list of aliases...
    first_pattern = FieldPattern(
        name='something totally different',
        alias='first name',
        approximate_match=True,
    )
    last_pattern = FieldPattern(
        name='last_name',
        alias=['last name', 'LastName'],
    )

    # WHEN both patterns are handed to FuzzyTable...
    table = FuzzyTable(
        path=firstlastnames.path,
        fields=[first_pattern, last_pattern],
        header_row_seek=True,
        name='names',
    )

    # THEN the table holds as many fields as the fixture declares.
    assert len(table) == len(firstlastnames.fields)

示例#3

0

显示文件

文件： test_3_csv.py 项目： jonathanchukinas/fuzzytable

def test3_6_compare_fieldnames(first_names):
    """Check that header seeking returns the fixture's field names in order."""

    # WHEN the table is read with header seeking and the fixture's names...
    table = FuzzyTable(
        path=first_names.path,
        header_row_seek=True,
        fields=first_names.fieldnames,
    )

    # THEN the extracted field names equal the requested ones.
    assert list(table.keys()) == first_names.fieldnames

示例#4

0

显示文件

def ft_dr_who(field_names):
    """Return a header-seeking FuzzyTable for ``field_names``.

    The table is read from the path returned by ``_get_test_path('csv')``.
    """
    options = {
        'path': _get_test_path('csv'),
        'header_row_seek': True,
        'fields': field_names,
    }
    return FuzzyTable(**options)

示例#5

0

显示文件

文件： test_3_csv.py 项目： jonathanchukinas/fuzzytable

def test3_9_fuzzytable_invalidmode(first_names):
    """Check that an unrecognised ``mode`` argument raises ModeError."""

    # WHEN FuzzyTable receives a mode string that is not valid,
    # THEN ModeError is raised.
    with pytest.raises(exceptions.ModeError):
        FuzzyTable(
            path=first_names.path,
            mode='this is an invalid mode!',
        )

示例#6

0

显示文件

文件： test_v010_fieldpatterns.py 项目： jonathanchukinas/fuzzytable

def test_10_1_fieldpatternerror():
    """Check that a ``fields`` value of the wrong type raises InvalidFieldError."""

    # 42 is neither a string nor a FieldPattern, so it must be rejected.
    bogus_fields = 42

    with pytest.raises(exceptions.InvalidFieldError):
        FuzzyTable(
            name='does not matter',
            path='also does not matter',
            fields=bogus_fields,
        )

示例#7

0

显示文件

文件： test_3_csv.py 项目： jonathanchukinas/fuzzytable

def test3_8_fuzzytableproperties(first_names):
    """Check the default values of FuzzyTable's matching properties."""

    # GIVEN a FuzzyTable built with nothing but a path...
    table = FuzzyTable(path=first_names.path)

    # THEN each matching property reports its default value.
    expected_defaults = (
        ('min_ratio', 0.6),
        ('mode', 'exact'),
        ('case_sensitive', True),
    )
    for attribute, default in expected_defaults:
        assert getattr(table, attribute) == default

示例#8

0

显示文件

def test_missingfielderror(firstlastnames, kwargs):
    """Check that MissingFieldError is raised when the check is switched on."""

    # Three field names are requested together with
    # ``missingfieldserror_active=True``; the call must raise.
    requested = 'first_name last_name middle_name'.split()

    with pytest.raises(MissingFieldError):
        FuzzyTable(
            path=firstlastnames.path,
            fields=requested,
            missingfieldserror_active=True,
            **kwargs,
        )

示例#9

0

显示文件

文件： test_v010_fieldpatterns.py 项目： jonathanchukinas/fuzzytable

def test_10_2_seek_but_no_fields(get_test_path, header_row_seek, fields):
    """Check that the supplied seek/fields combination raises InvalidFieldError."""

    # GIVEN the csv test table...
    options = {
        'path': get_test_path('csv'),
        'header_row_seek': header_row_seek,
        'fields': fields,
    }

    # WHEN FuzzyTable is built from the parametrized arguments,
    # THEN InvalidFieldError is raised.
    with pytest.raises(exceptions.InvalidFieldError):
        FuzzyTable(**options)

示例#10

0

显示文件

文件： test_3_csv.py 项目： jonathanchukinas/fuzzytable

def test_3_4_seek_single_field(get_test_path, kwargs):
    """Check that header seeking works with the parametrized field argument."""

    # WHEN the csv test table is read with header seeking switched on...
    table = FuzzyTable(
        path=get_test_path('csv'),
        header_row_seek=True,
        **kwargs,
    )

    # THEN the first extracted field is 'first_name'.
    first_field = table.fields[0]
    assert first_field.name == 'first_name'

示例#11

0

显示文件

文件： test_3_csv.py 项目： jonathanchukinas/fuzzytable

def test_seek_too_few_rows(
        firstlastnames_startrow4,
        kwargs,
        expected_fieldcount,
):
    """Check how many fields are found for each parametrized seek setting."""

    # WHEN the table is read with the parametrized keyword arguments...
    fixture = firstlastnames_startrow4
    table = FuzzyTable(
        path=fixture.path,
        fields=fixture.fields.keys(),
        **kwargs,
    )

    # THEN the number of extracted fields matches the expectation.
    assert len(table) == expected_fieldcount

    # ALSO printing the table must not raise.
    print(table)

示例#12

0

显示文件

文件： test_v010_fieldpatterns.py 项目： jonathanchukinas/fuzzytable

def test_10_4_casesensitive(firstlastnames):
    """Check that upper-case field names match when case_sensitive is False."""

    # GIVEN upper-case field names
    # (presumably the table's headers are lower-case; verify against fixture)...
    upper_names = 'FIRST_NAME LAST_NAME'.split()

    # WHEN the header search is case-insensitive...
    table = FuzzyTable(
        path=firstlastnames.path,
        fields=upper_names,
        case_sensitive=False,
    )

    # THEN the extracted fields carry exactly the requested names.
    found_names = [found.name for found in table.fields]
    assert found_names == upper_names

示例#13

0

显示文件

文件： test_7_fuzzymatching.py 项目： jonathanchukinas/fuzzytable

def test_7_1_approx_names(firstlastnames, min_ratio, expected_fieldcount):
    """Check how many fields approximate matching finds at each min_ratio."""

    # GIVEN three requested names: one spelled like a header
    # (per the original test notes), two that differ from it...
    wanted = ['first_name', 'given name', 'twas the night before christmas']

    # WHEN the table is read with approximate matching at ``min_ratio``...
    options = dict(
        path=firstlastnames.path,
        fields=wanted,
        header_row_seek=True,
        name='names',
        approximate_match=True,
        min_ratio=min_ratio,
    )
    table = FuzzyTable(**options)

    # THEN the number of matched fields is the one expected for that ratio.
    assert len(table.fields) == expected_fieldcount

示例#14

0

显示文件

文件： test_3_csv.py 项目： jonathanchukinas/fuzzytable

def test_header_row_errors(get_test_path, dr_who_fields, header_row):
    """Check that an invalid ``header_row`` makes FuzzyTable raise InvalidRowError.

    ``header_row`` is supplied by the test parametrization; every value it
    takes is expected to be rejected.
    """

    # GIVEN the csv test table and the Dr. Who field names...
    path = get_test_path('csv')
    fields = dr_who_fields.keys()

    # WHEN the user gives an invalid header_row value,
    # THEN InvalidRowError is raised.
    # pytest.raises fails the test with a clear message if nothing is raised,
    # instead of relying on a bare ``assert False``.
    with pytest.raises(exceptions.InvalidRowError):
        FuzzyTable(
            path=path,
            fields=fields,
            header_row=header_row,
        )

示例#15

0

显示文件

文件： test_v010_fieldpatterns.py 项目： jonathanchukinas/fuzzytable

def test_10_3_searchterms_excludename(
        searchterms_excludename,
        expected_matchedheader,
        firstlastnames,
):
    """Check which header matches when the name may be left out of the search."""

    # GIVEN the fixture table...
    source = firstlastnames.path

    # WHEN a pattern named 'first_name' with alias 'last_name' is searched
    # with the parametrized ``searchterms_excludename`` flag...
    pattern = FieldPattern(
        name='first_name',
        alias='last_name',
        searchterms_excludename=searchterms_excludename,
    )
    table = FuzzyTable(path=source, fields=pattern)

    # THEN the header matched by the first field is the expected one.
    matched_header = table.fields[0].header
    assert matched_header == expected_matchedheader

示例#16

0

显示文件

文件： importer_implementation_excel.py 项目： jonathanchukinas/mentormatch

    def execute(self) -> Dict[utils.ApplicantType, List[Dict]]:
        """Read all applications from the Excel workbook at ``self._path``.

        The workbook must contain these worksheets:

        * ``mentor``
        * ``mentee``
        * ``favor``

        One worksheet is read per entry of ``fieldschemas``; the sheet name is
        the lower-cased applicant type name. In every record, the
        location/gender columns are folded into preference lists, one list
        per ``utils.YesNoMaybe`` member. Each mentee then receives a
        ``'favor'`` value looked up by ``'wwid'`` in the ``favor`` worksheet,
        defaulting to 0 when the mentee is not listed there.

        Returns:
            A dict mapping each applicant type to its list of application
            dicts.

        Raises:
            MentormatchError: if FuzzyTable fails to read a worksheet. The
                original FuzzyTable exception is chained as the cause.
        """

        # --- get applications from excel -------------------------------------
        all_applications: Dict[utils.ApplicantType, List[Dict]] = {}
        for applicant_type, fieldpatterns in fieldschemas.items():
            try:
                applications = FuzzyTable(
                    path=self._path,
                    sheetname=applicant_type.name.lower(),
                    fields=fieldpatterns,
                    header_row=1,
                    name=applicant_type.name,
                    missingfieldserror_active=True,
                )
            except fe.MissingFieldError as e:  # pragma: no cover
                # "\n" (not "/n") so the hint appears on its own line.
                msg = str(e) + "\nMake sure your headers are in row 1."
                raise MentormatchError(msg) from e
            except fe.FuzzyTableError as e:  # pragma: no cover
                raise MentormatchError(str(e)) from e
            application_list = []
            locs_and_genders = utils.ApplicationSchema.get_locations_and_genders(
            )
            for record in applications.records:
                application = dict(record)
                # Start every preference bucket empty
                # (the inline notes below name one key: 'preference_no').
                application.update(
                    {val.get_preference_key(): []
                     for val in utils.YesNoMaybe})
                # Move each location/gender column into the bucket that
                # matches the answer stored in that column.
                for loc_or_gender in locs_and_genders:  # e.g. 'horsham'
                    pref_str = application.pop(loc_or_gender)  # e.g. 'no'
                    pref_key = utils.YesNoMaybe.get_enum(
                        pref_str).get_preference_key()  # e.g. 'preference_no'
                    application[pref_key].append(loc_or_gender)
                application_list.append(application)
            all_applications[applicant_type] = application_list

        # --- get "favored" status for mentees --------------------------------
        try:
            favored_mentees = FuzzyTable(
                path=self._path,
                sheetname='favor',
                fields=favor,
                name='favored_mentees',
                approximate_match=False,
                missingfieldserror_active=True,
            )
        except fe.FuzzyTableError as e:  # pragma: no cover
            raise MentormatchError(str(e)) from e
        # Lookup of favor value by wwid, kept under its own name rather than
        # rebinding the table variable to a dict.
        favor_by_wwid = {
            mentee['wwid']: mentee['favor']
            for mentee in favored_mentees.records
        }
        for mentee in all_applications[utils.ApplicantType.MENTEE]:
            # Mentees missing from the 'favor' sheet get a favor of 0.
            mentee['favor'] = favor_by_wwid.get(mentee['wwid'], 0)

        # --- return applications ---------------------------------------------
        return all_applications