def test_fuzzy_matcher_run_on_feat_flag_enabled(enable_fuzzy_matcher):
    """Check that ``fuzzy_match`` calls the patched ``match`` function.

    ``enable_fuzzy_matcher`` is supplied from outside this definition;
    presumably a fixture that switches the feature flag on -- confirm
    against its definition.
    """
    # NOTE(review): a test with this same name is defined again right after
    # this one; Python binds the name to the later definition.
    workflow_obj = MockObj({}, {})
    workflow_eng = MockEng()

    target = 'inspirehep.modules.workflows.tasks.matching.match'
    with patch(target) as patched_match:
        fuzzy_match(workflow_obj, workflow_eng)
        patched_match.assert_called()
def test_fuzzy_matcher_run_on_feat_flag_enabled(enable_fuzzy_matcher):
    """Run ``fuzzy_match`` with ``match`` patched and assert it was called.

    ``enable_fuzzy_matcher`` comes from outside this definition (presumably
    a fixture turning the fuzzy-matcher feature on -- verify at its source).
    """
    # NOTE(review): a test with this same name is also defined immediately
    # before this one; this later definition is the one bound to the name.
    empty_data, empty_extra_data = {}, {}
    fake_obj = MockObj(empty_data, empty_extra_data)
    fake_eng = MockEng()

    with patch('inspirehep.modules.workflows.tasks.matching.match') as match_mock:
        fuzzy_match(fake_obj, fake_eng)
        match_mock.assert_called()
def test_fuzzy_match_returns_true_if_something_matched_with_publication_info(
        mock_match, enable_fuzzy_matcher):
    """A match carrying ``publication_info`` is stored with that field intact.

    ``mock_match`` is made to yield one hit whose ``_source`` is the matched
    record; the test then asserts ``fuzzy_match`` is truthy and that
    ``matches.fuzzy`` in ``extra_data`` holds the control number, the title
    and the unchanged publication info.  Both arguments are supplied from
    outside this definition (not visible here).
    """
    # NOTE(review): this test name is defined a second time further down in
    # this file; the later definition replaces this one at import time.
    hep_properties = load_schema('hep')['properties']
    publication_info_schema = hep_properties['publication_info']
    titles_schema = hep_properties['titles']

    pubinfo = {
        'artid': '054021',
        'journal_issue': '5',
        'journal_title': 'Phys.Rev.D',
        'journal_volume': '94',
        'pubinfo_freetext': 'Phys. Rev. D94 (2016) 054021',
        'year': 2016,
    }

    # Independent copies: the input record and the expectation must never
    # share a dict, otherwise a mutation of the input would go unnoticed.
    matched_record = {
        'control_number': 1472986,
        'titles': [{'title': 'title'}],
        'publication_info': [dict(pubinfo)],
    }
    assert validate(matched_record['titles'], titles_schema) is None
    assert validate(matched_record['publication_info'],
                    publication_info_schema) is None

    mock_match.return_value = iter([{'_source': matched_record}])

    workflow_obj = MockObj({}, {})
    workflow_eng = MockEng()

    assert fuzzy_match(workflow_obj, workflow_eng)
    assert 'matches' in workflow_obj.extra_data

    expected_matches = [{
        'control_number': 1472986,
        'title': 'title',
        'publication_info': [dict(pubinfo)],
    }]
    stored_matches = get_value(workflow_obj.extra_data, 'matches.fuzzy')

    assert expected_matches == stored_matches
def test_fuzzy_match_returns_true_if_something_matched_without_abstracts(
        mock_match, enable_fuzzy_matcher):
    """A matched record with only a title yields a title-only fuzzy match.

    The record fed through ``mock_match`` has a control number and titles
    and nothing else; the test asserts the stored ``matches.fuzzy`` entry
    contains exactly the control number and the title.  Both arguments are
    supplied from outside this definition (not visible here).
    """
    titles_schema = load_schema('hep')['properties']['titles']

    matched_record = {
        'control_number': 4328,
        'titles': [{'title': 'title'}],
    }
    assert validate(matched_record['titles'], titles_schema) is None

    hits = [{'_source': matched_record}]
    mock_match.return_value = iter(hits)

    workflow_obj = MockObj({}, {})
    workflow_eng = MockEng()

    assert fuzzy_match(workflow_obj, workflow_eng)
    assert 'matches' in workflow_obj.extra_data

    expected_matches = [{'control_number': 4328, 'title': 'title'}]
    stored_matches = get_value(workflow_obj.extra_data, 'matches.fuzzy')

    assert expected_matches == stored_matches
def test_fuzzy_match_returns_true_if_something_matched_with_earliest_date(
        mock_match, enable_fuzzy_matcher):
    """A matched record's ``earliest_date`` is carried into the stored match.

    Asserts that ``fuzzy_match`` is truthy and that ``matches.fuzzy`` in
    ``extra_data`` equals one entry with the control number, the title and
    the same ``earliest_date`` string.  Both arguments are supplied from
    outside this definition (not visible here).
    """
    titles_schema = load_schema('hep')['properties']['titles']

    matched_record = {
        'control_number': 1472986,
        'titles': [{'title': 'title'}],
        'earliest_date': '2016-06-29',
    }
    assert validate(matched_record['titles'], titles_schema) is None

    mock_match.return_value = iter([{'_source': matched_record}])

    workflow_obj = MockObj({}, {})
    workflow_eng = MockEng()

    assert fuzzy_match(workflow_obj, workflow_eng)
    assert 'matches' in workflow_obj.extra_data

    expected_matches = [{
        'control_number': 1472986,
        'title': 'title',
        'earliest_date': '2016-06-29',
    }]
    stored_matches = get_value(workflow_obj.extra_data, 'matches.fuzzy')

    assert expected_matches == stored_matches
def test_fuzzy_match_returns_true_if_something_matched_with_more_than_1_public_notes(
        mock_match, enable_fuzzy_matcher):
    """Several public notes are all kept, reduced to their ``value`` key.

    The matched record carries two public notes with ``source`` and
    ``value``; the test asserts the stored ``matches.fuzzy`` entry lists
    both notes with only the ``value`` key.  Both arguments are supplied
    from outside this definition (not visible here).
    """
    # NOTE(review): this test name is defined a second time further down in
    # this file; the later definition replaces this one at import time.
    hep_properties = load_schema('hep')['properties']
    public_notes_schema = hep_properties['public_notes']
    titles_schema = hep_properties['titles']

    note_texts = ['4 pages, 4 figures', 'Some other public note']

    matched_record = {
        'control_number': 1472986,
        'titles': [{'title': 'title'}],
        'public_notes': [
            {'source': 'arXiv', 'value': text} for text in note_texts
        ],
    }
    assert validate(matched_record['titles'], titles_schema) is None
    assert validate(matched_record['public_notes'],
                    public_notes_schema) is None

    mock_match.return_value = iter([{'_source': matched_record}])

    workflow_obj = MockObj({}, {})
    workflow_eng = MockEng()

    assert fuzzy_match(workflow_obj, workflow_eng)
    assert 'matches' in workflow_obj.extra_data

    expected_matches = [{
        'control_number': 1472986,
        'title': 'title',
        'public_notes': [{'value': text} for text in note_texts],
    }]
    stored_matches = get_value(workflow_obj.extra_data, 'matches.fuzzy')

    assert expected_matches == stored_matches
def test_fuzzy_match_returns_true_if_something_matched_with_publication_info(
        mock_match, enable_fuzzy_matcher):
    """Publication info of a matched record survives into ``matches.fuzzy``.

    One hit is served through ``mock_match``; afterwards the test asserts a
    truthy return from ``fuzzy_match`` and a single stored match made of
    the control number, the title and the same publication info.  Both
    arguments are supplied from outside this definition (not visible here).
    """
    # NOTE(review): a test with this same name is also defined earlier in
    # this file; this later definition is the one bound to the name.
    schema_properties = load_schema('hep')['properties']
    titles_schema = schema_properties['titles']
    publication_info_schema = schema_properties['publication_info']

    record_titles = [{'title': 'title'}]
    record_publication_info = [{
        'artid': '054021',
        'journal_issue': '5',
        'journal_title': 'Phys.Rev.D',
        'journal_volume': '94',
        'pubinfo_freetext': 'Phys. Rev. D94 (2016) 054021',
        'year': 2016,
    }]
    assert validate(record_titles, titles_schema) is None
    assert validate(record_publication_info, publication_info_schema) is None

    matched_record = {
        'control_number': 1472986,
        'titles': record_titles,
        'publication_info': record_publication_info,
    }
    mock_match.return_value = iter([{'_source': matched_record}])

    fake_obj = MockObj({}, {})
    fake_eng = MockEng()

    assert fuzzy_match(fake_obj, fake_eng)
    assert 'matches' in fake_obj.extra_data

    # Spelled out as a fresh literal so it shares nothing with the input.
    wanted = [{
        'control_number': 1472986,
        'title': 'title',
        'publication_info': [{
            'artid': '054021',
            'journal_issue': '5',
            'journal_title': 'Phys.Rev.D',
            'journal_volume': '94',
            'pubinfo_freetext': 'Phys. Rev. D94 (2016) 054021',
            'year': 2016,
        }],
    }]
    assert wanted == get_value(fake_obj.extra_data, 'matches.fuzzy')
def test_fuzzy_match_returns_true_if_something_matched_with_more_than_1_public_notes(
        mock_match, enable_fuzzy_matcher):
    """Two public notes on the matched record both appear in the result.

    The expectation asserted here keeps each note's ``value`` and omits its
    ``source``.  Both arguments are supplied from outside this definition
    (not visible here).
    """
    # NOTE(review): a test with this same name is also defined earlier in
    # this file; this later definition is the one bound to the name.
    schema_properties = load_schema('hep')['properties']
    titles_schema = schema_properties['titles']
    public_notes_schema = schema_properties['public_notes']

    record_titles = [{'title': 'title'}]
    record_public_notes = [
        {'source': 'arXiv', 'value': '4 pages, 4 figures'},
        {'source': 'arXiv', 'value': 'Some other public note'},
    ]
    assert validate(record_titles, titles_schema) is None
    assert validate(record_public_notes, public_notes_schema) is None

    matched_record = {
        'control_number': 1472986,
        'titles': record_titles,
        'public_notes': record_public_notes,
    }
    mock_match.return_value = iter([{'_source': matched_record}])

    fake_obj = MockObj({}, {})
    fake_eng = MockEng()

    assert fuzzy_match(fake_obj, fake_eng)
    assert 'matches' in fake_obj.extra_data

    wanted = [{
        'control_number': 1472986,
        'title': 'title',
        'public_notes': [
            {'value': '4 pages, 4 figures'},
            {'value': 'Some other public note'},
        ],
    }]
    assert wanted == get_value(fake_obj.extra_data, 'matches.fuzzy')
def test_fuzzy_match_returns_false_if_nothing_matched(
        mock_match, enable_fuzzy_matcher):
    """With no hits, ``fuzzy_match`` is falsy and stores an empty list.

    ``mock_match`` is set to return an empty iterator; the test asserts the
    return value is falsy, that ``extra_data`` gained a ``matches`` key and
    that ``matches.fuzzy`` is ``[]``.  Both arguments are supplied from
    outside this definition (not visible here).
    """
    # NOTE(review): a test with this same name is defined again right after
    # this one; Python binds the name to the later definition.
    mock_match.return_value = iter([])

    workflow_obj = MockObj({}, {})
    workflow_eng = MockEng()

    anything_matched = fuzzy_match(workflow_obj, workflow_eng)
    assert not anything_matched
    assert 'matches' in workflow_obj.extra_data

    stored_matches = get_value(workflow_obj.extra_data, 'matches.fuzzy')
    assert [] == stored_matches
def test_fuzzy_match_returns_false_if_nothing_matched(
        mock_match, enable_fuzzy_matcher):
    """An empty result from ``mock_match`` leads to a falsy ``fuzzy_match``.

    Also asserts that ``matches.fuzzy`` in ``extra_data`` is an empty list
    afterwards.  Both arguments are supplied from outside this definition
    (not visible here).
    """
    # NOTE(review): a test with this same name is also defined immediately
    # before this one; this later definition is the one bound to the name.
    no_hits = []
    mock_match.return_value = iter(no_hits)

    empty_data, empty_extra_data = {}, {}
    fake_obj = MockObj(empty_data, empty_extra_data)
    fake_eng = MockEng()

    assert not fuzzy_match(fake_obj, fake_eng)
    assert 'matches' in fake_obj.extra_data

    wanted = []
    assert wanted == get_value(fake_obj.extra_data, 'matches.fuzzy')
def test_fuzzy_match_returns_true_if_something_matched_with_1_author(
        mock_match, enable_fuzzy_matcher):
    """A single author and ``authors_count`` are kept in the stored match.

    The matched record has one author and ``authors_count`` of 1; the test
    asserts the ``matches.fuzzy`` entry contains the same author list and
    count next to the control number and title.  Both arguments are
    supplied from outside this definition (not visible here).
    """
    hep_properties = load_schema('hep')['properties']
    authors_schema = hep_properties['authors']
    titles_schema = hep_properties['titles']

    matched_record = {
        'control_number': 4328,
        'titles': [{'title': 'title'}],
        'authors': [{'full_name': 'Author 1'}],
        'authors_count': 1,
    }
    assert validate(matched_record['titles'], titles_schema) is None
    assert validate(matched_record['authors'], authors_schema) is None

    mock_match.return_value = iter([{'_source': matched_record}])

    workflow_obj = MockObj({}, {})
    workflow_eng = MockEng()

    assert fuzzy_match(workflow_obj, workflow_eng)
    assert 'matches' in workflow_obj.extra_data

    expected_matches = [{
        'control_number': 4328,
        'title': 'title',
        'authors': [{'full_name': 'Author 1'}],
        'authors_count': 1,
    }]
    stored_matches = get_value(workflow_obj.extra_data, 'matches.fuzzy')

    assert expected_matches == stored_matches
def test_fuzzy_match_returns_true_if_something_matched_with_arxiv_eprints(
        mock_match, enable_fuzzy_matcher):
    """An arXiv eprint on the matched record shows up as ``arxiv_eprint``.

    The matched record carries one ``arxiv_eprints`` entry; the test asserts
    the stored ``matches.fuzzy`` entry exposes its ``value`` under the
    singular ``arxiv_eprint`` key, next to the control number and title.
    Both arguments are supplied from outside this definition (not visible
    here).
    """
    hep_properties = load_schema('hep')['properties']
    arxiv_eprints_schema = hep_properties['arxiv_eprints']
    titles_schema = hep_properties['titles']

    eprint_value = '1606.09129'
    matched_record = {
        'control_number': 1472986,
        'titles': [{'title': 'title'}],
        'arxiv_eprints': [
            {'categories': ['hep-ph'], 'value': eprint_value},
        ],
    }
    assert validate(matched_record['titles'], titles_schema) is None
    assert validate(matched_record['arxiv_eprints'],
                    arxiv_eprints_schema) is None

    mock_match.return_value = iter([{'_source': matched_record}])

    workflow_obj = MockObj({}, {})
    workflow_eng = MockEng()

    assert fuzzy_match(workflow_obj, workflow_eng)
    assert 'matches' in workflow_obj.extra_data

    expected_matches = [{
        'control_number': 1472986,
        'title': 'title',
        'arxiv_eprint': eprint_value,
    }]
    stored_matches = get_value(workflow_obj.extra_data, 'matches.fuzzy')

    assert expected_matches == stored_matches
def test_fuzzy_match_returns_true_if_something_matched_with_4_authors(
        mock_match, enable_fuzzy_matcher):
    """Only the first three of four authors are expected in the stored match.

    The matched record lists four authors with ``authors_count`` of 4; the
    test asserts the ``matches.fuzzy`` entry holds the first three authors
    while ``authors_count`` stays 4.  Both arguments are supplied from
    outside this definition (not visible here).
    """
    hep_properties = load_schema('hep')['properties']
    authors_schema = hep_properties['authors']
    titles_schema = hep_properties['titles']

    author_names = ['Author 1', 'Author, 2', 'Author, 3', 'Author, 4']

    matched_record = {
        'control_number': 4328,
        'titles': [{'title': 'title'}],
        'authors': [{'full_name': name} for name in author_names],
        'authors_count': 4,
    }
    assert validate(matched_record['titles'], titles_schema) is None
    assert validate(matched_record['authors'], authors_schema) is None

    mock_match.return_value = iter([{'_source': matched_record}])

    workflow_obj = MockObj({}, {})
    workflow_eng = MockEng()

    assert fuzzy_match(workflow_obj, workflow_eng)
    assert 'matches' in workflow_obj.extra_data

    expected_matches = [{
        'control_number': 4328,
        'title': 'title',
        # The fourth author is absent from the expectation on purpose.
        'authors': [{'full_name': name} for name in author_names[:3]],
        'authors_count': 4,
    }]
    stored_matches = get_value(workflow_obj.extra_data, 'matches.fuzzy')

    assert expected_matches == stored_matches