示例#1
0
def populate_authors_name_variations(record):
    """Attach generated name variations to an Author record.

    Reads the author's name from ``name.value`` and, when it is truthy,
    stores the output of ``generate_name_variations`` under the
    ``name_variations`` key. ``record`` is modified in place.
    """
    name = get_value(record, 'name.value')
    if not name:
        # Nothing to derive variations from; leave the record untouched.
        return

    record['name_variations'] = generate_name_variations(name)
示例#2
0
def populate_authors_name_variations(record):
    """Store the name variations of an Author record on the record itself.

    The name is looked up at ``name.value``; records without a truthy name
    are left unchanged. Otherwise ``record['name_variations']`` is set to
    the result of ``generate_name_variations``.
    """
    value = get_value(record, 'name.value')
    if not value:
        return

    variations = generate_name_variations(value)
    record['name_variations'] = variations
def test_driver_with_simple_query():
    """An ``author:`` query is a full-name match filtered by name variations."""
    author_name = 'Ellis, John'

    # One ``term`` clause per generated variation of the author's name.
    variation_terms = [
        {"term": {"authors.name_variations": variation}}
        for variation in generate_name_variations(author_name)
    ]
    expected_es_query = {
        "bool": {
            "filter": {"bool": {"should": variation_terms}},
            "must": {"match": {"authors.full_name": "Ellis, John"}},
        }
    }

    assert parse_query('author: ' + author_name) == expected_es_query
示例#4
0
def test_generate_name_variations_with_two_lastnames():
    """Check the lowercase variations produced for a name with two lastnames."""
    lastnames_only = {
        u'caro',
        u'caro estevez',
    }
    lastnames_then_non_lastnames = {
        u'caro estevez d',
        u'caro estevez david',
        u'caro estevez, d',
        u'caro estevez, david',
        u'caro d',
        u'caro, d',
        u'caro david',
        u'caro, david',
    }
    non_lastnames_then_lastnames = {
        u'd caro',
        u'd, caro',
        u'd caro estevez',
        u'd, caro estevez',
        u'david caro',
        u'david, caro',
        u'david caro estevez',
        u'david, caro estevez',
    }
    all_expected = (
        lastnames_only
        | lastnames_then_non_lastnames
        | non_lastnames_then_lastnames
    )

    variations = generate_name_variations(u'Caro Estevez, David')

    assert set(variations) == all_expected
def test_elastic_search_visitor_not_op():
    """A negated author query wraps the author query in ``must_not``."""
    author_name = 'Ellis, John'

    # One ``term`` clause per generated variation of the author's name.
    variation_terms = [
        {"term": {"authors.name_variations": variation}}
        for variation in generate_name_variations(author_name)
    ]
    author_query = {
        "bool": {
            "filter": {"bool": {"should": variation_terms}},
            "must": {"match": {"authors.full_name": "Ellis, John"}},
        }
    }
    expected_es_query = {"bool": {"must_not": [author_query]}}

    assert _parse_query('-author ' + author_name) == expected_es_query
示例#6
0
    def get_name_variations_for_author(self, author):
        """Generate name variations for the provided author.

        Args:
            author (dict): author signature; its ``full_name`` value, when
                present and non-empty, is passed to
                ``generate_name_variations``.

        Returns:
            The output of ``generate_name_variations`` for the author's full
            name, or an empty list when the author has no usable
            ``full_name``.
        """
        full_name = author.get("full_name")
        if not full_name:
            # Previously this path reached ``return name_variations`` with the
            # local never assigned, which raised UnboundLocalError.
            return []

        return generate_name_variations(full_name)
示例#7
0
def test_generate_name_variations_with_three_lastnames_dashed_ignores_the_dash():
    """Check the lowercase, unaccented variations of a dashed three-lastname name."""
    lastnames_only = {
        u'caro',
        u'caro estevez martinez',
    }
    lastnames_then_non_lastnames = {
        u'caro estevez martinez d',
        u'caro estevez martinez david',
        u'caro estevez martinez, d',
        u'caro estevez martinez, david',
        u'caro d',
        u'caro, d',
        u'caro david',
        u'caro, david',
    }
    non_lastnames_then_lastnames = {
        u'd caro',
        u'd, caro',
        u'd caro estevez martinez',
        u'd, caro estevez martinez',
        u'david caro',
        u'david, caro',
        u'david caro estevez martinez',
        u'david, caro estevez martinez',
    }
    all_expected = (
        lastnames_only
        | lastnames_then_non_lastnames
        | non_lastnames_then_lastnames
    )

    variations = generate_name_variations(u'Caro-Estévez Martínez, David')

    assert set(variations) == all_expected
    def _generate_author_query(self, author_name):
        """Build a match-plus-filter query tailored to author searches.

        Notes:
            The match query is generic enough to return many results. The filter clause then narrows these down,
            imitating legacy's behaviour of returning more "exact" results. E.g. searching for `Smith, John`
            shouldn't return papers of 'Smith, Bob'.
        """
        # One ``term`` clause per generated variation of the searched name.
        variation_terms = [
            {"term": {ElasticSearchVisitor.AUTHORS_NAME_VARIATIONS_FIELD: variation}}
            for variation in generate_name_variations(author_name)
        ]
        full_name_match = {
            "match": {
                ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['author']: author_name
            }
        }

        return {
            "bool": {
                "filter": {"bool": {"should": variation_terms}},
                "must": full_name_match,
            }
        }
示例#9
0
def test_generate_name_variations_with_three_lastnames_dashed_ignores_the_dash():
    """Check the capitalized, unaccented variations of a dashed three-lastname name."""
    lastnames_only = {
        'Caro',
        'Caro Estevez Martinez',
    }
    lastnames_then_non_lastnames = {
        u'Caro Estevez Martinez D',
        u'Caro Estevez Martinez David',
        u'Caro Estevez Martinez, D',
        u'Caro Estevez Martinez, David',
        u'Caro D',
        u'Caro, D',
        u'Caro David',
        u'Caro, David',
    }
    non_lastnames_then_lastnames = {
        u'D Caro',
        u'D, Caro',
        u'D Caro Estevez Martinez',
        u'D, Caro Estevez Martinez',
        u'David Caro',
        u'David, Caro',
        u'David Caro Estevez Martinez',
        u'David, Caro Estevez Martinez',
    }
    all_expected = (
        lastnames_only
        | lastnames_then_non_lastnames
        | non_lastnames_then_lastnames
    )

    variations = generate_name_variations(u'Caro-Estévez Martínez, David')

    assert set(variations) == all_expected
示例#10
0
def test_generate_name_variations_with_more_than_two_non_lastnames_does_not_add_extra_spaces():
    """Dropping a middle name must not leave a double space in a variation."""
    double_spaced_variation = 'ellis, john  philip'

    variations = set(generate_name_variations('Ellis, John Richard Philip'))

    assert double_spaced_variation not in variations
示例#11
0
def test_generate_name_variations_with_two_lastnames():
    """Check the capitalized variations produced for a name with two lastnames."""
    lastnames_only = {
        'Caro',
        'Caro Estevez',
    }
    lastnames_then_non_lastnames = {
        u'Caro Estevez D',
        u'Caro Estevez David',
        u'Caro Estevez, D',
        u'Caro Estevez, David',
        u'Caro D',
        u'Caro, D',
        u'Caro David',
        u'Caro, David',
    }
    non_lastnames_then_lastnames = {
        u'D Caro',
        u'D, Caro',
        u'D Caro Estevez',
        u'D, Caro Estevez',
        u'David Caro',
        u'David, Caro',
        u'David Caro Estevez',
        u'David, Caro Estevez',
    }
    all_expected = (
        lastnames_only
        | lastnames_then_non_lastnames
        | non_lastnames_then_lastnames
    )

    variations = generate_name_variations(u'Caro Estevez, David')

    assert set(variations) == all_expected
示例#12
0
def populate_name_variations(sender, json, *args, **kwargs):
    """Generate name variations for each signature of a Literature record.

    Only records whose ``$schema`` contains ``hep.json`` are processed. For
    every author with a truthy ``full_name`` the author dict is updated in
    place with:

    * ``name_variations``: the output of ``generate_name_variations``;
    * ``name_suggest``: a dict with the variations as ``input``, the full
      name as ``output`` and, as ``payload``, the first ``INSPIRE BAI`` id
      of the author (``None`` when the author has none).

    Args:
        sender: unused.
        json (dict): the record; modified in place.
        *args: unused.
        **kwargs: unused.
    """
    # ``$schema`` may be absent (or None); treat that as "not a Literature
    # record" instead of raising TypeError on the membership test.
    schema = json.get('$schema') or ''
    if 'hep.json' not in schema:
        return

    for author in json.get('authors', []):
        full_name = author.get('full_name')
        if not full_name:
            continue

        bais = [
            el['value'] for el in author.get('ids', [])
            if el['schema'] == 'INSPIRE BAI'
        ]
        name_variations = generate_name_variations(full_name)

        author.update({'name_variations': name_variations})
        author.update({
            'name_suggest': {
                'input': name_variations,
                'output': full_name,
                'payload': {
                    'bai': bais[0] if bais else None
                }
            }
        })
示例#13
0
def test_generate_name_variations_with_one_name_does_capitalization():
    """A single lowercase name yields only its capitalized form."""
    variations = generate_name_variations('jimmy')

    assert set(variations) == {'Jimmy'}
示例#14
0
def test_generate_name_variations_with_only_one_name():
    """A single-token name yields only its lowercased form."""
    variations = generate_name_variations('Jimmy')

    assert set(variations) == {u'jimmy'}
示例#15
0
def populate_authors_name_variations(sender, json, *args, **kwargs):
    """Add generated name variations to an Author record.

    Records for which ``is_author`` is false, or whose ``name.value`` is
    missing or empty, are left unchanged. Otherwise ``json`` is updated in
    place with a ``name_variations`` entry.
    """
    if not is_author(json):
        return

    name = get_value(json, 'name.value')
    if not name:
        return

    json.update({'name_variations': generate_name_variations(name)})
示例#16
0
def populate_name_variations(record):
    """Add name variations to every author signature of a Literature record.

    Each author with a truthy ``full_name`` is updated in place with
    ``name_variations`` and with a ``name_suggest`` entry whose ``input``
    holds the truthy variations only.
    """
    for signature in record.get('authors', []):
        full_name = signature.get('full_name')
        if not full_name:
            continue

        variations = generate_name_variations(full_name)
        signature.update({
            'name_variations': variations,
            'name_suggest': {
                'input': [candidate for candidate in variations if candidate],
            },
        })
示例#17
0
def populate_name_variations(record):
    """Generate name variations for each signature of a Literature record.

    Authors lacking a truthy ``full_name`` are skipped. The others receive,
    in place, a ``name_variations`` entry and a ``name_suggest`` entry whose
    ``input`` lists the non-empty variations.
    """
    named_authors = (
        author for author in record.get('authors', [])
        if author.get('full_name')
    )

    for author in named_authors:
        variations = generate_name_variations(author.get('full_name'))
        non_empty_variations = [entry for entry in variations if entry]

        author.update({'name_variations': variations})
        author.update({'name_suggest': {'input': non_empty_variations}})
示例#18
0
def test_generate_name_variations_with_many_names_defers_generating_variations():
    """With too many names the input comes back unchanged and an error is logged."""
    import logging

    many_names_as_one_author = 'Tseng, Farrukh Azfar Todd Huffman Thilo Pauly'
    name_logger = logging.getLogger('inspire_utils.name')

    with patch.object(name_logger, 'error') as mock_error:
        variations = generate_name_variations(many_names_as_one_author)

    assert variations == [many_names_as_one_author]

    # First positional argument of the most recent ``error`` call.
    logged_message = mock_error.call_args[0][0]
    assert logged_message.startswith(
        'Skipping name variations generation - too many names')
示例#19
0
def test_generate_name_variations_with_firstname_as_initial():
    """Check the variations of a name whose first name is just an initial."""
    lastname_only = {u'smith'}
    lastname_then_initial = {u'smith j', u'smith, j'}
    initial_then_lastname = {u'j smith', u'j, smith'}

    variations = generate_name_variations('Smith, J')

    assert set(variations) == (
        lastname_only | lastname_then_initial | initial_then_lastname
    )
def test_populate_authors_name_variations():
    """The populated ``name_variations`` match those generated from the name."""
    author_record = {
        '$schema': 'http://localhost:5000/records/schemas/authors.json',
        'name': {'value': 'Silk, James Brian'},
        '_collections': ['Authors'],
    }
    assert validate(author_record, load_schema('authors')) is None

    # Signal-style call: the sender is irrelevant here.
    populate_authors_name_variations(None, author_record)

    from_name = generate_name_variations(author_record['name'].get('value'))
    assert from_name == author_record['name_variations']
def test_populate_authors_name_variations():
    """The populated ``name_variations`` match those generated from the name."""
    authors_schema = load_schema('authors')
    author_record = InspireRecord(
        {
            '$schema': 'http://localhost:5000/records/schemas/authors.json',
            'name': {'value': 'Silk, James Brian'},
            '_collections': ['Authors'],
        },
        model=RecordMetadata,
    )
    assert validate(author_record, authors_schema) is None

    populate_authors_name_variations(author_record)

    from_name = generate_name_variations(author_record['name'].get('value'))
    assert from_name == author_record['name_variations']
示例#22
0
def test_generate_name_variations_with_short_lastname_and_initial():
    """A short lastname must not be reduced to an initial (e.g. no `o y`)."""
    lastname_only = {u'oz'}
    lastname_then_initial = {u'oz y', u'oz, y'}
    initial_then_lastname = {u'y oz', u'y, oz'}
    all_expected = lastname_only | lastname_then_initial | initial_then_lastname

    variations = generate_name_variations('Oz, Y')

    # Same length first, so duplicated variations are caught too.
    assert len(variations) == len(all_expected)

    assert set(variations) == all_expected
示例#23
0
def populate_name_variations(sender, json, *args, **kwargs):
    """Add name variations to every author signature of a Literature record.

    Records for which ``is_hep`` is false are ignored. Each author with a
    truthy ``full_name`` is updated in place with ``name_variations`` and
    with a ``name_suggest`` entry whose ``input`` holds the truthy
    variations only.
    """
    if not is_hep(json):
        return

    for signature in json.get('authors', []):
        full_name = signature.get('full_name')
        if not full_name:
            continue

        variations = generate_name_variations(full_name)
        suggest_input = [candidate for candidate in variations if candidate]

        signature.update({'name_variations': variations})
        signature.update({'name_suggest': {'input': suggest_input}})
示例#24
0
def test_generate_name_variations_works_with_two_consecutive_commas():
    """A doubled comma in the input does not alter the lowercase variations."""
    lastname_only = {u'perelstein'}
    lastname_then_non_lastname = {
        u'perelstein m',
        u'perelstein, m',
        u'perelstein maxim',
        u'perelstein, maxim',
    }
    non_lastname_then_lastname = {
        u'maxim perelstein',
        u'maxim, perelstein',
        u'm perelstein',
        u'm, perelstein',
    }

    variations = generate_name_variations('Perelstein,, Maxim')

    assert set(variations) == (
        lastname_only | lastname_then_non_lastname | non_lastname_then_lastname
    )
示例#25
0
def test_generate_name_variations_capitalizes_first_letters():
    """A lowercase input name yields capitalized variations."""
    lastname_only = {'Mele'}
    lastname_then_non_lastname = {
        'Mele S',
        'Mele, S',
        'Mele Salvatore',
        'Mele, Salvatore',
    }
    non_lastname_then_lastname = {
        'Salvatore Mele',
        'Salvatore, Mele',
        'S Mele',
        'S, Mele',
    }

    variations = generate_name_variations('mele, salvatore')

    assert set(variations) == (
        lastname_only | lastname_then_non_lastname | non_lastname_then_lastname
    )
示例#26
0
def test_generate_name_variations_capitalizes_first_letters():
    """Check the variations generated for an all-lowercase input name."""
    lastname_only = {u'mele'}
    lastname_then_non_lastname = {
        u'mele s',
        u'mele, s',
        u'mele salvatore',
        u'mele, salvatore',
    }
    non_lastname_then_lastname = {
        u'salvatore mele',
        u'salvatore, mele',
        u's mele',
        u's, mele',
    }

    variations = generate_name_variations('mele, salvatore')

    assert set(variations) == (
        lastname_only | lastname_then_non_lastname | non_lastname_then_lastname
    )
示例#27
0
def test_generate_name_variations_works_with_two_consecutive_commas():
    """A doubled comma in the input does not alter the capitalized variations."""
    lastname_only = {'Perelstein'}
    lastname_then_non_lastname = {
        'Perelstein M',
        'Perelstein, M',
        'Perelstein Maxim',
        'Perelstein, Maxim',
    }
    non_lastname_then_lastname = {
        'Maxim Perelstein',
        'Maxim, Perelstein',
        'M Perelstein',
        'M, Perelstein',
    }

    variations = generate_name_variations('Perelstein,, Maxim')

    assert set(variations) == (
        lastname_only | lastname_then_non_lastname | non_lastname_then_lastname
    )
示例#28
0
def test_generate_name_variations_with_two_non_lastnames():
    """Check the capitalized variations of a name with two non-lastnames."""
    wanted = {
        # Lastname first, no comma
        'Ellis',
        'Ellis J',
        'Ellis J R',
        'Ellis J Richard',
        'Ellis John',
        'Ellis John R',
        'Ellis John Richard',
        'Ellis R',
        'Ellis Richard',
        # Lastname first, comma separated
        'Ellis, J',
        'Ellis, J R',
        'Ellis, J Richard',
        'Ellis, John',
        'Ellis, John R',
        'Ellis, John Richard',
        'Ellis, R',
        'Ellis, Richard',
        # Non lastnames first, no comma
        'J Ellis',
        'J R Ellis',
        'J Richard Ellis',
        'John Ellis',
        'John R Ellis',
        'John Richard Ellis',
        'R Ellis',
        'Richard Ellis',
        # Non lastnames first, comma separated
        'J, Ellis',
        'J R, Ellis',
        'J Richard, Ellis',
        'John, Ellis',
        'John R, Ellis',
        'John Richard, Ellis',
        'R, Ellis',
        'Richard, Ellis',
    }

    variations = generate_name_variations('Ellis, John Richard')

    assert set(variations) == wanted
示例#29
0
def test_generate_name_variations_with_two_non_lastnames():
    """Check the lowercase variations of a name with two non-lastnames."""
    wanted = {
        # Lastname first, no comma
        'ellis',
        'ellis j',
        'ellis j r',
        'ellis j richard',
        'ellis john',
        'ellis john r',
        'ellis john richard',
        'ellis r',
        'ellis richard',
        # Lastname first, comma separated
        'ellis, j',
        'ellis, j r',
        'ellis, j richard',
        'ellis, john',
        'ellis, john r',
        'ellis, john richard',
        'ellis, r',
        'ellis, richard',
        # Non lastnames first, no comma
        'j ellis',
        'j r ellis',
        'j richard ellis',
        'john ellis',
        'john r ellis',
        'john richard ellis',
        'r ellis',
        'richard ellis',
        # Non lastnames first, comma separated
        'j, ellis',
        'j r, ellis',
        'j richard, ellis',
        'john, ellis',
        'john r, ellis',
        'john richard, ellis',
        'r, ellis',
        'richard, ellis',
    }

    variations = generate_name_variations('Ellis, John Richard')

    assert set(variations) == wanted
示例#30
0
def populate_name_variations(sender, json, *args, **kwargs):
    """Add name variations to every author signature of a Literature record.

    Records for which ``is_hep`` is false are ignored. Each author with a
    truthy ``full_name`` is updated in place with ``name_variations`` and
    with a ``name_suggest`` entry made of the variations (``input``), the
    full name (``output``) and the author's first ``INSPIRE BAI`` id, or
    ``None`` when there is none (``payload``).
    """
    if not is_hep(json):
        return

    for signature in json.get('authors', []):
        full_name = signature.get('full_name')
        if not full_name:
            continue

        bai_values = [
            identifier['value']
            for identifier in signature.get('ids', [])
            if identifier['schema'] == 'INSPIRE BAI'
        ]
        variations = generate_name_variations(full_name)
        suggestion = {
            'input': variations,
            'output': full_name,
            'payload': {'bai': bai_values[0] if bai_values else None},
        }

        signature.update({'name_variations': variations})
        signature.update({'name_suggest': suggestion})
示例#31
0
 def generate_name_variations(self, full_name):
     """Return the truthy name variations generated for ``full_name``."""
     # Inside the method body the name resolves to the module-level
     # ``generate_name_variations``, not to this method.
     non_empty = []
     for candidate in generate_name_variations(full_name):
         if candidate:
             non_empty.append(candidate)
     return non_empty