Exemplos de iunaccent em Python, exemplos de papers.utils.iunaccent em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: name.py Projeto: Lysxia/dissemin

def name_signature(first, last):
    """
    Build a short signature string for a (first, last) name pair.

    The last name is stripped of surrounding whitespace, normalised with
    iunaccent and then passed through three module-level regexes, in order:
    matches of nn_escaping_chars_re and nn_final_nontext_re are removed and
    matches of nn_nontext_re are replaced by '-'.
    (presumably these drop escape characters and trailing punctuation and
    hyphenate the remaining non-text runs -- confirm against the regex
    definitions.)

    When the first name is non-empty, the normalised first character of the
    first name and a '-' are prepended.
    """
    signature = iunaccent(last.strip())
    substitutions = (
        (nn_escaping_chars_re, ''),
        (nn_final_nontext_re, ''),
        (nn_nontext_re, '-'),
    )
    for pattern, replacement in substitutions:
        signature = pattern.sub(replacement, signature)

    # No first name: the signature is the cleaned last name alone
    if len(first) == 0:
        return signature
    return iunaccent(first[0]) + '-' + signature

Exemplo n.º 2

0

Exibir arquivo

def name_signature(first, last):
    """
    Compute a signature string from a first name and a last name.

    The whitespace-stripped last name goes through iunaccent, then matches
    of nn_escaping_chars_re and nn_final_nontext_re are deleted and matches
    of nn_nontext_re become '-'. If the first name is not empty, its first
    character (also passed through iunaccent) followed by '-' is put in
    front of the result.
    """
    normalized_last = iunaccent(last.strip())
    without_escapes = nn_escaping_chars_re.sub('', normalized_last)
    trimmed = nn_final_nontext_re.sub('', without_escapes)
    hyphenated = nn_nontext_re.sub('-', trimmed)

    if len(first) > 0:
        initial = iunaccent(first[0])
        return initial + '-' + hyphenated
    return hyphenated

Exemplo n.º 3

0

Exibir arquivo

def name_similarity(a, b):
    """
    Score how similar two names are.

    Each argument is expected to be a (first, last) pair. False is returned
    when either argument is empty/None or does not have exactly two items.
    Otherwise the result is a number clamped to the range [0, 1]:

    * 0 when the last names differ after iunaccent normalisation;
    * 0 when the words of the first names cannot be matched pairwise,
      neither in their original order nor in reverse order;
    * 0 when each name spells out a first-name word that the other one
      only abbreviates or lacks;
    * otherwise the sum of the weights of the matched word pairs, minus a
      quarter of the weight of every word left without a partner.
    """
    if not (a and b and len(a) == 2 and len(b) == 2):
        return False

    given_a, family_a = a
    given_b, family_b = b
    given_a = iunaccent(given_a)
    given_b = iunaccent(given_b)
    if iunaccent(family_a) != iunaccent(family_b):
        return 0.

    words_a, _ = split_name_words(given_a)
    words_b, _ = split_name_words(given_b)
    paired = list(zip(words_a, words_b))
    if not all(match_first_names(pair) for pair in paired):
        # Second chance: align the words starting from the end
        words_a.reverse()
        words_b.reverse()
        paired = list(zip(words_a, words_b))
        if not all(match_first_names(pair) for pair in paired):
            return 0.

    score = 0
    # One (spelled out in A, spelled out in B) flag pair per word position;
    # a word counts as spelled out when it is longer than one character
    spelled_out = []
    for pair in paired:
        score += weight_first_names(pair)
        spelled_out.append((len(pair[0]) > 1, len(pair[1]) > 1))
    # At most one of the two loops below runs: only the longer name has
    # words beyond the paired prefix
    for leftover in words_a[len(paired):]:
        score -= 0.25*weight_first_name(leftover)
        spelled_out.append((len(leftover) > 1, False))
    for leftover in words_b[len(paired):]:
        score -= 0.25*weight_first_name(leftover)
        spelled_out.append((False, len(leftover) > 1))

    # The words spelled out in one name must be a subset of those spelled
    # out in the other. This prevents ('Amanda P.','Brown') and
    # ('A. Patrick','Brown') from matching.
    a_covers_b = all(in_a or not in_b for in_a, in_b in spelled_out)
    if not a_covers_b:
        b_covers_a = all(in_b or not in_a for in_a, in_b in spelled_out)
        if not b_covers_a:
            return 0.

    return max(min(score, 1), 0)

Exemplo n.º 4

0

Exibir arquivo

def name_similarity(a, b):
    """
    Returns a float: how similar are these two names?

    Both arguments are (first, last) pairs. All name parts are normalised
    with iunaccent before comparison. Names with different last names
    always score 0. First names are split into words and matched pairwise,
    first in order and, if that fails, in reverse order; each matched pair
    adds its weight and each extra unmatched word subtracts a quarter of
    its weight. The result is clamped to the range [0, 1].

    Returns False when either argument is empty or None.

    Examples (expected values as documented by the original authors):
    name_similarity(('Robin', 'Ryder'),('Robin', 'Ryder')) == 0.8
    name_similarity(('Robin', 'Ryder'),('R.', 'Ryder')) == 0.4
    name_similarity(('R.', 'Ryder'),('R.', 'Ryder')) == 0.4
    name_similarity(('Robin J.', 'Ryder'),('R.', 'Ryder')) == 0.3
    name_similarity(('Robin J.', 'Ryder'),('R. J.', 'Ryder')) == 0.8
    name_similarity(('R. J.', 'Ryder'),('J.', 'Ryder')) == 0.3
    name_similarity(('Robin', 'Ryder'),('Robin J.', 'Ryder')) == 0.7
    name_similarity(('W. Timothy','Gowers'), ('Timothy','Gowers')) == 0.7
    name_similarity(('Robin K.','Ryder'), ('Robin J.', 'Ryder')) == 0
    name_similarity(('Claire', 'Mathieu'),('Claire', 'Kenyon-Mathieu')) == 0
    """

    if not a or not b:
        return False
    (firstA, lastA) = a
    (firstB, lastB) = b
    firstA = iunaccent(firstA)
    firstB = iunaccent(firstB)
    lastA = iunaccent(lastA)
    lastB = iunaccent(lastB)
    if lastA != lastB:
        return 0.
    partsA, _ = split_name_words(firstA)
    partsB, _ = split_name_words(firstB)
    # Materialise the pairs: on Python 3 zip() returns a one-shot iterator
    # that all(map(...)) would exhaust and that supports neither len() nor
    # indexing, both of which are needed below.
    parts = list(zip(partsA, partsB))
    if not all(map(match_first_names, parts)):
        # Try to match in reverse
        partsA.reverse()
        partsB.reverse()
        parts = list(zip(partsA, partsB))
        if not all(map(match_first_names, parts)):
            return 0.
    maxlen = max(len(partsA), len(partsB))
    sumscores = 0
    for i in range(maxlen):
        if i < len(parts):
            # Word present in both names
            sumscores += weight_first_names(parts[i])
        elif i < len(partsA):
            # Extra word only present in A: small penalty
            sumscores -= 0.25 * weight_first_name(partsA[i])
        else:
            # Extra word only present in B: small penalty
            sumscores -= 0.25 * weight_first_name(partsB[i])
    sumscores = max(min(sumscores, 1), 0)
    return sumscores

Exemplo n.º 5

0

Exibir arquivo

Arquivo: name.py Projeto: jilljenn/dissemin

def name_similarity(a, b):
    """
    Returns a float: how similar are these two names?

    `a` and `b` are (first, last) pairs; every part is passed through
    iunaccent before being compared. The score is 0 when the last names
    differ or when the first-name words cannot be matched pairwise (in
    order or in reverse order). Otherwise it is the sum of the weights of
    the matched word pairs minus a quarter of the weight of each word that
    has no counterpart, clamped to [0, 1].

    Returns False when either argument is empty or None.

    Examples (expected values as documented by the original authors):
    name_similarity(('Robin', 'Ryder'),('Robin', 'Ryder')) == 0.8
    name_similarity(('Robin', 'Ryder'),('R.', 'Ryder')) == 0.4
    name_similarity(('R.', 'Ryder'),('R.', 'Ryder')) == 0.4
    name_similarity(('Robin J.', 'Ryder'),('R.', 'Ryder')) == 0.3
    name_similarity(('Robin J.', 'Ryder'),('R. J.', 'Ryder')) == 0.8
    name_similarity(('R. J.', 'Ryder'),('J.', 'Ryder')) == 0.3
    name_similarity(('Robin', 'Ryder'),('Robin J.', 'Ryder')) == 0.7
    name_similarity(('W. Timothy','Gowers'), ('Timothy','Gowers')) == 0.7
    name_similarity(('Robin K.','Ryder'), ('Robin J.', 'Ryder')) == 0
    name_similarity(('Claire', 'Mathieu'),('Claire', 'Kenyon-Mathieu')) == 0
    """

    if not a or not b:
        return False
    (firstA, lastA) = a
    (firstB, lastB) = b
    firstA = iunaccent(firstA)
    firstB = iunaccent(firstB)
    lastA = iunaccent(lastA)
    lastB = iunaccent(lastB)
    if lastA != lastB:
        return 0.
    partsA, _ = split_name_words(firstA)
    partsB, _ = split_name_words(firstB)
    # list(...) is required: a bare zip() object on Python 3 is consumed by
    # the all(map(...)) check and cannot be passed to len() or indexed.
    parts = list(zip(partsA, partsB))
    if not all(map(match_first_names, parts)):
        # Try to match in reverse
        partsA.reverse()
        partsB.reverse()
        parts = list(zip(partsA, partsB))
        if not all(map(match_first_names, parts)):
            return 0.
    maxlen = max(len(partsA), len(partsB))
    sumscores = 0
    for i in range(maxlen):
        if i < len(parts):
            # Position covered by both names: reward the matched pair
            sumscores += weight_first_names(parts[i])
        elif i < len(partsA):
            # Only A has a word here: penalise by a quarter of its weight
            sumscores -= 0.25*weight_first_name(partsA[i])
        else:
            # Only B has a word here: penalise by a quarter of its weight
            sumscores -= 0.25*weight_first_name(partsB[i])
    sumscores = max(min(sumscores, 1), 0)
    return sumscores

Exemplo n.º 6

0

Exibir arquivo

def populate_identifiers(apps, se):
    """
    Fill the `identifiers` field of every Institution.

    The list always contains the institution's existing `identifier`. When
    both `country` and `name` are set, a second entry of the form
    '<country>:<name normalised with iunaccent>' is added. Only the
    `identifiers` field is written back on save.

    `apps` is used to look up the 'papers.Institution' model; `se` is not
    used by this function.
    """
    institution_model = apps.get_model('papers', 'Institution')
    for institution in institution_model.objects.all():
        has_country_and_name = institution.country and institution.name
        identifiers = [institution.identifier]
        if has_country_and_name:
            identifiers.append(
                institution.country + ':' + iunaccent(institution.name))
        institution.identifiers = identifiers
        institution.save(update_fields=['identifiers'])

Exemplo n.º 7

0

Exibir arquivo

Arquivo: 0043_institutions_multiple_identifiers.py Projeto: Phyks/dissemin

def populate_identifiers(apps, se):
    """
    Set `identifiers` on each Institution row and save that field.

    Every institution keeps its current `identifier` as the first entry.
    Institutions that have both a country and a name get a second entry,
    built as country + ':' + iunaccent(name).

    The model is fetched through `apps.get_model('papers', 'Institution')`;
    the `se` argument is ignored.
    """
    Institution = apps.get_model('papers', 'Institution')
    for inst in Institution.objects.all():
        if not (inst.country and inst.name):
            # Not enough data to derive the country-qualified identifier
            inst.identifiers = [inst.identifier]
        else:
            derived = inst.country + ':' + iunaccent(inst.name)
            inst.identifiers = [inst.identifier, derived]
        inst.save(update_fields=['identifiers'])

Exemplo n.º 8

0

Exibir arquivo

 def create(cls, first, last):
     """
     Build a new, unsaved instance of `cls` from a first and a last name.

     Each name is truncated to MAX_NAME_LENGTH characters, stripped of
     surrounding whitespace and passed through sanitize_html. The `full`
     attribute is the two cleaned names joined by a single space and
     normalised with iunaccent.

     Nothing is saved here, which makes this suitable for name lookups
     where the name may not need to be kept in the model.
     """
     name = cls()
     clean_first = sanitize_html(first[:MAX_NAME_LENGTH].strip())
     name.first = clean_first
     clean_last = sanitize_html(last[:MAX_NAME_LENGTH].strip())
     name.last = clean_last
     spaced = name.first+' '+name.last
     name.full = iunaccent(spaced)
     return name

Exemplo n.º 9

0

Exibir arquivo

Arquivo: baremodels.py Projeto: Phyks/dissemin

 def create(cls, first, last):
     """
     Return a fresh `cls` instance populated from the given names,
     without saving it.

     `first` and `last` are each cut to at most MAX_NAME_LENGTH characters,
     whitespace-stripped and run through sanitize_html before being stored
     on the instance. `full` holds iunaccent applied to
     "<first> <last>" built from the stored values.

     Handy for lookups where it is not yet known whether the name should
     be kept in the model.
     """
     obj = cls()
     truncated_first = first[:MAX_NAME_LENGTH]
     obj.first = sanitize_html(truncated_first.strip())
     truncated_last = last[:MAX_NAME_LENGTH]
     obj.last = sanitize_html(truncated_last.strip())
     obj.full = iunaccent(obj.first+' '+obj.last)
     return obj

Exemplo n.º 10

0

Exibir arquivo

def shallower_name_similarity(a, b):
    """
    Same as name_similarity, but accepts differences in the last names.
    This heuristics is more costly but is only used to attribute an ORCID
    affiliation to the right author in papers fetched from ORCID.

    `a` and `b` are (first, last) pairs. The last names are normalised
    with iunaccent and split into words; their similarity is the Jaccard
    index of the two word sets. First names are reduced to the first
    character of each word and matched pairwise, in order or, failing
    that, in reverse order.

    Returns False when an argument is empty, is not a pair, or when one of
    the last names contains no word; 0. when the first-name initials do
    not match; otherwise
    ratio * (number of paired initials + 1) / (longest initials list + 1).
    """
    if not a or not b or len(a) != 2 or len(b) != 2:
        return False
    firstA, lastA = a
    firstB, lastB = b

    # Matching last names
    lastA = iunaccent(lastA)
    lastB = iunaccent(lastB)
    wordsA, _ = split_name_words(lastA)
    wordsB, _ = split_name_words(lastB)
    wordsA = set(wordsA)
    wordsB = set(wordsB)
    if not wordsA or not wordsB:
        return False
    ratio = float(len(wordsA & wordsB)) / len(wordsA | wordsB)

    # Only the initials of the first-name words are compared
    partsA, _ = split_name_words(firstA)
    partsB, _ = split_name_words(firstB)
    partsA = [p[0] for p in partsA]
    partsB = [p[0] for p in partsB]

    # Materialise the pairs: on Python 3 zip() returns a one-shot iterator
    # which is exhausted by all(map(...)) and has no len().
    parts = list(zip(partsA, partsB))
    if not all(map(match_first_names, parts)):
        # Try to match in reverse
        partsA.reverse()
        partsB.reverse()
        parts = list(zip(partsA, partsB))
        if not all(map(match_first_names, parts)):
            return 0.

    maxlen = max(len(partsA), len(partsB))
    # The +1 terms keep the score non-zero when neither name has first-name
    # words, and avoid dividing by zero in that case.
    return ratio * (len(parts) + 1) / (maxlen + 1)

Exemplo n.º 11

0

Exibir arquivo

Arquivo: name.py Projeto: Phyks/dissemin

def shallower_name_similarity(a, b):
    """
    Variant of name_similarity that tolerates differing last names.

    It costs more to evaluate, but it is only used to attribute an ORCID
    affiliation to the right author in papers fetched from ORCID.

    Both arguments are (first, last) pairs. The result is False when an
    argument is empty, is not a pair, or has a last name without any word.
    It is 0. when the initials of the first-name words do not match in
    either direction. Otherwise it is the share of last-name words common
    to both names (intersection over union), scaled by
    (paired initials + 1) / (longest initials list + 1).
    """
    if not (a and b and len(a) == 2 and len(b) == 2):
        return False
    given_a, family_a = a
    given_b, family_b = b

    # Compare the last names as sets of words
    family_a = iunaccent(family_a)
    family_b = iunaccent(family_b)
    tokens_a, _ = split_name_words(family_a)
    tokens_b, _ = split_name_words(family_b)
    tokens_a, tokens_b = set(tokens_a), set(tokens_b)
    if not (tokens_a and tokens_b):
        return False
    shared = tokens_a & tokens_b
    combined = tokens_a | tokens_b
    overlap = float(len(shared)) / len(combined)

    # Compare the first names through the initial of each word
    words_a, _ = split_name_words(given_a)
    words_b, _ = split_name_words(given_b)
    initials_a = [word[0] for word in words_a]
    initials_b = [word[0] for word in words_b]

    pairs = list(zip(initials_a, initials_b))
    if not all(match_first_names(pair) for pair in pairs):
        # Retry with both lists aligned from the end
        initials_a.reverse()
        initials_b.reverse()
        pairs = list(zip(initials_a, initials_b))
        if not all(match_first_names(pair) for pair in pairs):
            return 0.

    longest = max(len(initials_a), len(initials_b))
    return overlap*(len(pairs)+1)/(longest+1)

Exemplo n.º 12

0

Exibir arquivo

def shallower_name_similarity(a, b):
    """
    Same as name_similarity, but accepts differences in the last names.
    This heuristics is more costly but is only used to attribute an ORCID
    affiliation to the right author in papers fetched from ORCID.

    `a` and `b` are (first, last) pairs. The last names are normalised
    with iunaccent and split into words; their similarity is the Jaccard
    index of the two word sets. First names are reduced to the first
    character of each word and matched pairwise, in order or, failing
    that, in reverse order.

    Returns False when an argument is empty or None; 0. when neither last
    name contains a word, when the initials do not match, or when neither
    name has first-name words; otherwise
    ratio * (number of paired initials) / (longest initials list).
    """
    if not a or not b:
        return False
    firstA, lastA = a
    firstB, lastB = b

    # Matching last names
    lastA = iunaccent(lastA)
    lastB = iunaccent(lastB)
    wordsA, _ = split_name_words(lastA)
    wordsB, _ = split_name_words(lastB)
    wordsA = set(wordsA)
    wordsB = set(wordsB)
    all_words = wordsA | wordsB
    if not all_words:
        # Neither last name contains a word: there is nothing to compare,
        # and the ratio below would divide by zero.
        return 0.
    ratio = float(len(wordsA & wordsB)) / len(all_words)

    # Only the initials of the first-name words are compared. Real lists
    # are needed (not map objects) because they are reversed in place and
    # measured with len() below.
    partsA, _ = split_name_words(firstA)
    partsB, _ = split_name_words(firstB)
    partsA = [p[0] for p in partsA]
    partsB = [p[0] for p in partsB]

    # Same for the pairs: zip() is a one-shot iterator on Python 3
    parts = list(zip(partsA, partsB))
    if not all(map(match_first_names, parts)):
        # Try to match in reverse
        partsA.reverse()
        partsB.reverse()
        parts = list(zip(partsA, partsB))
        if not all(map(match_first_names, parts)):
            return 0.

    maxlen = max(len(partsA), len(partsB))
    if maxlen > 0:
        return ratio * len(parts) / maxlen
    return 0.

Exemplo n.º 13

0

Exibir arquivo

Arquivo: name.py Projeto: jilljenn/dissemin

def shallower_name_similarity(a, b):
    """
    Same as name_similarity, but accepts differences in the last names.
    This heuristics is more costly but is only used to attribute an ORCID
    affiliation to the right author in papers fetched from ORCID.

    Both arguments are (first, last) pairs. Last names are passed through
    iunaccent, split into words and compared as sets (size of the
    intersection divided by size of the union). For first names only the
    first character of each word is kept; these initials must match
    pairwise, in order or in reverse order.

    Returns False for an empty or None argument. Returns 0. when no last
    name word exists at all, when the initials do not match, or when both
    names lack first-name words. Otherwise returns the last-name ratio
    multiplied by (paired initials / length of the longest initials list).
    """
    if not a or not b:
        return False
    firstA, lastA = a
    firstB, lastB = b

    # Matching last names
    lastA = iunaccent(lastA)
    lastB = iunaccent(lastB)
    wordsA, _ = split_name_words(lastA)
    wordsB, _ = split_name_words(lastB)
    wordsA = set(wordsA)
    wordsB = set(wordsB)
    union = wordsA | wordsB
    if not union:
        # Guard against ZeroDivisionError when both last names are empty
        # of words; such names share nothing measurable.
        return 0.
    ratio = float(len(wordsA & wordsB)) / len(union)

    # Keep the initial of every first-name word. List comprehensions are
    # used instead of map() so that .reverse() and len() work on Python 3.
    partsA, _ = split_name_words(firstA)
    partsB, _ = split_name_words(firstB)
    partsA = [word[0] for word in partsA]
    partsB = [word[0] for word in partsB]

    # list(zip(...)) for the same reason: the pairs are iterated and then
    # counted with len().
    parts = list(zip(partsA, partsB))
    if not all(map(match_first_names, parts)):
        # Try to match in reverse
        partsA.reverse()
        partsB.reverse()
        parts = list(zip(partsA, partsB))
        if not all(map(match_first_names, parts)):
            return 0.

    maxlen = max(len(partsA), len(partsB))
    if maxlen > 0:
        return ratio*len(parts)/maxlen
    return 0.

Exemplo n.º 14

0

Exibir arquivo

 def test_iunaccent(self):
     # iunaccent is expected to lowercase the text and drop the accent
     # on 'É', leaving the rest of the string untouched.
     normalized = iunaccent('BÉPO forever')
     self.assertEqual(normalized, 'bepo forever')

Exemplo n.º 15

0

Exibir arquivo

def name_similarity(a, b):
    """
    Returns a float: how similar are these two names?

    `a` and `b` are (first, last) pairs; every part is normalised with
    iunaccent before comparison. The score is 0 when the last names
    differ, when the first-name words cannot be matched pairwise (in order
    or in reverse order), or when each name spells out a word that the
    other only abbreviates or lacks. Otherwise it is the sum of the
    weights of the matched pairs minus a quarter of the weight of every
    unpaired word, clamped to [0, 1].

    Returns False when either argument is empty or None.

    Examples:

    >>> int(10*name_similarity(('Robin', 'Ryder'),('Robin', 'Ryder')))
    8
    >>> int(10*name_similarity(('Robin', 'Ryder'),('R.', 'Ryder')))
    4
    >>> int(10*name_similarity(('R.', 'Ryder'),('R.', 'Ryder')))
    4
    >>> int(10*name_similarity(('Robin J.', 'Ryder'),('R.', 'Ryder')))
    3
    >>> int(10*name_similarity(('Robin J.', 'Ryder'),('R. J.', 'Ryder')))
    8
    >>> int(10*name_similarity(('R. J.', 'Ryder'),('J.', 'Ryder')))
    3
    >>> int(10*name_similarity(('Robin', 'Ryder'),('Robin J.', 'Ryder')))
    7
    >>> int(10*name_similarity(('W. Timothy','Gowers'), ('Timothy','Gowers')))
    7
    >>> int(10*name_similarity(('Robin K.','Ryder'), ('Robin J.', 'Ryder')))
    0
    >>> int(10*name_similarity(('Claire', 'Mathieu'),('Claire', 'Kenyon-Mathieu')))
    0
    >>> int(10*name_similarity(('Amanda P.','Brown'),('Patrick','Brown')))
    0
    """

    if not a or not b:
        return False
    (firstA, lastA) = a
    (firstB, lastB) = b
    firstA = iunaccent(firstA)
    firstB = iunaccent(firstB)
    lastA = iunaccent(lastA)
    lastB = iunaccent(lastB)
    if lastA != lastB:
        return 0.
    partsA, _ = split_name_words(firstA)
    partsB, _ = split_name_words(firstB)
    # Materialise the pairs: on Python 3 zip() returns a one-shot iterator
    # that all(map(...)) would exhaust and that supports neither len() nor
    # indexing, both of which are needed below.
    parts = list(zip(partsA, partsB))
    if not all(map(match_first_names, parts)):
        # Try to match in reverse
        partsA.reverse()
        partsB.reverse()
        parts = list(zip(partsA, partsB))
        if not all(map(match_first_names, parts)):
            return 0.

    maxlen = max(len(partsA), len(partsB))
    sumscores = 0
    # For each word position: (spelled out in A, spelled out in B), where
    # "spelled out" means longer than a single character
    expanded = []
    for i in range(maxlen):
        if i < len(parts):
            sumscores += weight_first_names(parts[i])
            expanded.append((len(partsA[i]) > 1, len(partsB[i]) > 1))
        elif i < len(partsA):
            sumscores -= 0.25 * weight_first_name(partsA[i])
            expanded.append((len(partsA[i]) > 1, False))
        else:
            sumscores -= 0.25 * weight_first_name(partsB[i])
            expanded.append((False, len(partsB[i]) > 1))

    # Make sure the expanded first names of A are included in those of B,
    # or those of B are included in those of A.
    # This prevents ('Amanda P.','Brown') and ('A. Patrick','Brown')
    # from matching.
    # (wa, wb are used so the parameters a and b are not rebound.)
    if not (all(wa or not wb for wa, wb in expanded)
            or all(wb or not wa for wa, wb in expanded)):
        return 0.

    sumscores = max(min(sumscores, 1), 0)
    return sumscores

Exemplo n.º 16

0

Exibir arquivo

Arquivo: name.py Projeto: Lysxia/dissemin

def name_similarity(a, b):
    """
    Returns a float: how similar are these two names?

    Both arguments are (first, last) pairs, normalised with iunaccent
    before comparison. Different last names score 0. First names are split
    into words which must match pairwise, in order or in reverse order,
    and the words spelled out in full by one name must be a subset of
    those spelled out by the other; otherwise the score is 0. The score
    itself adds the weight of each matched pair and subtracts a quarter of
    the weight of each unpaired word, then is clamped to [0, 1].

    Returns False when either argument is empty or None.

    Examples:

    >>> int(10*name_similarity(('Robin', 'Ryder'),('Robin', 'Ryder')))
    8
    >>> int(10*name_similarity(('Robin', 'Ryder'),('R.', 'Ryder')))
    4
    >>> int(10*name_similarity(('R.', 'Ryder'),('R.', 'Ryder')))
    4
    >>> int(10*name_similarity(('Robin J.', 'Ryder'),('R.', 'Ryder')))
    3
    >>> int(10*name_similarity(('Robin J.', 'Ryder'),('R. J.', 'Ryder')))
    8
    >>> int(10*name_similarity(('R. J.', 'Ryder'),('J.', 'Ryder')))
    3
    >>> int(10*name_similarity(('Robin', 'Ryder'),('Robin J.', 'Ryder')))
    7
    >>> int(10*name_similarity(('W. Timothy','Gowers'), ('Timothy','Gowers')))
    7
    >>> int(10*name_similarity(('Robin K.','Ryder'), ('Robin J.', 'Ryder')))
    0
    >>> int(10*name_similarity(('Claire', 'Mathieu'),('Claire', 'Kenyon-Mathieu')))
    0
    >>> int(10*name_similarity(('Amanda P.','Brown'),('Patrick','Brown')))
    0
    """

    if not a or not b:
        return False
    (firstA, lastA) = a
    (firstB, lastB) = b
    firstA = iunaccent(firstA)
    firstB = iunaccent(firstB)
    lastA = iunaccent(lastA)
    lastB = iunaccent(lastB)
    if lastA != lastB:
        return 0.
    partsA, _ = split_name_words(firstA)
    partsB, _ = split_name_words(firstB)
    # list(...) is required: a bare zip() object on Python 3 is consumed by
    # the all(map(...)) check and cannot be passed to len() or indexed.
    parts = list(zip(partsA, partsB))
    if not all(map(match_first_names, parts)):
        # Try to match in reverse
        partsA.reverse()
        partsB.reverse()
        parts = list(zip(partsA, partsB))
        if not all(map(match_first_names, parts)):
            return 0.

    maxlen = max(len(partsA), len(partsB))
    sumscores = 0
    # One (A is spelled out, B is spelled out) tuple per word position;
    # a word longer than one character counts as spelled out
    expanded = []
    for i in range(maxlen):
        if i < len(parts):
            sumscores += weight_first_names(parts[i])
            expanded.append((len(partsA[i]) > 1, len(partsB[i]) > 1))
        elif i < len(partsA):
            sumscores -= 0.25*weight_first_name(partsA[i])
            expanded.append((len(partsA[i]) > 1, False))
        else:
            sumscores -= 0.25*weight_first_name(partsB[i])
            expanded.append((False, len(partsB[i]) > 1))

    # Make sure the expanded first names of A are included in those of B,
    # or those of B are included in those of A.
    # This prevents ('Amanda P.','Brown') and ('A. Patrick','Brown')
    # from matching.
    # Distinct loop names avoid rebinding the parameters a and b, which a
    # Python 2 list comprehension would otherwise overwrite.
    a_within_b = all(in_b or not in_a for in_a, in_b in expanded)
    b_within_a = all(in_a or not in_b for in_a, in_b in expanded)
    if not (a_within_b or b_within_a):
        return 0.

    sumscores = max(min(sumscores, 1), 0)
    return sumscores