示例#1
0
    def _process_crossref_name(self, contributor):
        """Return the name fields Crossref expects for ``contributor``.

        Follows the logic in `api/citations/utils.py`: when both a family and
        a given name are stored they are used as-is, otherwise every part is
        imputed from the full name.

        :param contributor: object providing ``given_name``, ``middle_names``,
            ``family_name``, ``suffix`` and ``fullname``
        :return: dict with a ``surname`` entry and, when available,
            ``given_name`` and ``suffix`` entries, each cut to its limit
        """
        if not (contributor.family_name and contributor.given_name):
            imputed = impute_names(contributor.fullname)
            given = imputed.get('given')
            middle = imputed.get('middle')
            family = imputed.get('family')
            suffix = imputed.get('suffix')
        else:
            given = contributor.given_name
            middle = contributor.middle_names
            family = contributor.family_name
            suffix = contributor.suffix

        joined_given = ' '.join([given, middle]).strip()
        # Crossref does not allow digits or question marks in a given name.
        given_processed = ''.join(
            ch for ch in remove_control_characters(joined_given)
            if not (ch.isdigit() or ch == '?')
        )
        surname_processed = remove_control_characters(family)

        # Fall back to the given name, then to the raw full name, so the
        # result always carries a ``surname`` key.
        surname_source = surname_processed or given_processed or contributor.fullname
        result = {'surname': surname_source[:CROSSREF_SURNAME_LIMIT].strip()}

        if surname_processed and given_processed:
            result['given_name'] = given_processed[:CROSSREF_GIVEN_NAME_LIMIT].strip()
        if (surname_processed or given_processed) and suffix:
            result['suffix'] = suffix[:CROSSREF_SUFFIX_LIMIT].strip()

        return result
示例#2
0
文件: crossref.py 项目: qa4osf/osf.io
    def _process_crossref_name(self, contributor):
        """Convert a contributor's name into Crossref-compatible fields.

        Adapted from the logic used in `api/citations/utils.py`.

        :param contributor: object providing ``given_name``, ``middle_names``,
            ``family_name``, ``suffix`` and ``fullname``
        :return: dict containing ``surname`` plus optional ``given_name`` and
            ``suffix`` keys, each truncated to its configured limit
        """
        # Stored family + given names win; otherwise guess from the full name.
        if contributor.family_name and contributor.given_name:
            name_parts = (
                contributor.given_name,
                contributor.middle_names,
                contributor.family_name,
                contributor.suffix,
            )
        else:
            guessed = impute_names(contributor.fullname)
            name_parts = (
                guessed.get('given'),
                guessed.get('middle'),
                guessed.get('family'),
                guessed.get('suffix'),
            )
        given, middle, family, suffix = name_parts

        given_name = ' '.join([given, middle]).strip()
        # For crossref, given_name is not allowed to have numbers or question marks
        kept_chars = []
        for char in remove_control_characters(given_name):
            if char.isdigit() or char == '?':
                continue
            kept_chars.append(char)
        given_processed = ''.join(kept_chars)
        surname_processed = remove_control_characters(family)

        surname = surname_processed or given_processed or contributor.fullname
        surname = surname[:CROSSREF_SURNAME_LIMIT].strip()
        processed_names = {'surname': surname}

        if given_processed and surname_processed:
            trimmed_given = given_processed[:CROSSREF_GIVEN_NAME_LIMIT]
            processed_names['given_name'] = trimmed_given.strip()
        if suffix and (surname_processed or given_processed):
            trimmed_suffix = suffix[:CROSSREF_SUFFIX_LIMIT]
            processed_names['suffix'] = trimmed_suffix.strip()

        return processed_names
示例#3
0
文件: core.py 项目: keyz182/osf.io
 def update_guessed_names(self):
     """Update the CSL name fields with parts inferred from the full name."""
     imputed = utils.impute_names(self.fullname)
     # (attribute on self, key in the imputed mapping), applied in order.
     field_map = (
         ('given_name', 'given'),
         ('middle_names', 'middle'),
         ('family_name', 'family'),
         ('suffix', 'suffix'),
     )
     for attribute, key in field_map:
         setattr(self, attribute, imputed[key])
示例#4
0
文件: core.py 项目: cldershem/osf.io
 def update_guessed_names(self):
     """Set the CSL name fields from the parts imputed out of ``fullname``."""
     guessed = utils.impute_names(self.fullname)
     attributes = ('given_name', 'middle_names', 'family_name', 'suffix')
     keys = ('given', 'middle', 'family', 'suffix')
     # Assign one field at a time, in the listed order.
     for attribute, key in zip(attributes, keys):
         setattr(self, attribute, guessed[key])
示例#5
0
文件: core.py 项目: ShadowsHax/osf.io
 def update_guessed_names(self):
     """Refresh the CSL name fields using names imputed from the full name."""
     name_parts = utils.impute_names(self.fullname)
     # Pairs are (key in the imputed mapping, attribute written on self).
     for key, attribute in (
         ('given', 'given_name'),
         ('middle', 'middle_names'),
         ('family', 'family_name'),
         ('suffix', 'suffix'),
     ):
         setattr(self, attribute, name_parts[key])
示例#6
0
def impute_names():
    """Fill in name-part fields on every user from the user's full name.

    For each user returned by ``models.User.find()`` the full name is parsed
    and every parsed field that is missing or ``None`` on the user is set;
    fields that already hold a value are left alone. Each user is saved.
    """
    # This function has the same name as the name parser. A bare
    # ``impute_names(user.fullname)`` call here would resolve to this
    # zero-argument function and raise TypeError, so the parser is imported
    # under a distinct local alias.
    from framework.auth.utils import impute_names as parse_name_parts

    for user in models.User.find():

        parsed = parse_name_parts(user.fullname)
        for field, value in parsed.items():
            # Only fill gaps; never overwrite an existing value.
            if getattr(user, field, None) is None:
                setattr(user, field, value)
        user.save()
示例#7
0
def impute_names():
    """Populate missing name-part fields for all users.

    Iterates over ``models.User.find()``, parses each user's full name and
    sets every parsed field whose current value on the user is absent or
    ``None``. The user is saved after its fields have been considered.
    """
    # The original body called ``impute_names(user.fullname)``, which binds
    # to this very function (it takes no arguments) and fails with
    # TypeError. Import the real parser under an alias to avoid the clash.
    from framework.auth.utils import impute_names as parse_name_parts

    for user in models.User.find():

        parsed = parse_name_parts(user.fullname)
        for field, value in parsed.items():
            # Existing non-None values are preserved.
            if getattr(user, field, None) is None:
                setattr(user, field, value)
        user.save()
    def test_process_crossref_name(self, crossref_client):
        """Check ``_process_crossref_name`` against several name shapes.

        One contributor is reused for every case, so attributes set by an
        earlier case stay in place for the later ones.
        """
        user = AuthUserFactory()

        def saved_names():
            # Persist the pending attribute changes, then run the code under test.
            user.save()
            return crossref_client._process_crossref_name(user)

        # Given name and no family name
        user.given_name = 'Hey'
        user.family_name = ''
        names = saved_names()
        parts = impute_names(user.fullname)
        assert names == {'surname': parts['family'], 'given_name': parts['given']}

        # Just one name
        user.fullname = 'Ke$ha'
        user.given_name = ''
        user.family_name = ''
        names = saved_names()
        assert names == {'surname': user.fullname}

        # Number and ? in given name
        user.fullname = 'Scotty2Hotty? Ronald Garland II'
        user.given_name = ''
        user.family_name = ''
        names = saved_names()
        assert names == {'given_name': 'ScottyHotty Ronald', 'surname': 'Garland', 'suffix': 'II'}

        # Long suffix is truncated to the suffix limit
        long_suffix = 'PhD MD Esq MPH IV'
        user.given_name = 'Mark'
        user.family_name = 'Henry'
        user.suffix = long_suffix
        names = saved_names()
        assert names['suffix'] == long_suffix[:crossref.CROSSREF_SUFFIX_LIMIT]

        # Long given_names and surnames are truncated to limit
        long_given = 'Maaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaark'
        long_surname = 'Henryyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'
        user.given_name = long_given
        user.family_name = long_surname
        names = saved_names()
        assert names['given_name'] == long_given[:crossref.CROSSREF_GIVEN_NAME_LIMIT]
        assert names['surname'] == long_surname[:crossref.CROSSREF_SURNAME_LIMIT]

        # Unparsable given or surname just returns fullname as surname
        unparsable_fullname = 'Author (name withheld until double-blind peer review completes and this name is also really long)'
        user.given_name = ''
        user.family_name = ''
        user.fullname = unparsable_fullname
        names = saved_names()
        assert names == {'surname': unparsable_fullname[:crossref.CROSSREF_SURNAME_LIMIT]}
    def test_process_crossref_name(self, crossref_client):
        """Run ``_process_crossref_name`` through a series of name scenarios.

        The same contributor object is mutated from case to case; values not
        reassigned by a case carry over from the previous one.
        """
        person = AuthUserFactory()

        def process_after_save():
            # Every scenario saves first and then asks the client for the names.
            person.save()
            return crossref_client._process_crossref_name(person)

        # Given name and no family name
        person.given_name = 'Hey'
        person.family_name = ''
        outcome = process_after_save()
        expected_parts = impute_names(person.fullname)
        assert outcome == {'surname': expected_parts['family'], 'given_name': expected_parts['given']}

        # Just one name
        person.fullname = 'Ke$ha'
        person.given_name = ''
        person.family_name = ''
        outcome = process_after_save()
        assert outcome == {'surname': person.fullname}

        # Number and ? in given name
        person.fullname = 'Scotty2Hotty? Ronald Garland II'
        person.given_name = ''
        person.family_name = ''
        outcome = process_after_save()
        assert outcome == {'given_name': 'ScottyHotty Ronald', 'surname': 'Garland', 'suffix': 'II'}

        # Long suffix is cut down to the configured suffix limit
        long_suffix = 'PhD MD Esq MPH IV'
        person.given_name = 'Mark'
        person.family_name = 'Henry'
        person.suffix = long_suffix
        outcome = process_after_save()
        assert outcome['suffix'] == long_suffix[:crossref.CROSSREF_SUFFIX_LIMIT]

        # Long given_names and surnames are truncated to limit
        long_given = 'Maaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaark'
        long_surname = 'Henryyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'
        person.given_name = long_given
        person.family_name = long_surname
        outcome = process_after_save()
        assert outcome['given_name'] == long_given[:crossref.CROSSREF_GIVEN_NAME_LIMIT]
        assert outcome['surname'] == long_surname[:crossref.CROSSREF_SURNAME_LIMIT]

        # Unparsable given or surname just returns fullname as surname
        unparsable_fullname = 'Author (name withheld until double-blind peer review completes and this name is also really long)'
        person.given_name = ''
        person.family_name = ''
        person.fullname = unparsable_fullname
        outcome = process_after_save()
        assert outcome == {'surname': unparsable_fullname[:crossref.CROSSREF_SURNAME_LIMIT]}
示例#10
0
    def parse(self, csv_file):
        """Extend every CSV row with activity data for the matching user.

        The first row is treated as the header and receives four extra
        column titles. Each later row is matched to a user by email
        (column 5) and, failing that, by full name (column 4) or imputed
        family name. Matched rows gain the user's pk, log count, node count
        and last log date; unmatched rows gain blank/zero placeholders.

        :param csv_file: iterable of CSV lines, as accepted by ``csv.reader``
        :return: list of the extended rows, header first
        """
        header_extras = [
            'OSF ID', 'Logs Since Workshop',
            'Nodes Created Since Workshop', 'Last Log Date'
        ]
        output_rows = []

        for line_no, record in enumerate(csv.reader(csv_file)):
            if line_no == 0:
                record.extend(header_extras)
                output_rows.append(record)
                continue

            matched = self.find_user_by_email(record[5])

            if not matched:
                name = record[4]
                try:
                    surname = impute_names(name)['family']
                except UnicodeDecodeError:
                    record.extend(['Unable to parse name'])
                    output_rows.append(record)
                    continue

                # Exact full-name match first, then family name only.
                matched = (self.find_user_by_full_name(name)
                           or self.find_user_by_family_name(surname))
                if not matched:
                    record.extend(['', 0, 0, ''])
                    output_rows.append(record)
                    continue

            since = datetime.strptime(record[1], '%m/%d/%y')
            created_nodes = self.get_user_nodes_since_workshop(matched, since)
            logs = self.get_user_logs_since_workshop(matched, since)

            try:
                latest = logs[-1].date.strftime('%m/%d/%y')
            except IndexError:
                # No logs to take a date from.
                latest = ''

            record.extend([matched.pk, len(logs), len(created_nodes), latest])
            output_rows.append(record)

        return output_rows
示例#11
0
文件: views.py 项目: baylee-d/osf.io
    def parse(self, csv_file):
        """Return the CSV rows, each extended with OSF user activity columns.

        Row 0 is the header and gets the four new column titles. For other
        rows the user is looked up by email (column 5), then by full name
        (column 4) or imputed family name; the row is extended with the
        user's data, with placeholders when nobody matches, or with a
        single note when the name cannot be parsed.

        :param csv_file: iterable of CSV lines, as accepted by ``csv.reader``
        :return: list of rows in input order
        """
        annotated = []

        for position, columns in enumerate(csv.reader(csv_file)):
            if position == 0:
                addition = [
                    'OSF ID', 'Logs Since Workshop', 'Nodes Created Since Workshop', 'Last Log Date'
                ]
            else:
                # ``addition`` stays None until a branch decides what to append.
                addition = None
                user = self.find_user_by_email(columns[5])

                if not user:
                    full_name = columns[4]
                    try:
                        family_name = impute_names(full_name)['family']
                    except UnicodeDecodeError:
                        addition = ['Unable to parse name']
                    else:
                        user = self.find_user_by_full_name(full_name) or self.find_user_by_family_name(family_name)
                        if not user:
                            addition = ['', 0, 0, '']

                if addition is None:
                    workshop_date = datetime.strptime(columns[1], '%m/%d/%y')
                    nodes = self.get_user_nodes_since_workshop(user, workshop_date)
                    user_logs = self.get_user_logs_since_workshop(user, workshop_date)

                    try:
                        last_log_date = user_logs[-1].date.strftime('%m/%d/%y')
                    except IndexError:
                        last_log_date = ''

                    addition = [user.pk, len(user_logs), len(nodes), last_log_date]

            columns.extend(addition)
            annotated.append(columns)

        return annotated
示例#12
0
def create_fake_user():
    """Create, save and return a registered, claimed user with fake details.

    The email doubles as the username, the name parts come from parsing the
    generated full name, and the password is fixed. The new user is logged.
    """
    address = fake.email()
    full_name = fake.name()
    name_parts = utils.impute_names(full_name)
    account = UserFactory(
        username=address,
        fullname=full_name,
        is_registered=True,
        is_claimed=True,
        date_registered=fake.date_time(),
        emails=[address],
        **name_parts
    )
    account.set_password('faker123')
    account.save()
    logger.info('Created user: {0} <{1}>'.format(account.fullname, account.username))
    return account
示例#13
0
def create_fake_user():
    """Build a fake registered and claimed user, save it and return it."""
    fake_email = fake.email()
    fake_name = fake.name()
    # Name parts are passed straight through as factory keyword arguments.
    imputed = impute_names(fake_name)
    new_user = UserFactory(
        username=fake_email,
        fullname=fake_name,
        is_registered=True,
        is_claimed=True,
        date_registered=fake.date_time(),
        emails=[fake_email],
        **imputed
    )
    new_user.set_password('faker123')
    new_user.save()
    return new_user
示例#14
0
def create_fake_user():
    """Create and save a fake registered user that has a verification key.

    :return: the saved user
    """
    address = fake.email()
    display_name = fake.name()
    name_fields = impute_names(display_name)
    created = UserFactory(
        username=address,
        fullname=display_name,
        is_registered=True,
        is_claimed=True,
        verification_key=security.random_string(15),
        date_registered=fake.date_time(),
        emails=[address],
        **name_fields
    )
    created.set_password('faker123')
    created.save()
    return created
示例#15
0
def create_fake_user():
    """Return a newly saved fake user marked as registered and claimed."""
    generated_email = fake.email()
    generated_name = fake.name()
    parts = impute_names(generated_name)
    # The generated email is used both as the username and as the only email.
    fake_user = UserFactory(username=generated_email,
                            fullname=generated_name,
                            is_registered=True,
                            is_claimed=True,
                            date_registered=fake.date_time(),
                            emails=[generated_email],
                            **parts)
    fake_user.set_password('faker123')
    fake_user.save()
    return fake_user
示例#16
0
def create_fake_user():
    """Create a fake registered, claimed user with a random verification key.

    :return: the user after its password has been set and it has been saved
    """
    mailbox = fake.email()
    person_name = fake.name()
    imputed_parts = impute_names(person_name)
    member = UserFactory(username=mailbox,
                         fullname=person_name,
                         is_registered=True,
                         is_claimed=True,
                         verification_key=security.random_string(15),
                         date_registered=fake.date_time(),
                         emails=[mailbox],
                         **imputed_parts)
    member.set_password('faker123')
    member.save()
    return member
def get_users_from_csv(filename):
    """Collect activity tuples for users listed in a workshop CSV file.

    Each row is matched to a user by email, then full name, then family
    name. Rows with no match are skipped.

    :param filename: path of a CSV file with ``Email Address``, ``Name`` and
        ``Workshop_Date`` columns
    :return: set of ``(date, fullname, username, _id, log_count,
        node_count, last_log)`` tuples
    """
    collected = set()
    with open(filename, 'rU') as handle:
        for record in csv.DictReader(handle):
            address = record['Email Address']
            name = record['Name']
            surname = impute_names(name)['family']

            # The date is read as day/month/two-digit-year.
            pieces = record['Workshop_Date'].split("/")
            day = int(pieces[0])
            month = int(pieces[1])
            year = int('20'+pieces[2])
            workshop_day = datetime.datetime(year, month, day)

            # Try progressively looser lookups until one finds a user.
            match = find_user_by_email(address)
            if not match:
                match = find_user_by_fullname(name)
            if not match:
                match = find_user_by_lastname(surname)
            if not match:
                continue

            logs = count_user_logs(match, Q('date', 'gte', workshop_day))
            nodes = count_user_nodes(match, Q('date_created', 'gte', workshop_day))
            latest = user_last_log(match)
            collected.add((workshop_day, match.fullname, match.username, match._id, logs, nodes, latest))
    return collected
示例#18
0
def get_users_from_csv(filename):
    """Build a set of activity tuples for the users named in a CSV file.

    A user is looked up by email first. Only when that fails and the row
    has a non-empty name is a lookup by full name, then by parsed name
    parts, attempted. Rows that match nobody contribute nothing.

    :param filename: path of a CSV file with ``Email Address``, ``Name`` and
        ``Workshop_Date`` columns
    :return: set of ``(date, fullname, username, _id, log_count,
        node_count, last_log)`` tuples
    """
    collected = set()
    with open(filename, 'rU') as handle:
        for record in csv.DictReader(handle):
            address = record['Email Address']
            name = record['Name'].decode('utf-8', 'ignore')
            name_parts = impute_names(name)
            workshop_day = parse_date(record['Workshop_Date'])

            match = find_user_by_email(address)
            if not match and name:
                match = find_user_by_fullname(name) or find_user_by_names(name_parts)

            if not match:
                continue

            logs = count_user_logs(match, Q('date', 'gte', workshop_day))
            nodes = count_user_nodes(match, Q('date_created', 'gte', workshop_day))
            latest = user_last_log(match)
            entry = (workshop_day, match.fullname, match.username, match._id, logs, nodes, latest)
            collected.add(entry)
    return collected
示例#19
0
def email_name(user):
    """Email a user the name parts imputed from their full name.

    The message is the module's email template filled with ``fullname`` and
    the imputed parts, encoded as UTF-8. The outcome is logged at debug level.

    :param user: object with ``fullname`` and ``username`` (the recipient)
    """
    logging.debug('Emailing user {0}'.format(user.fullname))

    template_fields = {'fullname': user.fullname}
    template_fields.update(impute_names(user.fullname))

    body = email_template.format(**template_fields).encode('utf-8')

    sent = send_email(
        from_addr='*****@*****.**',
        to_addr=user.username,
        subject='Open Science Framework: Verify your citation information',
        message=body,
        mimetype='plain',
    )

    if not sent:
        logging.debug('Emailing user {0}: Failure'.format(user.fullname))
        return
    logging.debug('Emailing user {0}: Success'.format(user.fullname))
示例#20
0
文件: utils.py 项目: erinspace/osf.io
def process_name(node, user):
    """Return the citation name parts for ``user``.

    Stored family and given names are returned unchanged when both are set.
    Otherwise the parts are imputed from the user's full name, or, for a
    user that is neither registered nor disabled, from the name in the
    unclaimed record for ``node``.

    :param node: object whose ``_id`` keys the user's unclaimed record
    :param user: the user to describe
    :return: dict with ``family_name``, ``suffix``, ``given_name`` and
        ``middle_names``
    """
    if not (user.family_name and user.given_name):
        # The user has not filled in both names, so guess them.
        if user.is_registered or user.is_disabled:
            raw_name = user.fullname
        else:
            raw_name = user.get_unclaimed_record(node._id)['name']

        guessed = utils.impute_names(raw_name)
        return {
            'family_name': guessed['family'],
            'suffix': guessed['suffix'],
            'given_name': guessed['given'],
            'middle_names': guessed['middle']
        }

    return {
        'family_name': user.family_name,
        'suffix': user.suffix,
        'given_name': user.given_name,
        'middle_names': user.middle_names,
    }
示例#21
0
def process_name(node, user):
    """Build the name-part dict for ``user`` in the context of ``node``.

    :param node: object whose ``_id`` is used to fetch an unclaimed record
    :param user: user whose names are wanted
    :return: dict with ``family_name``, ``suffix``, ``given_name`` and
        ``middle_names``; taken from the user when both family and given
        names are set, otherwise imputed from a name string
    """
    has_stored_names = user.family_name and user.given_name
    if has_stored_names:
        stored = {
            'family_name': user.family_name,
            'suffix': user.suffix,
            'given_name': user.given_name,
            'middle_names': user.middle_names,
        }
        return stored

    # Pick the string to parse: the full name for registered or disabled
    # users, the unclaimed record's name for everyone else.
    if user.is_registered or user.is_disabled:
        source_name = user.fullname
    else:
        source_name = user.get_unclaimed_record(node._id)['name']

    pieces = utils.impute_names(source_name)
    imputed = {
        'family_name': pieces['family'],
        'suffix': pieces['suffix'],
        'given_name': pieces['given'],
        'middle_names': pieces['middle']
    }
    return imputed
示例#22
0
def get_users_from_csv(filename):
    """Return activity tuples for each CSV row that matches a user.

    The lookup uses the email first; when that finds nobody and the row's
    name is non-empty, it falls back to the full name and then to the
    parsed name parts.

    :param filename: path of a CSV file with ``Email Address``, ``Name`` and
        ``Workshop_Date`` columns
    :return: set of ``(date, fullname, username, _id, log_count,
        node_count, last_log)`` tuples
    """
    results = set()
    with open(filename, 'rU') as source:
        reader = csv.DictReader(source)
        for entry in reader:
            email = entry['Email Address']
            full_name = entry['Name'].decode('utf-8', 'ignore')
            parsed_names = impute_names(full_name)
            since = parse_date(entry['Workshop_Date'])

            user = find_user_by_email(email)
            if full_name and not user:
                user = find_user_by_fullname(full_name) or find_user_by_names(parsed_names)

            if user:
                log_total = count_user_logs(user, Q('date', 'gte', since))
                node_total = count_user_nodes(user, Q('date_created', 'gte', since))
                most_recent = user_last_log(user)
                results.add((
                    since, user.fullname, user.username,
                    user._id, log_total, node_total, most_recent,
                ))
    return results
示例#23
0
def get_users_from_csv(filename):
    """Gather activity tuples for workshop attendees found in a CSV file.

    Users are matched by email, then by full name, then by family name.
    Rows that match no user are ignored.

    :param filename: path of a CSV file with ``Email Address``, ``Name`` and
        ``Workshop_Date`` columns
    :return: set of ``(date, fullname, username, _id, log_count,
        node_count, last_log)`` tuples
    """
    results = set()
    with open(filename, 'rU') as source:
        for entry in csv.DictReader(source):
            username = entry['Email Address']
            full_name = entry['Name']
            family_name = impute_names(full_name)['family']

            # Components are taken positionally as day, month, two-digit year.
            date_bits = entry['Workshop_Date'].split("/")
            day = int(date_bits[0])
            month = int(date_bits[1])
            year = int('20' + date_bits[2])
            since = datetime.datetime(year, month, day)

            user = find_user_by_email(username)
            if not user:
                user = find_user_by_fullname(full_name)
            if not user:
                user = find_user_by_lastname(family_name)

            if user:
                log_total = count_user_logs(user, Q('date', 'gte', since))
                node_total = count_user_nodes(user, Q('date_created', 'gte', since))
                most_recent = user_last_log(user)
                results.add((
                    since, user.fullname, user.username,
                    user._id, log_total, node_total, most_recent,
                ))
    return results
示例#24
0
def impute_names(**kwargs):
    """Return the name parts imputed from the ``name`` request argument.

    ``kwargs`` is accepted but not used. A missing ``name`` argument is
    treated as an empty string.
    """
    return auth_utils.impute_names(request.args.get('name', ''))
示例#25
0
"""
Impute name parts for all existing users.
"""

# NOTE(review): the function defined further down in this script is also
# named ``impute_names``, so it rebinds this imported name at module level.
from framework.auth.utils import impute_names

from website.app import init_app
from website import models

# Build the application at import time and keep the result in ``app``.
app = init_app('website.settings', set_backends=True, routes=True)

def impute_names():
    """Fill in name-part fields on every user from the user's full name.

    For each user returned by ``models.User.find()`` the full name is parsed
    and every parsed field that is missing or ``None`` on the user is set;
    fields that already hold a value are left alone. Each user is saved.
    """
    # Defining this function rebinds the module-level name ``impute_names``,
    # so a bare ``impute_names(user.fullname)`` call would hit this
    # zero-argument function and raise TypeError. Import the parser under a
    # distinct local alias instead.
    from framework.auth.utils import impute_names as parse_name_parts

    for user in models.User.find():

        parsed = parse_name_parts(user.fullname)
        for field, value in parsed.items():
            # Only fill gaps; never overwrite an existing value.
            if getattr(user, field, None) is None:
                setattr(user, field, value)
        user.save()

# Run the imputation only when this file is executed as a script.
if __name__ == '__main__':
    impute_names()
示例#26
0
def impute_names(**kwargs):
    """Impute name parts for the ``name`` value found in the request args.

    The keyword arguments are ignored; ``name`` defaults to ``""``.
    """
    requested_name = request.args.get("name", "")
    parts = auth_utils.impute_names(requested_name)
    return parts
示例#27
0
def impute_names(**kwargs):
    """Parse the ``name`` request argument into name parts and return them.

    ``kwargs`` is unused. When no ``name`` argument is supplied the empty
    string is parsed instead.
    """
    args = request.args
    return auth_utils.impute_names(args.get('name', ''))
示例#28
0
"""
Impute name parts for all existing users.
"""

# NOTE(review): this import is rebound at module level by the later
# ``def impute_names()`` in this script, which reuses the same name.
from framework.auth.utils import impute_names

from website.app import init_app
from website import models

# The application is initialised on import; the result is stored in ``app``.
app = init_app('website.settings', set_backends=True, routes=True)


def impute_names():
    """Populate missing name-part fields for all users.

    Iterates over ``models.User.find()``, parses each user's full name and
    sets every parsed field whose current value on the user is absent or
    ``None``. The user is saved after its fields have been considered.
    """
    # This definition shadows the imported parser of the same name; calling
    # ``impute_names(user.fullname)`` from here would invoke this
    # zero-argument function and raise TypeError. Re-import the parser
    # under an alias so the intended function is called.
    from framework.auth.utils import impute_names as parse_name_parts

    for user in models.User.find():

        parsed = parse_name_parts(user.fullname)
        for field, value in parsed.items():
            # Existing non-None values are preserved.
            if getattr(user, field, None) is None:
                setattr(user, field, value)
        user.save()


# Only perform the imputation when run directly, not when imported.
if __name__ == '__main__':
    impute_names()