Пример #1
0
def normalize_person(cls, node, graph):
    """Normalize the name attributes of a person node in place.

    The candidate name is the longest of: the node's existing name-part
    attributes joined with spaces, its ``name`` attribute, and the empty
    string. If the candidate matches ``NULL_RE`` the node is removed from
    ``graph``. Otherwise the candidate is parsed with
    ``nameparser.HumanName`` and ``node.attrs`` is replaced with the
    title-cased parts plus a ``name`` rebuilt from those parts.

    :param cls: not used by this function.
    :param node: object whose ``attrs`` dict is read and then replaced.
    :param graph: object exposing ``remove(node)``.
    :returns: the result of ``graph.remove(node)`` when the node is
        discarded, otherwise ``None``.
    """
    joined_parts = strip_whitespace(' '.join(
        node.attrs[x] for x in NAME_PARTS.values() if node.attrs.get(x)))
    name = max(joined_parts,
               strip_whitespace(node.attrs.get('name', '')),
               '',
               key=len)

    if NULL_RE.match(name):
        logger.debug('Discarding unnamed agent "%s"',
                     node.attrs.get('name', ''))
        return graph.remove(node)

    # Read the location before attrs is replaced below; previously the
    # location check ran against the freshly built dict, which never holds
    # a 'location' key, so the value was always lost.
    location = node.attrs.get('location')

    human = nameparser.HumanName(name)
    parts = {
        v: strip_whitespace(human[k]).title()
        for k, v in NAME_PARTS.items() if strip_whitespace(human[k])
    }

    node.attrs = {
        'name': ' '.join(parts[k] for k in NAME_PARTS.values() if k in parts),
        **parts
    }

    if location:
        node.attrs['location'] = strip_whitespace(location)
Пример #2
0
def get_first_name(legislator):
    """Return the legislator's first name.

    A truthy ``given_name`` entry wins; otherwise the first name is parsed
    out of the ``name`` entry with ``nameparser.HumanName``.
    """
    explicit = legislator.get('given_name')
    if not explicit:
        return nameparser.HumanName(legislator['name']).first
    return explicit
Пример #3
0
 def update_fullname(self):
     """Rebuild the cached full-name strings from the individual parts.

     Each attribute listed in ``name_parts`` is copied onto an empty
     ``HumanName`` (falsy values become ``''``); the rendered name is
     stored in ``_full`` and its lower-cased form in ``_lfull``.
     """
     composed = nameparser.HumanName('')
     for part in name_parts:
         setattr(composed, part, getattr(self, part) or '')
     self._full = unicode(composed)
     self._lfull = self._full.lower()
Пример #4
0
def get_user_from_headers(message):
    """Best-effort lookup of the ``DebianUser`` matching a message's sender.

    The ``From`` header is split into display name and address. Lookups
    are tried in this order, and the first one yielding exactly one row
    wins: by ``emailForward``, by parsed first/last name (``cn``/``sn``),
    and by ``uid`` (the local part of an ``@debian.org`` address).

    :param message: mapping-like object supporting ``in`` and ``.get``
        for header names.
    :returns: the single matching ``DebianUser``, or ``None`` when there
        is no unique match or the lookup fails.
    """
    import logging  # local import so the block does not depend on file-level imports

    try:
        human_name = None
        email_forward = None
        uid = None
        if 'From' in message:
            display_name, address = email.utils.parseaddr(message.get('From'))
            if display_name:
                human_name = nameparser.HumanName(display_name)
            if address:
                email_forward = address
                if address.endswith('@debian.org'):
                    uid = address.split('@')[0]
        if email_forward:
            result = DebianUser.objects.filter(emailForward=email_forward)
            if len(result) == 1:
                return result[0]
        if human_name:
            result = DebianUser.objects.filter(cn=human_name.first,
                                               sn=human_name.last)
            if len(result) == 1:
                return result[0]
        if uid:
            result = DebianUser.objects.filter(uid=uid)
            if len(result) == 1:
                return result[0]
    except Exception:
        # Still best-effort (callers get None), but no longer traps
        # KeyboardInterrupt/SystemExit, and the failure is recorded.
        logging.getLogger(__name__).debug(
            'User lookup from message headers failed', exc_info=True)
    return None
def map_name(oneline_name, name_object):
    """Parse a one-line name and copy its parts onto ``name_object``.

    ``first`` and ``last`` are always assigned; ``middle`` and ``suffix``
    are assigned only when the parser produced a non-empty value.
    """
    parsed = nameparser.HumanName(str(oneline_name))
    plan = (
        ('first', True),
        ('middle', False),
        ('last', True),
        ('suffix', False),
    )
    for part, always_set in plan:
        value = parsed[part]
        if always_set or value:
            setattr(name_object, part, value)
Пример #6
0
def get_creators(names):
    """Format each name as ``"Last, First [Middle]"``.

    :param names: iterable of name strings.
    :returns: list of formatted strings, in input order.
    """
    formatted = []
    for raw in names:
        parsed = nameparser.HumanName(raw)
        if parsed.middle:
            given = ' '.join([parsed.first, parsed.middle])
        else:
            given = parsed.first
        formatted.append('{}, {}'.format(parsed.last, given))
    return formatted
Пример #7
0
def make_person_name(person):
    """Compose a display name from a person's separate name fields.

    :param person: mapping with the keys ``"title"``, ``"first name"``,
        ``"middle name"``, ``"last name"`` and ``"suffix"``.
    :returns: the ``HumanName`` rendered via ``unicode``.
    """
    field_for_part = (
        ('title', "title"),
        ('first', "first name"),
        ('middle', "middle name"),
        ('last', "last name"),
        ('suffix', "suffix"),
    )
    composed = nameparser.HumanName()
    for part, field in field_for_part:
        setattr(composed, part, person[field])
    return unicode(composed)
Пример #8
0
 def _parse(self, *args, **kwargs):
     """Fill in either the full name or the name parts of a person node.

     The ``parse`` flag is always popped from ``attrs``. Nothing else
     happens unless the flag was truthy and ``type`` is ``'person'``.
     Without a ``name``, one is built by joining the non-empty part
     attributes; with a ``name``, it is parsed and every non-empty parsed
     part overwrites the corresponding attribute.
     """
     part_to_attr = (
         ('first', 'given_name'),
         ('middle', 'additional_name'),
         ('last', 'family_name'),
         ('suffix', 'suffix'),
     )
     requested = self.attrs.pop('parse')
     if not (requested and self.type == 'person'):
         return
     if self.attrs.get('name'):
         human = nameparser.HumanName(self.attrs['name'])
         for human_key, attr_key in part_to_attr:
             if human[human_key]:
                 self.attrs[attr_key] = human[human_key]
         return
     self.attrs['name'] = ' '.join(
         self.attrs[attr_key]
         for _, attr_key in part_to_attr
         if self.attrs.get(attr_key))
def parse_name(name):
    """Turn a name string into a ``Person``; pass anything else through.

    Empty parsed parts are stored as ``None`` and the original string is
    kept in ``raw``.
    """
    if isinstance(name, str):
        parsed = nameparser.HumanName(name)
        return Person(
            first=parsed.first or None,
            middle=parsed.middle or None,
            last=parsed.last or None,
            raw=name,
        )
    return name
Пример #10
0
 def from_fullname(cls, session, name, email=None):
     """Build (or fetch) an instance from a single full-name string.

     The name is parsed with ``nameparser.HumanName``; every part has
     leading/trailing dots stripped before being handed, together with
     ``email``, to ``cls.as_unique``.
     """
     parsed = nameparser.HumanName(name)
     column_for_part = (
         ('fname', 'first'),
         ('lname', 'last'),
         ('mname', 'middle'),
         ('title', 'title'),
         ('suffix', 'suffix'),
         ('nickname', 'nickname'),
     )
     fields = {}
     for column, part in column_for_part:
         fields[column] = getattr(parsed, part).strip('.')
     fields['email'] = email
     return cls.as_unique(session, **fields)
Пример #11
0
    def __init__(self, reference_text):
        """Load the surname lookup table and extract author names.

        The lookup table is read from the pickle cache next to this module
        when it exists; otherwise it is built from the census CSV and the
        cache is written. ``reference_text`` is parsed with
        ``bibtexparser`` and the first/last name of every author (split on
        ``' and '``) is collected.

        :param reference_text: BibTeX source text.
        """
        self.gender_options = [
            'male', 'mostly_male', 'andy', 'mostly_female', "female", "unknown"
        ]
        self.gender_results = {key: 0 for key in self.gender_options}
        self.race_options = [
            'pctwhite', 'pctblack', 'pctapi', 'pctaian', 'pct2prace',
            'pcthispanic', 'race_unknown'
        ]
        self.ethnicity_results = {key: 0 for key in self.race_options}
        self.raw_results = {}

        data_dir = pathlib.Path(__file__).parent / 'data'
        pickle_path = data_dir / 'ethnicity_lookup.p'
        csv_path = data_dir / 'Names_2010Census.csv'

        # Load data. Files are opened with context managers so handles are
        # closed (and the written cache flushed) deterministically.
        if os.path.isfile(pickle_path):
            # NOTE(review): unpickling runs arbitrary code; this cache file
            # must only ever come from a trusted source.
            with open(pickle_path, 'rb') as pickle_file:
                self.ethnicity_lookup = pickle.load(pickle_file)
        else:
            self.ethnicity_lookup = {}
            with open(csv_path) as csv_file:
                for row in csv.DictReader(csv_file):
                    percentages = {}
                    self.ethnicity_lookup[row['name']] = percentages
                    # The last option ('race_unknown') is not read from the CSV.
                    for race in self.race_options[:-1]:
                        try:
                            value = float(row[race])
                        except ValueError:
                            # Non-numeric cells count as zero.
                            value = 0
                        percentages[race] = value
            with open(pickle_path, 'wb') as pickle_file:
                pickle.dump(self.ethnicity_lookup, pickle_file)

        # Parse names from input
        self.reference_text = reference_text
        self.references = bibtexparser.loads(reference_text)
        logging.info(self.reference_text)
        logging.info(self.references.entries)
        self.first_names = []
        self.last_names = []
        for paper in self.references.entries:
            if "author" in paper:
                for person in paper["author"].split(' and '):
                    name = nameparser.HumanName(person)
                    self.first_names.append(name.first)
                    self.last_names.append(name.last)
        self.raw_results['first_name'] = self.first_names
        self.raw_results['last_name'] = self.last_names
Пример #12
0
def build(gdb):
    """Populate the graph database with people, event types, foods and links.

    Does nothing (beyond printing a notice) when ``gdb.labels`` is already
    truthy. Otherwise it creates ``Person``, ``Event`` and ``Food`` nodes
    from ``generate_data`` and, for every event in ``events.json``, an
    ``Attends`` and a ``Brings`` relationship.
    """
    if gdb.labels:
        print('Labels, Nodes, and Relationships already loaded')
        return

    person_label = gdb.labels.create('Person')
    people = {
        full_name: person_label.create(
            name=full_name,
            last_name=nameparser.HumanName(full_name).last)
        for full_name in generate_data.gen_names()
    }

    colors = generate_data.gen_colors()
    event_label = gdb.labels.create('Event')
    event_nodes = {
        type_name: event_label.create(
            name=type_name,
            colors=random.sample(colors, random.randint(1, 3)),
            user=json.dumps({
                'user_name': 'jhoweyusername',
                'password': '******'
            }))
        for type_name in generate_data.gen_event_types()
    }

    food_label = gdb.labels.create('Food')
    food_nodes = {
        food_name: food_label.create(name=food_name)
        for food_name in generate_data.gen_foods()
    }

    with open('events.json') as eventsfile:
        events_by_type = json.load(eventsfile)

        for event_type, events in events_by_type.items():
            for event in events:
                # Resolve all three nodes before creating any relationship.
                attendee = people[event['name']]
                event_node = event_nodes[event_type]
                dish = food_nodes[event['food']]

                attendee.relationships.create('Attends',
                                              event_node,
                                              confirmed=event['confirmed'],
                                              brought=event['food'])
                attendee.relationships.create('Brings',
                                              dish,
                                              signup_date=event['signup_date'],
                                              attended=event_type)
Пример #13
0
def clean_name(df):
    """Split the ``Name`` column into title and name-part columns.

    Names are parsed with ``nameparser`` using extra titles ('Major',
    'Jonkheer', 'Don'). Titles outside the small known list are replaced
    by ``'Other'``. The frame is modified in place and also returned.
    """
    parser_constants = nameparser.config.Constants()
    parser_constants.titles.add('Major', 'Jonkheer', 'Don')

    def _as_parts(full_name):
        parsed = nameparser.HumanName(full_name=full_name,
                                      constants=parser_constants)
        return parsed.as_dict(False)

    known_titles = ['Mr.', 'Miss.', 'Mrs.', 'Master']

    # Temporary column holding the parsed dict for each row.
    df['nmp_tag'] = df['Name'].apply(_as_parts)
    df['title'] = df['nmp_tag'].apply(lambda tag: tag.get('title'))
    df['title'] = df['title'].apply(
        lambda title: title if title in known_titles else 'Other')
    for column, part in (('first_name', 'first'),
                         ('middle_name', 'middle'),
                         ('last_name', 'last'),
                         ('suffix', 'suffix'),
                         ('nickname', 'nickname')):
        df[column] = df['nmp_tag'].apply(lambda tag, part=part: tag.get(part))
    df.drop(columns=['nmp_tag'], inplace=True)
    return df
Пример #14
0
    def get_roster(self, team):
        """Return the roster DataFrame for ``team`` in ``self.year``.

        The roster is first read from ``Rosters/<Team Title> <year>.xlsx``.
        If that fails for any ordinary error, the active roster is fetched
        through ``PlayerNameMatcher`` instead.

        :param team: team/school name; ``team.title()`` is used for lookups.
        :returns: DataFrame with the columns ``Name``, ``Last``,
            ``Position``, ``Jersey Number``, ``Class`` and ``Player_id``
            (used by the match_players function).
        :raises NameError: if neither source provides a roster.
        """
        # assuming players are always addressed in the play-by-play by last names
        # school name in title form and space inbetween school and self.year
        try:  # for games for which roster are stored in excel spreadsheets
            roster = pd.read_excel('Rosters/{}.xlsx'.format(' '.join([team.title(), str(self.year)])))
            for i in roster.index:
                parsed_name = nameparser.HumanName(roster.at[i, 'Name'])
                # Must have a last name: fall back to the first name.
                roster.at[i, 'Last'] = parsed_name.last or parsed_name.first

            # Fake the jersey number: use the index of the player in the
            # roster df as Jersey Number when none is provided.
            if roster['Jersey Number'].isnull().all():
                roster['Jersey Number'] = [str(i) for i in roster.index.tolist()]
            roster['Jersey Number'] = [str(j) for j in roster['Jersey Number']]
            roster['Season'] = str(self.year)
            roster['Team'] = team
            roster['Player_id'] = ['-'.join(roster.loc[i, ['Season', 'Team', 'Jersey Number']]) for i in roster.index]

            roster = roster[['Name', 'Last', 'Position', 'Jersey Number', 'Class', 'Player_id']]

        except Exception:  # for the games with active roster MongoDB Collection available
            # ``Exception`` (not a bare except) so Ctrl-C / SystemExit are
            # not mistaken for "spreadsheet unavailable".
            matcher = PlayerNameMatcher(team.title(), int(self.year), 'MFB')
            code = matcher._get_team_code()
            roster_dict = matcher._get_active_players(code, int(self.year), 'MFB')

            if roster_dict != {}:
                players = [roster_dict.get(k) for k in roster_dict.keys()]
                roster = pd.DataFrame()
                roster['Name'] = [p.player_name for p in players]
                roster['Last'] = [p.player_name.split(",")[0] for p in players]
                roster['Position'] = [p.pos for p in players]
                roster['Jersey Number'] = [p.jersey_number for p in players]
                roster['Class'] = [p.player_class for p in players]
                roster['Player_id'] = [p.player_uuid for p in players]
            else:
                raise NameError('Roster for {} {} is not found'.format(team.title(), self.year))

        return roster  # a df used in match_players function.
Пример #15
0
    def _map_row_to_instructor_names(self, row):
        """Format the ``;``-separated instructors of a row.

        Each name that parses to both a first and a last name becomes
        ``"LAST, F"`` (upper-cased last name, upper-cased first initial).

        :returns: dict with ``instructor`` (first formatted name or ``''``)
            and ``instructors`` (all formatted names).
        """
        formatted = []
        for raw_name in row['Instructor'].split(';'):
            parsed = nameparser.HumanName(raw_name)
            if not (parsed.first and parsed.last):
                continue
            formatted.append(
                f'{parsed.last.upper()}, {parsed.first.upper()[0]}')

        primary = formatted[0] if formatted else ''
        return {'instructor': primary, 'instructors': formatted}
Пример #16
0
def perform_inserts(curr, conn, color_dict, event_type_dict, names, foods):
    """Insert every event from ``events.json`` into both event tables.

    For each event type, one batch goes into the normalized ``events``
    table and one into ``events_json``; each batch is committed
    separately. Only the normalized batch contributes to the printed count.
    ``names`` and ``foods`` are accepted but not used here.
    """
    insert_normalized = 'INSERT INTO events (event_type_id, user_name, password, color_id, name, food, confirmed, signup_date) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)'
    insert_json = 'INSERT INTO events_json (event_type, user_name, password, metadata_name, metadata_last_name, metadata_color, event) VALUES (%s,%s,%s,%s,%s,%s,%s)'

    with open('events.json') as eventsfile:
        events_by_type = json.load(eventsfile)

        inserted = 0

        for event_type, events in events_by_type.items():
            normalized_rows = []
            json_rows = []

            for event in events:
                normalized_rows.append((
                    event_type_dict[event_type],
                    'jhoweyusername',
                    'jhoweypassword',
                    color_dict[random.choice(list(color_dict.keys()))],
                    event['name'],
                    event['food'],
                    event['confirmed'],
                    event['signup_date'],
                ))
                json_rows.append((
                    event_type,
                    'jhoweyusername',
                    'jhoweypassword',
                    event['name'],
                    nameparser.HumanName(event['name']).last,
                    random.choice(list(color_dict.keys())),
                    json.dumps(event),
                ))

            curr.executemany(insert_normalized, normalized_rows)
            inserted += curr.rowcount
            conn.commit()

            curr.executemany(insert_json, json_rows)
            conn.commit()

        print('Inserted %d events x2' % inserted)
Пример #17
0
def bill_sponsor(person):
    """Grab complete sponsor information from GPO 'person' object.

    Name parts come from parsing ``person['name']``; ``id``, ``state`` and
    ``title`` are copied from ``person``; ``party`` and ``facebook_id``
    come from the ProPublica members endpoint for that bioguide id.

    :param person: mapping with ``name``, ``bioguide_id``, ``state`` and
        ``title``.
    :returns: dict describing the sponsor.
    :raises AssertionError: if the API response has no non-empty
        ``results`` entry.
    """
    sponsor = {}
    parsed_name = nameparser.HumanName(person['name'])
    sponsor['first_name'] = parsed_name['first']
    sponsor['last_name'] = parsed_name['last']
    sponsor['id'] = person['bioguide_id']
    sponsor['state'] = person['state']
    sponsor['title'] = person['title']
    sponsor['facebook_id'] = None
    endpoint = 'https://api.propublica.org/congress/v1/members/' \
        + sponsor['id'] + '.json'
    # NOTE(review): API credential is hard-coded in source; it should be
    # moved to configuration/environment and rotated.
    headers = {
        'X-API-Key': '76l8Lwp3w45mu6BeOShc17r3H4I264iK2mqMfX1k'
    }
    request = requests.get(endpoint, headers=headers)
    response = json.loads(request.text)
    if 'results' in response and response['results']:
        sponsor['party'] = response['results'][0]['current_party']
        sponsor['facebook_id'] = response['results'][0]['facebook_account']
    else:
        # Explicit raise instead of ``assert False``: asserts are removed
        # under ``python -O``, which would let an incomplete sponsor
        # (no 'party') be returned silently.
        raise AssertionError(
            'No member results returned for ' + sponsor['id'])
    return sponsor
Пример #18
0
    def __init__(self,
                 name,
                 email='',
                 directorate='',
                 division='',
                 programs=None):
        """Create a person from a full name and organisational details.

        The name is parsed with ``nameparser.HumanName`` and each part is
        stored UTF-8 encoded (middle, title and suffix with surrounding
        dots stripped). ``directorate``, ``division`` and every program
        are upper-cased and stripped of surrounding punctuation. Each
        instance takes the next value of the class counter ``Person.ID``.

        :param name: full name string.
        :param email: e-mail address, stored as given.
        :param directorate: directorate code.
        :param division: division code.
        :param programs: optional iterable of program codes; ``None``
            means no programs.
        """
        parsed_name = nameparser.HumanName(name)
        self.fname = parsed_name.first.encode('utf-8')
        self.lname = parsed_name.last.encode('utf-8')
        self.mname = parsed_name.middle.strip('.').encode('utf-8')
        self.title = parsed_name.title.strip('.').encode('utf-8')
        self.suffix = parsed_name.suffix.strip('.').encode('utf-8')
        self.nickname = parsed_name.nickname.encode('utf-8')

        self.email = email
        self.directorate = directorate.upper().strip(string.punctuation)
        self.division = division.upper().strip(string.punctuation)
        # The original condition was inverted: it tried to iterate
        # ``programs`` exactly when it was None (TypeError on the default)
        # and skipped normalisation when programs were supplied.
        if programs is None:
            self.programs = set()
        else:
            self.programs = set(
                p.upper().strip(string.punctuation) for p in programs)

        self.id = Person.ID
        Person.ID += 1
Пример #19
0
def create_names_df(data_path='test_data/test_names.txt'):
    """
    Create a DataFrame from the provided data path. Each line in the file
    should be a single name
    :param data_path: Path to data
    :type data_path: str
    :return: DataFrame, containing names, and column 'last'
    :rtype: pd.DataFrame
    """
    # Parse every line; the context manager closes the file, which the
    # previous bare ``open()`` in the loop header never did.
    with open(data_path) as names_file:
        names_list = [
            nameparser.HumanName(line).as_dict() for line in names_file
        ]

    # Convert to DataFrame
    names_df = pd.DataFrame(names_list)

    # Uppercase to match Census data
    names_df['last'] = names_df['last'].apply(lambda x: x.upper())

    return names_df
Пример #20
0
def search_inmates(session):
    """:py:mod:`bottle` route handling a GET request for an inmate search.

    The route reads one GET parameter:

    :param query: The inmate query string.
    :type query: str

    A query that converts to an integer once dashes are removed is
    searched by ID; otherwise it is parsed as a name and must contain both
    a first and a last name.

    :returns: :py:mod:`bottle` JSON response containing the following fields:

        - :py:data:`inmates` JSON encoding of the list of inmates.
        - :py:data:`errors` List of error strings encountered during search.

    """
    query = bottle.request.query.get("query")
    if not query:
        raise bottle.HTTPError(400, "Some search input must be provided")

    try:
        numeric_id = int(query.replace("-", ""))
        inmates, errors = db.query_providers_by_id(session, numeric_id)
    except ValueError:
        parsed = nameparser.HumanName(query)
        first, last = parsed.first, parsed.last

        if not first or not last:
            message = "If using a name, please specify first and last name"
            raise bottle.HTTPError(400, message)  # pylint: disable=raise-missing-from

        inmates, errors = db.query_providers_by_name(session, first, last)

    return {"inmates": schemas.inmates.dump(inmates), "errors": errors}
Пример #21
0
def perform_inserts(events_collection):
    """Enrich every event from ``events.json`` and bulk-insert them.

    Each event gets its type, a fixed ``_user`` record and three
    double-underscore fields (random color, name, parsed last name) for
    keyword indexing; all events are then inserted with one
    ``insert_many`` call and the inserted count is printed.
    """
    palette = generate_data.gen_colors()

    with open('events.json') as eventsfile:
        events_by_type = json.load(eventsfile)

        documents = []
        for event_type, events in events_by_type.items():
            for event in events:
                event.update({
                    'event_type': event_type,
                    '_user': {'user_name': 'jhoweyusername', 'password': '******'},
                    # Fields for keyword indexing later
                    '__color': random.choice(palette),
                    '__name': event['name'],
                    '__last_name': nameparser.HumanName(event['name']).last,
                })
                documents.append(event)

        outcome = events_collection.insert_many(documents)

        print('Inserted %d events' % len(outcome.inserted_ids))
Пример #22
0
    def _reorder_author_name(self, author_str, default_to_last_name):
        """
        Automatically detect first and last names in an author name string and reorder
        using the format: last name, first name

        The string is parsed with ``nameparser.HumanName``; each middle name
        is then classified as a first or last name using ``self.first_names``
        and ``self.last_names`` (with ``self.regex_dash`` / ``self.regex_quote``
        applied for the dash/quote-stripped variants).

        :param author_str: author name string to parse.
        :param default_to_last_name: when true, middle names found in neither
            list are treated as last names; otherwise as first names.

        NOTE(review): no ``return`` statement is visible in this block; the
        visible code only mutates the local ``author`` object.
        """
        author = nameparser.HumanName(author_str)
        # If the parser put 'Jr.' in the first-name slot and also found a
        # suffix, swap the two.
        if author.first == u'Jr.' and author.suffix != '':
            author.first = author.suffix
            author.suffix = u'Jr.'

        if (author.middle):
            # Move middle names to first name if detected as so,
            # or move to last name if detected as so
            # or move to the default
            add_to_first = []
            add_to_last = []
            last_name_found = False

            middle_name_list = author.middle.split()

            try:
                for middle_name in middle_name_list:
                    middle_name_length = len(
                        unicode_entities(middle_name).strip('.').strip('-')
                    )  # Ignore '.' or '-' at the beginning/end of the string
                    middle_name_upper = middle_name.upper()
                    # First-name test: a short token (<= 2 chars, no quote,
                    # not a known last name), or a token that is a known
                    # first name and not a known last name -- checked as-is,
                    # after regex_dash substitution, and after regex_quote
                    # substitution.
                    if (middle_name_length <= 2 and middle_name_upper not in self.last_names and "'" not in middle_name) \
                        or (middle_name_upper in self.first_names and middle_name_upper not in self.last_names) \
                        or (self.regex_dash.sub('', middle_name_upper) in self.first_names and self.regex_dash.sub('', middle_name_upper) not in self.last_names) \
                        or (self.regex_quote.sub('', middle_name_upper) in
                            self.first_names and self.regex_quote.sub('',
                            middle_name_upper) not in self.last_names):
                        # Case: First name found
                        # Middle name is found in the first names ADS
                        # list and not in the last names ADS list
                        if last_name_found:
                            # Move all previously detected first names to
                            # last name since we are in a situation where
                            # we detected:
                            # F F L F
                            # hence we correct it to:
                            # L L L F
                            # where F is first name and L is last name
                            # NOTE(review): the statements below move the
                            # pending last-name candidates into add_to_first,
                            # which looks like the opposite direction from
                            # the description above -- confirm intent.
                            add_to_first += add_to_last
                            add_to_last = []
                            last_name_found = False
                        add_to_first.append(middle_name)
                    elif last_name_found or middle_name.upper(
                    ) in self.last_names:
                        # Case: Last name found
                        add_to_last.append(middle_name)
                        last_name_found = True
                    else:
                        # Case: Unknown
                        # Middle name not found in the first or last names ADS list
                        if default_to_last_name:
                            add_to_last.append(middle_name)
                            last_name_found = True
                        else:
                            add_to_first.append(middle_name)
            except Exception as e:
                # Best effort: log and continue with whatever was classified
                # before the failure.
                logging.exception("Unexpected error in middle name parsing")
            # Assign lists back to the HumanName fields: first name followed
            # by the extra first names, and the reversed extra last names
            # followed by the parsed last name.
            author.first = [author.first] + add_to_first
            add_to_last.reverse()
            author.last = add_to_last + [author.last]
        # The middle-name slot is always cleared, whether or not it was set.
        author.middle = u''

        # Verify that no first names appear in the detected last name
        if (author.last):
            # NOTE(review): the non-string branch calls ``.split()`` while the
            # string branch wraps the value in a list; this looks inverted --
            # confirm against the types HumanName returns here.
            if isinstance(author.last, basestring):
                last_name_list = [author.last]
            else:
                last_name_list = author.last.split()
            # At this point we already know it has at least 1 last name and
            # we will not question that one (in the last position)
            verified_last_name_list = [last_name_list.pop()]
            last_name_list.reverse()
            try:
                for last_name in last_name_list:
                    last_name_upper = last_name.upper()
                    # A token that is only a known first name is moved to the
                    # first-name slot; anything else stays a last name.
                    if last_name_upper in self.first_names and last_name_upper not in self.last_names:
                        author.first = [author.first, last_name]
                    else:
                        verified_last_name_list.append(last_name)
            except Exception, err:
                # On failure author.last is left as it was (the ``else``
                # clause below only runs when no exception occurred).
                logging.exception("Unexpected error in last name parsing")
            else:
                # Restore the original order before storing.
                verified_last_name_list.reverse()
                author.last = verified_last_name_list
def main(args):
    """Create a proposal-review document and sync it into the collection file.

    The document key is ``<yy><month>_<last>_<first>`` built from today's
    date and the parsed ``args.name``. Optional arguments (``title``,
    ``requester``, ``reviewer``, ``status``) fall back to ``''``, ``''``,
    ``'sbillinge'`` and ``'accepted'`` respectively.

    :param args: namespace with ``name``, ``type``, ``due_date``, ``title``,
        ``requester``, ``reviewer`` and ``status``.
    :returns: ``{key: document}`` as passed to ``sync_coll``.
    :raises ValueError: if ``args.status`` is given but not in
        ``ALLOWED_STATI``.
    """
    target = Path.cwd().joinpath('..', 'db', "{}.yml".format(OUTCOLLECTION))
    name = nameparser.HumanName(args.name)
    today = dt.datetime.today()
    key = "{}{}_{}_{}".format(
        str(today.year)[-2:], month_to_str_int(today.month),
        name.last.casefold(), name.first.casefold().strip("."))

    if args.status and args.status not in ALLOWED_STATI:
        raise ValueError(
            "status should be one of {}".format(ALLOWED_STATI))

    pdoc = {
        'adequacy_of_resources':
        ['The resources available to the PI seem adequate'],
        'agency': args.type,
        'competency_of_team': [],
        'doe_appropriateness_of_approach': [],
        'doe_reasonableness_of_budget': [],
        'doe_relevance_to_program_mission': [],
        'does_how': [],
        'does_what': '',
        'due_date': args.due_date,
        'freewrite': [],
        'goals': [],
        'importance': [],
        'institutions': [],
        'month': 'tbd',
        'names': name.full_name,
        'nsf_broader_impacts': [],
        'nsf_create_original_transformative': [],
        'nsf_plan_good': [],
        'nsf_pot_to_advance_knowledge': [],
        'nsf_pot_to_benefit_society': [],
        'status': 'accepted',
        'summary': '',
        # NOTE(review): year is a fixed literal rather than today's year --
        # confirm whether that is intended.
        'year': 2020
    }

    pdoc['title'] = args.title if args.title else ''
    pdoc['requester'] = args.requester if args.requester else ''
    pdoc['reviewer'] = args.reviewer if args.reviewer else 'sbillinge'
    # Without an explicit status the default 'accepted' stays in place.
    # The previous fallback overwrote 'requester' with '' here, discarding
    # the requester whenever no status was supplied.
    if args.status:
        pdoc['status'] = args.status

    fullpdoc = {key: pdoc}
    sync_coll(target, fullpdoc)

    print("{} proposal has been added/updated in proposal reviews".format(
        args.name))
    return fullpdoc
Пример #24
0
 def update_name_parts(self, fullname):
     """Parse ``fullname`` and copy each name part onto this object.

     Every attribute listed in ``name_parts`` is set from the parsed
     name; empty parts are stored as ``None``.
     """
     parsed = nameparser.HumanName(fullname)
     for part in name_parts:
         setattr(self, part, getattr(parsed, part) or None)
Пример #25
0
    def db_updater(self):
        """Insert a new proposal-review document for ``rc.name``.

        The ``_id`` is ``<yy><month>_<last>_<first>`` from today's date and
        the parsed name. The process exits if a document with that id is
        already in the collection. Missing ``title``/``requester``/
        ``reviewer``/``status`` default to ``''``, ``''``, ``'sbillinge'``
        and ``'accepted'``.

        :raises ValueError: if ``rc.status`` is set but not in
            ``ALLOWED_STATI``.
        """
        rc = self.rc
        name = nameparser.HumanName(rc.name)
        month = dt.datetime.today().month
        year = dt.datetime.today().year
        key = "{}{}_{}_{}".format(
            str(year)[-2:], month_to_str_int(month), name.last.casefold(),
            name.first.casefold().strip("."))

        coll = self.gtx[rc.coll]
        existing = [doc for doc in coll if doc["_id"] == key]
        if existing:
            sys.exit("This entry appears to already exist in the collection")

        pdoc = {
            'adequacy_of_resources': [
                'The resources available to the PI seem adequate'],
            'agency': rc.type,
            'competency_of_team': [],
            'doe_appropriateness_of_approach': [],
            'doe_reasonableness_of_budget': [],
            'doe_relevance_to_program_mission': [],
            'does_how': [],
            'does_what': '',
            'due_date': rc.due_date,
            'freewrite': [],
            'goals': [],
            'importance': [],
            'institutions': [],
            'month': 'tbd',
            'names': name.full_name,
            'nsf_broader_impacts': [],
            'nsf_create_original_transformative': [],
            'nsf_plan_good': [],
            'nsf_pot_to_advance_knowledge': [],
            'nsf_pot_to_benefit_society': [],
            'status': 'accepted',
            'summary': '',
            'year': 2020,
        }

        pdoc['title'] = rc.title if rc.title else ''
        pdoc['requester'] = rc.requester if rc.requester else ''
        pdoc['reviewer'] = rc.reviewer if rc.reviewer else 'sbillinge'

        if not rc.status:
            pdoc['status'] = 'accepted'
        elif rc.status in ALLOWED_STATI:
            pdoc['status'] = rc.status
        else:
            raise ValueError(
                "status should be one of {}".format(ALLOWED_STATI))

        pdoc["_id"] = key
        rc.client.insert_one(rc.database, rc.coll, pdoc)

        print("{} proposal has been added/updated in proposal reviews".format(
            rc.name))
Пример #26
0
def xmlformatname(pname):
    """Render a presenter name through ``presenter_template``.

    :returns: the formatted template with HTML-escaped first and last
        names, or ``None`` when either part is missing.
    """
    parsed = nameparser.HumanName(pname)
    first, last = parsed.first, parsed.last
    if first and last:
        return presenter_template.format(FIRSTNAME=html_escape(first),
                                         LASTNAME=html_escape(last))
    return None
Пример #27
0
    def db_updater(self):
        """Insert a new manuscript-review document for ``rc.name``.

        The ``_id`` is ``<yy><month>_<last>_<first>`` (or
        ``<yy><month>_<first>`` when no last name was parsed). The process
        exits if that id is already in the collection. When no reviewer is
        given, ``rc.default_user_id`` is used and written back to
        ``rc.reviewer``; if that attribute is missing a hint is printed
        and nothing is inserted.

        :raises ValueError: if ``rc.status`` is set but not in
            ``ALLOWED_STATI``.
        """
        rc = self.rc
        name = nameparser.HumanName(rc.name)
        month = dt.datetime.today().month
        year = dt.datetime.today().year
        has_last = name.last != ''
        if has_last:
            key = "{}{}_{}_{}".format(
                str(year)[-2:], month_to_str_int(month), name.last.casefold(),
                name.first.casefold().strip("."))
        else:
            key = "{}{}_{}".format(
                str(year)[-2:], month_to_str_int(month),
                name.first.casefold().strip("."))

        coll = self.gtx[rc.coll]
        existing = [doc for doc in coll if doc["_id"] == key]
        if existing:
            sys.exit("This entry appears to already exist in the collection")

        pdoc = {
            'claimed_found_what': [],
            'claimed_why_important': [],
            'did_how': [],
            'did_what': [],
            'due_date': rc.due_date,
            'editor_eyes_only': '',
            'final_assessment': [],
            'freewrite': '',
            'journal': rc.journal,
            'recommendation': '',
            'title': rc.title,
            'validity_assessment': [],
            'year': year,
        }

        if rc.reviewer:
            pdoc['reviewer'] = rc.reviewer
        else:
            try:
                rc.reviewer = rc.default_user_id
                pdoc['reviewer'] = rc.reviewer
            except AttributeError:
                print(
                    "Please set default_user_id in '~/.config/regolith/user.json', or you need to enter your group id "
                    "in the command line")
                return

        pdoc['submitted_date'] = rc.submitted_date if rc.submitted_date else 'tbd'

        if rc.name:
            # Single-word names are parsed as a first name only.
            pdoc['first_author_last_name'] = name.last if has_last else name.first

        pdoc['requester'] = rc.requester if rc.requester else ''

        if not rc.status:
            pdoc['status'] = 'accepted'
        elif rc.status in ALLOWED_STATI:
            pdoc['status'] = rc.status
        else:
            raise ValueError(
                "status should be one of {}".format(ALLOWED_STATI))

        pdoc["_id"] = key
        rc.client.insert_one(rc.database, rc.coll, pdoc)

        print("{} manuscript has been added/updated in manuscript reviews".format(
            rc.name))
Пример #28
0
def parseCaption(caption):
    """Extract the names of the people mentioned in a photo caption.

    The caption is trimmed at the first ``findAt`` match and at the first
    ``' in '``, split into chunks with ``fpunkt``, and the chunks are then
    split further on "with" / "and".  Chunks that do not look like names
    (too few capital letters, matching ``fthe``, or matching ``ffrnd``) are
    dropped.

    Relies on module-level compiled patterns (``findAt``, ``fpunkt``,
    ``fdr``, ``fwith``, ``fand``, ``fthe``, ``ffrnd``) and on
    ``parser.HumanName`` -- presumably nameparser's; confirm against the
    file's imports.

    Args:
        caption: the caption text.

    Returns:
        ``None`` when the caption is not between 2 and 249 characters long,
        or has fewer than four words once the "at"/"in" tail is removed.
        Otherwise a list of cleaned-up names; the list is empty when fewer
        than two candidate chunks survive the filters.
    """
    debug = False  # set to True for a step-by-step trace on stdout
    rejects = []   # discarded fragments; only reported by the debug trace

    def trace(*parts):
        # Debug-only output.  A single-argument print() call behaves the
        # same on Python 2 and 3; joining with one space mirrors what the
        # Python 2 print statement did for comma-separated items.
        if debug:
            print(' '.join('%s' % (part,) for part in parts))

    trace(" ")
    trace(caption)

    caplen = len(caption)
    if not (caplen > 1 and caplen < 250):  # ignore short/long descriptions
        return None

    ## split on 'AT' and 'IN'
    # throw away everything after "so-and-so AT the ball"
    at_match = findAt.search(caption)
    if at_match:
        rejects.append(caption[at_match.start():])
        caption = caption[:at_match.start()].strip()
    if ' in ' in caption:
        caption, _, in_tail = caption.partition(' in ')
        rejects.append(in_tail)
    trace("After at/in : ", caption)

    ### skip if less than 4 words
    if len(caption.split()) < 4:
        trace(" nWords < 4")
        return None

    # each chunk is a set of words
    chunks = fpunkt.split(caption)

    ### strip "Dr."
    # NOTE: the original also had "Jr." / "Sr." passes, but they assigned
    # their result to the loop variable and therefore never changed
    # `chunks`.  They are left out so the returned names stay exactly as
    # callers see them today; add them back with `chunks[ind] = ...` if
    # suffix stripping is actually wanted.
    for ind, chunk in enumerate(chunks):
        if fdr.search(chunk):
            trace("")
            trace("Dr. Sub")
            trace(chunk)
            chunks[ind] = ''.join(fdr.split(chunk)).strip()
            trace(chunks)

    trace(chunks)

    ### split at "Bob WITH Kate"
    # The list grows while it is being enumerated, so the inserted piece is
    # visited on the next iteration.
    for ind, chunk in enumerate(chunks):
        if fwith.search(chunk):
            pieces = chunk.split(' with ')
            # fwith may match text that the literal ' with ' split does not
            # (e.g. different case); leave such chunks alone instead of
            # failing on pieces[1].
            if len(pieces) > 1:
                chunks[ind] = pieces[0]
                chunks.insert(ind + 1, pieces[1])

    ### handling 'AND' ###
    for ind, chunk in enumerate(chunks):
        # chunk starts with the "and" prefix (implies a list: "..., and Bob")
        if fand.match(chunk):
            # drop the first 5 characters -- presumably " and "; confirm
            # against the fand pattern
            chunks[ind] = chunks[ind][5:]
            chunk = chunks[ind]

        # separating "Husb and Wife Smith", etc
        if ' and ' in chunk:
            temp = chunk.split(' and ')
            name1 = temp[0].strip()
            name2 = temp[1].strip()
            human1 = parser.HumanName(name1)
            human2 = parser.HumanName(name2)

            trace("ind = ", ind, "; temp = ", temp)
            trace("name1 = ", name1, ";  name2 = ", name2)
            trace("human1 = ", human1)
            trace("human2 = ", human2)

            # "Husband and Wife Smith": the first person has no surname of
            # their own, so borrow the second person's.
            if not human1.last:
                first_person = name1 + ' ' + human2.last
            else:
                first_person = name1

            chunks[ind] = first_person
            chunks.insert(ind + 1, temp[-1])

    ### check for capitalized words to see if this is names ###
    cutList = []
    for chunk in chunks:
        nWords = len(chunk.split())
        if nWords:
            # Count upper-case characters directly on the chunk; going
            # through str() fails for non-ASCII unicode text on Python 2.
            nCaps = sum(1 for ch in chunk if ch.isupper())
            if (nWords - nCaps) > 1:
                cutList.append(chunk)
                trace("no caps")

    rejects.extend(cutList)
    for cut in cutList:
        chunks.remove(cut)

    if debug:
        trace("")
        trace("after capitals :")
        trace("  reject : ", rejects)
        trace("  keep :", chunks)
        dan.danpause()

    ### cut chunks with 'The'
    # Filter every matching chunk; removing by value once per distinct
    # string would let a duplicated chunk slip through.
    rejects.extend(chunk for chunk in chunks if fthe.search(chunk))
    chunks = [chunk for chunk in chunks if not fthe.search(chunk)]

    trace(' cutting "the" : ', chunks)

    ### upon exit ###
    names = []
    if len(chunks) > 1:  # need more than one person
        for chunk in chunks:
            chunk = chunk.strip()
            if len(chunk.split()) > 1:
                # make sure it doesn't say "friend"
                if not ffrnd.search(chunk):
                    # condense runs of whitespace into single spaces
                    names.append(re.sub(r'\s+', ' ', chunk))
                else:
                    trace("rej : ", chunk)
                    rejects.append(chunk)

    return names