def make_bibcode(self):
    """
    Compute the pieces of a 'GCN.' bibcode from ``self.data_dict``.

    Side effect: ``self.data_dict['volume']`` is right-padded with '.' to at
    least five characters and stored back.

    NOTE(review): the visible body only computes the pieces (first four
    characters of 'pubdate', the 'GCN.' prefix, the padded volume and the
    author initial); it neither joins nor returns them -- confirm whether
    the remainder of this method is missing.
    """
    year = self.data_dict['pubdate'][0:4]
    bibcode = 'GCN.'

    # The stored volume is padded to 5 characters; the local copy is padded
    # further to 9 characters and suffixed with '1'.
    self.data_dict['volume'] = self.data_dict['volume'].ljust(5, '.')
    volume = self.data_dict['volume'].ljust(9, '.') + '1'

    try:
        # First character of the first author entry, transliterated by u2asc.
        init = u2asc(self.data_dict['authors'][0][0])
    except Exception:
        # Deliberate best effort: any failure (missing authors, failed
        # transliteration, ...) falls back to the '.' placeholder.
        print("Problem generating author initial")
        init = '.'
def _normalize_author(self, author_str, collaborations_params): """ Normalizes an author name string ensuring capitalization and transforming first name to only initials """ try: # Transliterates unicode characters to ASCII author_str = u2asc(author_str.strip()) except Exception as err: logging.exception("Unexpected error transliterating author name\ unicode string to ASCII") # TODO: Implement better error control return self._normalize_author(self.unknown_author_str, collaborations_params) # Check first if it is a collaboration, given that collaboration strings # may have commas and it may be wrongly interpreted as a name collaboration = False for keyword in collaborations_params['keywords']: if keyword in author_str.lower(): collaboration = True break if collaboration: # Make sure there are no commas to avoid interpreting this name as 'last, first name' normalized_author_str = author_str.replace(",", "") else: match = self.regex_author.search(author_str) if match: # Last name detected ## Using .title() breaks dutch last names! 
# last_name = match.group('last_name').strip().title() last_name = match.group('last_name').strip() initials_list = [] # Collect initials from first name if it is present for i in range(self.max_first_name_initials): key = 'initial' + str(i) if match.group(key): initials_list.append(match.group(key).strip().upper()) initials_str = u" ".join(initials_list) # Form normalized author string where capitalization is guaranteed normalized_author_str = "{}, {}".format(last_name, initials_str) # Make sure there are no dots normalized_author_str = normalized_author_str.replace(u".", u"") else: # Make sure there are no commas to avoid interpreting this # name as 'last, first name' normalized_author_str = author_str.replace(u",", u"") # Make sure there are no dots or commas normalized_author_str = normalized_author_str.replace(u".", u"") normalized_author_str = normalized_author_str.strip() if len(normalized_author_str) == 0: normalized_author_str = self.normalized_unknown_author_str return normalized_author_str
def _normalize_author(self, author_str, collaborations_params): """ Normalizes an author name string ensuring capitalization and transforming first name to only initials """ try: # Transliterates unicode characters to ASCII author_str = u2asc(author_str.strip()) except Exception, err: logging.exception("Unexpected error transliterating author name\ unicode string to ASCII") # TODO: Implement better error control return self._normalize_author(self.unknown_author_str, collaborations_params)
def get_author_init(self, namestring):
    """
    Return the first alphabetic character of ``namestring``, upper-cased,
    after the string has been passed through ``u2asc``.

    :param namestring: author name to take the initial from
    :return: the upper-cased initial, or u'.' when the transliterated name
        contains no alphabetic character
    """
    ascii_name = u2asc(namestring)
    first_letter = next((ch for ch in ascii_name if ch.isalpha()), None)
    if first_letter is None:
        return u'.'
    return first_letter.upper()