def set_journal(self, journal):
    """Record a journal's cleaned name and its abbreviation.

    The value of ``clean_line(journal)`` is handed to ``self._set_item``
    together with ``self.journaltag`` and the node fetched for "titles".
    The value of ``JournalCleanup.abbreviate(journal)`` is handed to
    ``self._set_item`` together with ``self.abbrevtag`` and the node
    fetched for "periodical".

    Args:
        journal: The journal name as supplied by the caller.
    """
    # Full journal name.
    titles_node = self._fetch_node("titles")
    full_title = clean_line(journal)
    self._set_item(full_title, self.journaltag, titles_node)

    # Abbreviation, computed from the raw argument rather than the
    # cleaned name.
    periodical_node = self._fetch_node("periodical")
    short_title = JournalCleanup.abbreviate(journal)
    self._set_item(short_title, self.abbrevtag, periodical_node)
def process_authors(entries):
    """Format author name parts as "Last, First" strings.

    Args:
        entries: Iterable of sequences. Each sequence is either
            ``(last, first, ...)`` (items past the second are ignored) or
            holds a single string containing the whole name, which is split
            on whitespace with its first two tokens taken as last and first.

    Returns:
        A list with one "Last, First" string per entry, in input order.
        The last name is passed through ``capitalize_word``. The first name
        is either spaced-out initials (see below) or the result of
        ``clean_line``.

    Raises:
        ValueError: If an entry yields fewer than two name parts.
        IndexError: If the first-name part is empty or whitespace only.
    """
    # Two consecutive capitals in the leading token of the first name are
    # treated as run-together initials (e.g. "JH"). Compiled once instead of
    # on every loop iteration.
    run_together_initials = re.compile("[A-Z]{2}")

    authors = []
    for entry in entries:
        if len(entry) == 1:
            # The whole name arrived as one string (the original author
            # noted seeing this with Chinese names). split() with no
            # arguments already drops surrounding whitespace from every
            # token, and it returns a real list, which the slice below
            # requires (a lazy map object is not subscriptable on Python 3).
            entry = entry[0].split()
        last, first = entry[:2]

        # Only the leading token of the first name is inspected.
        leading_token = first.split()[0]
        if run_together_initials.search(leading_token):
            # Put a space between every character: "JH" -> "J H". Any
            # tokens after the leading one are discarded in this branch.
            first = " ".join(leading_token)
        else:
            first = clean_line(first)

        authors.append("%s, %s" % (capitalize_word(last), first))
    return authors