Пример #1
0
    def _abbrev_word(cls, word):
        """Abbreviate one word, normalise its case and append a period.

        A word found in ``cls.keep`` is left unabbreviated. Any other word is
        replaced by the ``cls.abbrev_map`` value of the first key for which
        ``cls._startswith(word, key)`` is true; when no key matches, the word
        is kept unchanged.

        The resulting text is upper-cased if it is listed in ``cls.upper``,
        lower-cased if it is listed in ``cls.lower``, and otherwise passed
        through ``capitalize_word``. The return value is whatever
        ``cls._add_period`` produces for that text.
        """
        if word in cls.keep:
            shortened = word
        else:
            # First matching key wins, in ``abbrev_map`` iteration order;
            # fall back to the word itself when nothing matches.
            shortened = next(
                (
                    cls.abbrev_map[key]
                    for key in cls.abbrev_map
                    if cls._startswith(word, key)
                ),
                word,
            )

        # Case rules are looked up on the (possibly abbreviated) text.
        if shortened in cls.upper:
            cased = shortened.upper()
        elif shortened in cls.lower:
            cased = shortened.lower()
        else:
            cased = capitalize_word(shortened)

        return cls._add_period(cased)
Пример #2
0
    def abbreviate(cls, journal):
        """Build the abbreviated form of a journal title.

        The title is lower-cased, every key of ``cls.special`` is replaced by
        its mapped value, hyphens are turned into spaces and the text is split
        on whitespace.

        A title consisting of exactly one word is not abbreviated: the word is
        only passed through ``capitalize_word``. Otherwise each word that is
        not listed in ``cls.erase`` is run through ``cls._abbrev_word`` and the
        results are joined with single spaces.

        In both cases the text is handed to ``cls.final_process`` and its
        result is returned.
        """
        normalized = journal.lower()
        for pattern in cls.special:
            normalized = normalized.replace(pattern, cls.special[pattern])
        tokens = normalized.replace("-", " ").strip().split()

        # A single-word title is never abbreviated.
        if len(tokens) == 1:
            return cls.final_process(capitalize_word(tokens[0]))

        pieces = [
            cls._abbrev_word(token)
            for token in tokens
            if token not in cls.erase
        ]
        return cls.final_process(" ".join(pieces))
Пример #3
0
def process_authors(entries):
    """Format author entries as ``"Last, First"`` strings.

    Each entry is a sequence whose first two items are the last name and the
    first name(s). An entry with exactly one item is treated as a whole name
    held in a single string and is split on whitespace to obtain those parts.

    If the first token of the first name contains two consecutive capital
    letters (run-together initials such as ``"JK"``), that token's characters
    are emitted separated by spaces (``"J K"``) and the remainder of the first
    name is discarded. Otherwise the first name is passed through
    ``clean_line``. The last name is always passed through
    ``capitalize_word``.

    Returns:
        A list of formatted names, in the same order as ``entries``.

    Raises:
        ValueError: if an entry yields fewer than two name parts.
        IndexError: if the first name is empty or only whitespace.
    """
    # Compiled once instead of on every loop iteration.
    run_together_initials = re.compile(r"[A-Z]{2}")

    authors = []
    for entry in entries:
        parts = entry
        if len(entry) == 1:
            # Whole name in one string. ``split()`` already strips surrounding
            # whitespace from each token and returns a real list, which can be
            # sliced below (a lazy ``map`` object cannot on Python 3).
            parts = entry[0].split()
        last, first = parts[:2]

        # Only the first token of the first name is checked for initials.
        leading_token = first.split()[0]
        if run_together_initials.search(leading_token):
            # Separate every character of the token: "JK" -> "J K".
            first = " ".join(leading_token)
        else:
            first = clean_line(first)

        last = capitalize_word(last)
        authors.append("%s, %s" % (last, first))

    return authors
Пример #4
0
 def capitalize_word(cls, word):
     """Capitalize ``word``, treating ``/`` and ``-`` compounds specially.

     The separators are tried in the order ``"/"`` then ``"-"``. For the first
     one that occurs in ``word`` and is not its final character, the result of
     ``cls.capitalize_hyphenated_word(word, separator)`` is returned.

     When neither separator qualifies, the word is handed to the
     ``capitalize_word`` name looked up as a global. Assuming this def sits in
     a class body, that is a different function from this method.
     """
     for separator in ("/", "-"):
         splits_word = separator in word and word[-1] != separator
         if splits_word:
             return cls.capitalize_hyphenated_word(word, separator)
     # No separator applies (absent, or only trailing): plain capitalization.
     return capitalize_word(word)