Exemplo n.º 1
0
def applyManualCorrections(acronymDB):
    """Apply the manual expansion corrections from the corrections CSV.

    Reads ``file_msh_manual_corrections`` (comma-delimited, with the columns
    ``acronym``, ``wrong_expansion`` and ``correct_expansion``) and, for each
    row, replaces the first element of every ``acronymDB[acronym]`` entry
    that equals the wrong expansion with the correct one.

    Args:
        acronymDB: mapping from acronym to a list of mutable entries whose
            first element is the expansion. Entries are modified in place.

    Returns:
        The same ``acronymDB`` object, after the corrections were applied.

    Raises:
        KeyError: if a row names an acronym missing from ``acronymDB``
            (assuming ``acronymDB`` behaves like a plain dict).
    """
    # The file mode is kept exactly as before ("rb"); the context manager
    # only guarantees the handle is closed, even if a row fails to process.
    with open(file_msh_manual_corrections, "rb") as corrections_file:
        for line in DictReader(corrections_file, delimiter=","):
            acronym = TextTools.toUnicode(line["acronym"])
            wrong_exp = TextTools.toUnicode(line["wrong_expansion"])
            correct_exp = TextTools.toUnicode(line["correct_expansion"])

            for entry in acronymDB[acronym]:
                if entry[0] == wrong_exp:
                    entry[0] = correct_exp

    return acronymDB
Exemplo n.º 2
0
def serialGetWordCorpus(articleDB):
    """Run ``TextTools.getCleanedWords`` over every article, one at a time.

    ``stem_words`` and ``removeNumbers`` are not parameters; they are read
    from the enclosing scope and forwarded unchanged to the cleaner.

    Args:
        articleDB: mapping from article id to article text.

    Returns:
        dict mapping each article id to whatever
        ``TextTools.getCleanedWords`` returned for its text.
    """
    corpus = {}
    for doc_id, raw_text in articleDB.items():
        corpus[doc_id] = TextTools.getCleanedWords(
            raw_text, stem_words=stem_words, removeNumbers=removeNumbers)
        processed = len(corpus)
        # Progress message after every 1000 converted articles.
        if processed % 1000 == 0:
            common_logger.debug(
                "converted %d articles to words" % processed)
    return corpus
Exemplo n.º 3
0
def writefile(f, defs):
    """Write ``defs`` to ``f`` as the source of a dict named ``entitydefs``.

    Args:
        f: object with a ``write`` method that accepts text.
        defs: mapping from entity name to a ``(charcode, comment)`` pair.

    Entries are written sorted by name, one per line, with the comment
    (passed through ``TextTools.collapse``) appended as a ``#`` comment.
    A charcode starting with ``&#`` whose numeric value is below 256 is
    written as a quoted octal escape (for example ``'\\240'``); every other
    charcode is written as its ``repr``.
    """
    f.write("entitydefs = {\n")
    items = sorted(defs.items())
    for name, (charcode, comment) in items:
        if charcode[:2] == '&#':
            # Drop the leading '&#' and the final character (presumably
            # the terminating ';') to obtain the decimal code.
            code = int(charcode[2:-1])
            if code < 256:
                # Explicit '\\' instead of the invalid escape '\%': the
                # emitted text is still a backslash followed by octal digits.
                charcode = "'\\%o'" % code
            else:
                charcode = repr(charcode)
        else:
            charcode = repr(charcode)
        comment = TextTools.collapse(comment)
        f.write("    '%s':\t%s,  \t# %s\n" % (name, charcode, comment))
    f.write('\n}\n')
def writefile(f, defs):
    """Emit the ``entitydefs`` dictionary source for ``defs`` into ``f``.

    Args:
        f: writable text target (only ``f.write`` is used).
        defs: mapping ``name -> (charcode, comment)``.

    Each entry becomes one line ``'name': <charcode>,  # <comment>``, in
    sorted name order. Numeric references (``&#...``) with a value under
    256 are rendered as a quoted octal escape; anything else is rendered
    with ``repr``. Comments go through ``TextTools.collapse`` first.
    """
    f.write("entitydefs = {\n")
    for name, (charcode, comment) in sorted(defs.items()):
        rendered = None
        if charcode[:2] == '&#':
            # Slice off '&#' and the last character (presumably ';').
            code = int(charcode[2:-1])
            if code < 256:
                # '\\' replaces the invalid escape sequence '\%'; the
                # written output is byte-for-byte what it was before.
                rendered = "'\\%o'" % code
        if rendered is None:
            rendered = repr(charcode)
        comment = TextTools.collapse(comment)
        f.write("    '%s':\t%s,  \t# %s\n" % (name, rendered, comment))
    f.write('\n}\n')
Exemplo n.º 5
0
 async def summary(self, ctx, *, msg):
     """Send the summary and profile image for a mentioned user or character.

     If the invoking message mentions someone, the first mention is used.
     Otherwise ``msg`` (stripped) is looked up with ``User.toon_search`` and
     the first result's ``discord_id`` is converted to a member with
     ``MemberConverter``.

     When no mention is present and the search finds nothing, a
     "Could not find character" message is sent and the command stops;
     previously this path failed with ``AttributeError`` because ``user``
     was still an empty string.
     """
     user_toon = msg.strip()
     user = None
     if ctx.message.mentions:
         user = ctx.message.mentions[0]
     else:
         results = User.toon_search(user_toon)
         if results.first() is not None:
             # Only the first search result is used.
             for x in results:
                 converter = MemberConverter()
                 user = await converter.convert(ctx, x.discord_id)
                 break

     if user is None:
         await ctx.message.channel.send("Could not find character: " +
                                        user_toon)
         return

     user_summary = TextTools.list_summary(str(user.id), user.joined_at,
                                           str(ctx.message.guild.id))
     # Each item of the summary is sent as its own message.
     for x in user_summary:
         await ctx.message.channel.send(x)
     await Users.get_profile_image(self, ctx=ctx, msg=user.mention)
Exemplo n.º 6
0
def _createArticleAndAcronymDB():
    """Build the acronym and article databases from the ARFF files.

    Every file in ``folder_msh_arff`` is read with ``arff.Reader``. The
    acronym comes from the file name (upper-cased) and the candidate CUIDs
    from the reader's ``relation`` string, split on ``"_"``.

    Returns:
        tuple ``(acronymDB, articleDB)`` where

        * ``acronymDB`` maps each acronym to a list of
          ``[expansion, pmid, 0]`` entries; when no expansion could be
          extracted for a CUID, the CUID itself takes the expansion slot
          and an error is logged.
        * ``articleDB`` maps each PMID to the citation text with markup
          removed (the first occurrence of a PMID wins).
    """
    acronymExpander = Expander_fromText_v2()
    articleDB = {}
    acronymDB = {}
    CUID_to_expansion = {}
    for fileName in os.listdir(folder_msh_arff):
        filePath = os.path.join(folder_msh_arff, fileName)
        # The context manager closes each ARFF file once it has been read;
        # previously every handle stayed open.
        with open(filePath, "rb") as arff_file:
            file_reader = arff.Reader(arff_file)
            # the iterator needs to be called for the self.relation part to
            # be initialized
            lines = list(file_reader)
            cuids = file_reader.relation.strip().split("_")
        # storing all acronyms as uppercase values
        acronym = _fileNameToAcronym(fileName).upper()
        cuid_and_pmid = []
        for line in lines:
            pmid = unicode(line.PMID)
            text = TextTools.toUnicode(line.citation)
            cuid = cuids[_classToIndex(line["class"])]
            textWithoutMarkup = _removeMarkup(text)
            if cuid not in CUID_to_expansion:
                # Try to extract the expansion from this citation; a result
                # equal to the acronym itself is not accepted.
                acronymExpansions = acronymExpander.expand(
                    acronym, [], textWithoutMarkup)
                if (len(acronymExpansions) != 0 and
                        acronymExpansions[0].expansion != acronym):
                    CUID_to_expansion[cuid] = acronymExpansions[0].expansion
            if pmid not in articleDB:
                articleDB[pmid] = textWithoutMarkup
            cuid_and_pmid.append([cuid, pmid])

        if acronym in acronymDB:
            # Logged only: entries are still appended to the existing list.
            common_logger.error("acronym already present in acronymDB")
        else:
            acronymDB[acronym] = []
        for cuid, pmid in cuid_and_pmid:
            if cuid in CUID_to_expansion:
                acronymDB[acronym].append([CUID_to_expansion[cuid], pmid, 0])
            else:
                common_logger.error(
                    "Expansion not found for CUID %s of %s" % (cuid, acronym))
                acronymDB[acronym].append([cuid, pmid, 0])

    return acronymDB, articleDB
Exemplo n.º 7
0
 async def find_toon(self, ctx, *, msg):
     """Report who owns a character, or list a mentioned user's characters.

     With a mention in the invoking message, the first mentioned user's id
     is looked up via ``User.toon_search_by_user`` and each item produced by
     ``TextTools.list_toons`` is sent as a message. Without a mention,
     ``msg`` (stripped) is looked up via ``User.toon_search`` and one message
     is sent per matching row. Either way, an empty result produces a
     "Could not find ..." message instead.
     """
     channel = ctx.message.channel
     mentioned = ctx.message.mentions

     if mentioned:
         member_id = str(mentioned[0].id)
         matches = User.toon_search_by_user(member_id)
         if matches.first() is None:
             await channel.send(
                 "Could not find any characters for: <@!" + member_id + ">")
             return
         for text in TextTools.list_toons(matches):
             await channel.send(text)
         return

     user_toon = msg.strip()
     matches = User.toon_search(user_toon)
     if matches.first() is None:
         await channel.send("Could not find character: " + user_toon)
         return
     for row in matches:
         await channel.send(
             "Character: " + row.character + " was added to: <@!" +
             row.discord_id + "> on: " + row.timestamp)
Exemplo n.º 8
0
#!/usr/local/bin/python
Exemplo n.º 9
0
#!/usr/local/bin/python