Пример #1
0
def process_text(tweets, sw, LEM):
    """Turn each tweet into a list of lower-cased, stopword-filtered words.

    For every tweet the text is read from ``tweet["retweeted_status"]["text"]``
    when the ``"retweeted_status"`` key is present, otherwise from
    ``tweet["text"]``.  The text is passed through ``remove_url``, lower-cased
    and split on whitespace, and every word found in ``sw`` is dropped.

    When ``LEM`` is truthy each remaining word is replaced by whatever
    ``uralicApi.lemmatize(word, "fin")`` returns for it.

    Returns a list holding one processed word list per tweet, in input order.
    """
    processed_tweets = []

    for tweet in tweets:
        # Retweets keep their text under the nested "retweeted_status" entry.
        if "retweeted_status" in tweet:
            raw_text = tweet["retweeted_status"]["text"]
        else:
            raw_text = tweet["text"]

        # Strip URLs, normalise case, tokenise on whitespace.
        words = remove_url(raw_text).lower().split()

        # Drop stopwords.
        kept = [word for word in words if word not in sw]

        # Optional lemmatisation of the surviving words.
        if LEM:
            kept = [uralicApi.lemmatize(word, "fin") for word in kept]

        processed_tweets.append(kept)

    return processed_tweets
Пример #2
0
async def lemmatize(ctx, arg):
    # Looks up ``arg`` with uralicApi.lemmatize using the "fin" language code
    # and sends the results through ``ctx.send``, one result per line.
    # When the lookup returns nothing, 'word not found.' is sent instead.
    # NOTE(review): kept as comments rather than a docstring in case a command
    # framework reads this function's docstring at runtime — confirm.
    lemmas = uralicApi.lemmatize(arg, "fin")

    if len(lemmas) > 0:
        await ctx.send('\n'.join(list(lemmas)))
    else:
        await ctx.send('word not found.')
Пример #3
0
 def test_lemmatize(self):
     # Lemmatizing "lehmäni" with the "fin" language code must give 'lehmä'
     # as the first result.
     lemmas = uralicApi.lemmatize("lehmäni", "fin", force_local=True)
     first_lemma = lemmas[0]
     self.assertEqual(first_lemma, 'lehmä')
Пример #4
0
 def test_lemmatize_swe_bound(self):
     # With word_boundaries=True, lemmatizing "livsmedel" with the "swe"
     # language code must include 'livs|medel' among the results.
     # assertIn (rather than assertTrue on an `in` expression) makes a failure
     # show the actual results instead of just "False is not true".
     result = uralicApi.lemmatize(
         "livsmedel", "swe", force_local=True, word_boundaries=True
     )
     self.assertIn('livs|medel', result)
Пример #5
0
 def test_lemmatize_swe(self):
     # Without word boundaries, lemmatizing "livsmedel" with the "swe"
     # language code must give 'livsmedel' as the first result.
     lemmas = uralicApi.lemmatize("livsmedel", "swe", force_local=True)
     first_lemma = lemmas[0]
     self.assertEqual(first_lemma, 'livsmedel')
Пример #6
0
 def test_lemmatize_fin_bound(self):
     # With word_boundaries=True, lemmatizing "autosaha" with the "fin"
     # language code must give 'auto|saha' as the first result.
     lemmas = uralicApi.lemmatize(
         "autosaha", "fin", force_local=True, word_boundaries=True
     )
     first_lemma = lemmas[0]
     self.assertEqual(first_lemma, 'auto|saha')
Пример #7
0
 def test_lemmatize_fin(self):
     # Without word boundaries, lemmatizing "autosaha" with the "fin"
     # language code must give 'autosaha' as the first result.
     lemmas = uralicApi.lemmatize("autosaha", "fin", force_local=True)
     first_lemma = lemmas[0]
     self.assertEqual(first_lemma, 'autosaha')