def process_text(tweets, sw, LEM):
    """Turn each tweet into a list of cleaned, lower-cased tokens.

    For every item in ``tweets`` the text is taken from
    ``item["retweeted_status"]["text"]`` when the item contains a
    ``"retweeted_status"`` key, and from ``item["text"]`` otherwise.
    The text is passed through ``remove_url``, lower-cased, split on
    whitespace, and filtered so that tokens found in ``sw`` are dropped.

    Args:
        tweets: Iterable of mapping-like tweet objects with a ``"text"``
            key and optionally a ``"retweeted_status"`` mapping that has
            its own ``"text"`` key.
        sw: Container of stopwords; only membership tests are performed.
        LEM: When truthy, every remaining token is replaced by the result
            of ``uralicApi.lemmatize(token, "fin")``.

    Returns:
        A list with one entry per tweet. Each entry is a list of tokens,
        or, when ``LEM`` is truthy, a list of whatever
        ``uralicApi.lemmatize`` returns for each token (presumably a list
        of candidate lemmas per token -- confirm against uralicNLP).
    """
    processed_tweets = []
    for tweet in tweets:
        # When a "retweeted_status" entry is present, its text is used
        # instead of the top-level text; the rest of the pipeline is shared.
        if "retweeted_status" in tweet:
            raw_text = tweet["retweeted_status"]["text"]
        else:
            raw_text = tweet["text"]

        # Remove URL, lower-case, split, then drop stopwords.
        tokens = [
            w for w in remove_url(raw_text).lower().split() if w not in sw
        ]

        if LEM:
            tokens = [uralicApi.lemmatize(w, "fin") for w in tokens]

        processed_tweets.append(tokens)
    return processed_tweets
async def lemmatize(ctx, arg):
    # Look up `arg` with uralicApi.lemmatize using the "fin" language code
    # and send the outcome through ctx.send: every returned entry on its own
    # line, or a fixed notice when nothing was returned.
    found = uralicApi.lemmatize(arg, "fin")
    if len(found) != 0:
        reply = '\n'.join([entry for entry in found])
        await ctx.send(reply)
        return
    await ctx.send('word not found.')
def test_lemmatize(self):
    # The first lemma returned for "lehmäni" with the "fin" language code
    # (forcing the local backend) is expected to be "lehmä".
    lemmas = uralicApi.lemmatize(
        "lehmäni",
        "fin",
        force_local=True,
    )
    first_lemma = lemmas[0]
    self.assertEqual(first_lemma, 'lehmä')
def test_lemmatize_swe_bound(self):
    # With word_boundaries=True, the results for "livsmedel" under the "swe"
    # language code are expected to include the segmented form "livs|medel".
    result = uralicApi.lemmatize(
        "livsmedel",
        "swe",
        force_local=True,
        word_boundaries=True,
    )
    # assertIn reports the contents of `result` on failure, unlike
    # assertTrue(x in y), which only says "False is not true".
    self.assertIn('livs|medel', result)
def test_lemmatize_swe(self):
    # Without word boundaries, the first lemma returned for "livsmedel"
    # under the "swe" language code is expected to be the unsegmented word.
    lemmas = uralicApi.lemmatize(
        "livsmedel",
        "swe",
        force_local=True,
    )
    first_lemma = lemmas[0]
    self.assertEqual(first_lemma, 'livsmedel')
def test_lemmatize_fin_bound(self):
    # With word_boundaries=True, the first result for "autosaha" under the
    # "fin" language code is expected to be the segmented form "auto|saha".
    lemmas = uralicApi.lemmatize(
        "autosaha",
        "fin",
        force_local=True,
        word_boundaries=True,
    )
    first_lemma = lemmas[0]
    self.assertEqual(first_lemma, 'auto|saha')
def test_lemmatize_fin(self):
    # Without word boundaries, the first lemma returned for "autosaha"
    # under the "fin" language code is expected to be the unsegmented word.
    lemmas = uralicApi.lemmatize(
        "autosaha",
        "fin",
        force_local=True,
    )
    first_lemma = lemmas[0]
    self.assertEqual(first_lemma, 'autosaha')