def get_langs(word):
    """Return the three most probable languages for ``word``.

    Two detectors are combined: TextBlob supplies a single guess, while
    langdetect supplies a list of candidates.

    * If TextBlob's guess equals langdetect's first candidate, the result is
      langdetect's first three candidates.
    * Otherwise the result is TextBlob's guess followed by langdetect's
      first two candidates.

    The returned list always has exactly three elements; trailing elements
    are ``None`` when fewer candidates are available.  Words shorter than
    three characters are not analysed and yield ``[None, None, None]``.
    """
    if len(word) < 3:
        return [None] * 3

    # NOTE(review): detect_language() appears to be deprecated/removed in
    # newer TextBlob releases -- confirm against the pinned version.
    textblob_guess = str(TextBlob(word).detect_language())
    candidates = [str(guess.lang) for guess in detect_langs(word)]

    # TextBlob's opinion goes first unless langdetect already leads with it.
    if candidates[0] != textblob_guess:
        candidates = [textblob_guess] + candidates

    # Pad with None, then cut back to exactly three entries.
    return (candidates + [None] * 3)[:3]
def jaccard_calc(csv_file):
    '''Read a CSV of responses and return a per-team table with an overlap score.

    The CSV is loaded with ``pd.read_csv``, passed through
    ``cleaning_and_lemmatizing`` and grouped by the ``'Teamname'`` column.
    For each team the ``'Shared Goal'`` responses are collected into a list,
    and a score is computed as::

        sum(count - 1 for every word whose count > 1) / total response length

    where ``count`` is how often a word occurs across the team's responses
    after each response has gone through ``remove_dupes``, and the total
    length is the sum of ``len(response)`` over the team's responses.

    Returns the grouped table (one row per team) with the columns
    ``'Shared Goal'`` (list of responses) and ``'jaccard value'``.

    NOTE(review): a team whose responses all have length 0 makes the final
    division raise ZeroDivisionError -- confirm whether that can occur.
    NOTE(review): assumes Python 3 true division for the score -- confirm.
    '''
    table = pd.read_csv(csv_file)
    # Clean and lemmatize, then group the responses by team so that each
    # team's 'Shared Goal' cell holds a list of its individual responses.
    # presumably each cleaned response is a word list -- verify against
    # cleaning_and_lemmatizing.
    cleaned_table = cleaning_and_lemmatizing(table)
    # The .sum() result only provides the per-team frame; its 'Shared Goal'
    # column is overwritten on the next line with the list of responses.
    summed_table = cleaned_table.groupby('Teamname')[['Shared Goal']].sum()
    summed_table['Shared Goal'] = cleaned_table.groupby('Teamname')['Shared Goal'].apply(list)
    table_with_values = summed_table
    # Score calculation: one value is appended per team, in table row order.
    jaccard_value = np.array([])
    for responses in table_with_values['Shared Goal']:
        response_length = 0
        numerator = 0
        # Start from an empty TextBlob word list and extend it per response.
        past_responses = TextBlob('').words
        for response in responses:
            # presumably remove_dupes drops repeated words within a single
            # response, so a word counts at most once per response -- confirm.
            past_responses += remove_dupes(response)
            # The denominator uses the response's full length, duplicates
            # included.
            response_length += len(response)
        # The accumulated words are turned into a string and re-parsed by
        # TextBlob to obtain its word -> count mapping.
        past_word_freq = TextBlob(past_responses.__str__()).word_counts
        for word in list(past_word_freq):
            # Every occurrence beyond the first counts as one overlap.
            if past_word_freq[word] > 1:
                numerator += past_word_freq[word] - 1
        jaccard_value = np.append(jaccard_value, numerator / response_length)
    table_with_values['jaccard value'] = jaccard_value
    return table_with_values
def get_sentiment(text, language="en", extended=True):
    """Analyse the sentiment of ``text``.

    Args:
        text: The message to analyse.
        language: Language code of ``text``.  Any value other than ``"en"``
            causes the text to be translated to English with
            ``TextBlob.translate(to='en')`` before analysis.
        extended: When true, delegate to ``__get_extended_analysis`` and
            return its result; otherwise return the simple summary below.

    Returns:
        With ``extended`` false, a dict with the keys ``'polarity'``,
        ``'subjectivity'`` and ``'tag'`` (``polarity_tag`` applied to the
        polarity).  With ``extended`` true, whatever
        ``__get_extended_analysis(text, language, blob)`` returns, where
        ``blob`` is the (possibly translated) TextBlob.
    """
    logging.debug({'text': text, 'lang': language, 'ext': extended})
    blob = TextBlob(text)

    if language != "en":
        # logging.warn is a deprecated alias; warning() is the supported
        # name.  %-style arguments keep the emitted text identical.
        logging.warning('Message in %s language. Need to be translated', language)
        blob = blob.translate(to='en')
        logging.debug('Translated: %s', blob)

    if extended:
        logging.debug('Extended message reques')
        return __get_extended_analysis(text, language, blob)

    logging.debug('Simple message request')
    # Read the sentiment once and reuse it for all three fields.
    sentiment = blob.sentiment
    return {
        'polarity': sentiment.polarity,
        'subjectivity': sentiment.subjectivity,
        'tag': polarity_tag(sentiment.polarity),
    }