Exemplo n.º 1
0
Arquivo: __init__.py Projeto: qirh/pt
def get_langs(word):
    """
    Return the 3 most probable languages for ``word``, most probable first.

    Two detectors are combined: TextBlob returns a single language and
    langdetect returns a list of candidate languages.

        if TextBlob == langdetect[0]:
            languages = langdetect[0:3]
        else:
            languages = [TextBlob] + langdetect[0:2]

    The returned list always has exactly 3 elements; trailing elements are
    ``None`` when fewer candidates are available. When ``len(word) < 3`` no
    detection is attempted and ``[None, None, None]`` is returned.
    """
    if len(word) < 3:
        return [None] * 3

    # NOTE(review): TextBlob.detect_language() looks deprecated/removed in
    # recent TextBlob releases -- confirm against the pinned version.
    first_lang = str(TextBlob(word).detect_language())
    langs = [str(candidate.lang) for candidate in detect_langs(word)]

    # TextBlob's guess takes the first slot whenever it disagrees with
    # langdetect's top candidate (or langdetect produced no candidates).
    if not langs or first_lang != langs[0]:
        langs.insert(0, first_lang)

    # Normalise to exactly 3 entries: drop extras, then pad with None.
    langs = langs[:3]
    langs.extend([None] * (3 - len(langs)))

    return langs
Exemplo n.º 2
0
def jaccard_calc(csv_file):
    """Read a CSV of responses and return a per-team table with a 'jaccard value' column.

    The CSV is loaded with pandas, passed through ``cleaning_and_lemmatizing``
    and grouped by the 'Teamname' column, collecting each team's
    'Shared Goal' entries into a list.

    For every team the value is ``numerator / response_length`` where:

    * ``response_length`` is the sum of ``len(response)`` over the team's
      responses, and
    * ``numerator`` is the sum of ``count - 1`` over every word whose count in
      the team's combined, per-response de-duplicated words exceeds 1.

    A team whose responses are all empty (``response_length == 0``) gets NaN
    instead of raising ``ZeroDivisionError``.

    Args:
        csv_file: Anything accepted by ``pandas.read_csv``.

    Returns:
        A DataFrame indexed by 'Teamname' with the columns 'Shared Goal'
        (list of the team's responses) and 'jaccard value'.
    """
    table = pd.read_csv(csv_file)

    # Cleaning, lemmatizing, then grouping responses by team into lists of wordlists.
    cleaned_table = cleaning_and_lemmatizing(table)
    # Build the per-team frame directly; a preliminary groupby-sum would only
    # concatenate every response and then be overwritten by this column.
    table_with_values = (
        cleaned_table.groupby('Teamname')['Shared Goal'].apply(list).to_frame()
    )

    # Jaccard calculation: collect in a plain list (np.append copies the
    # whole array on every call) and convert once at the end.
    jaccard_values = []
    for responses in table_with_values['Shared Goal']:
        response_length = 0
        past_responses = TextBlob('').words
        for response in responses:
            # presumably remove_dupes drops repeated words within a single
            # response, so only cross-response repeats are counted -- verify.
            past_responses += remove_dupes(response)
            response_length += len(response)

        past_word_freq = TextBlob(str(past_responses)).word_counts
        numerator = 0
        for word in list(past_word_freq):
            count = past_word_freq[word]
            if count > 1:
                numerator += count - 1

        if response_length:
            jaccard_values.append(numerator / response_length)
        else:
            # No words at all for this team: the ratio is undefined.
            jaccard_values.append(np.nan)

    # dtype=float keeps a float64 column even when there are no teams.
    table_with_values['jaccard value'] = np.array(jaccard_values, dtype=float)
    return table_with_values
Exemplo n.º 3
0
def get_sentiment(text, language="en", extended=True):
    """Analyse the sentiment of ``text``, translating it to English first if needed.

    Args:
        text: The message to analyse.
        language: Language of ``text``. Any value other than ``"en"`` causes
            the text to be translated to English (via TextBlob) before the
            sentiment is computed.
        extended: When true, delegate to ``__get_extended_analysis``;
            otherwise build a simple result dict.

    Returns:
        If ``extended`` is true, whatever
        ``__get_extended_analysis(text, language, blob)`` returns, where
        ``blob`` is the (possibly translated) TextBlob. Otherwise a dict with
        the keys ``'polarity'``, ``'subjectivity'`` and ``'tag'`` (the result
        of ``polarity_tag`` applied to the polarity).
    """
    logging.debug({'text': text, 'lang': language, 'ext': extended})
    tmp = TextBlob(text)
    if language != "en":
        # logging.warn is a deprecated alias (removed in Python 3.13); lazy
        # %-formatting also avoids a TypeError when language is not a str.
        logging.warning('Message in %s language. Need to be translated', language)
        tmp = tmp.translate(to='en')
        logging.debug('Translated: %s', tmp)

    if extended:
        logging.debug('Extended message reques')
        return __get_extended_analysis(text, language, tmp)

    logging.debug('Simple message request')
    sentiment = tmp.sentiment
    return {
        'polarity': sentiment.polarity,
        'subjectivity': sentiment.subjectivity,
        'tag': polarity_tag(sentiment.polarity),
    }