def translate_de(self):
    # Concatenate an English version of every feedback comment.
    # NOTE: detect_language() and translate() call the Google Translate API.
    result = ''  # renamed from `str`, which shadowed the builtin
    for row in self.feedbackcomment:
        blob = TextBlobDE(row)
        if blob.detect_language() == "en":
            blob_en = blob
        else:
            blob_en = blob.translate(to="en")
        result = result + blob_en.string
    return result
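# Hedged usage sketch: the Feedback class below is a hypothetical stand-in for
# whatever object carries `feedbackcomment`; detect_language() and translate()
# need network access and were removed in newer textblob releases.
from textblob_de import TextBlobDE

class Feedback:
    def __init__(self, comments):
        self.feedbackcomment = comments

Feedback.translate_de = translate_de  # attach the function above as a method

fb = Feedback(["Das Zimmer war sehr schön.", "The service was great."])
print(fb.translate_de())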
def data_prep_to_predict(eintrag, freiab, freibis, mitgliedseit, miete, groesse, area, text):
    # NOTE: ':\)' is a regex-escaped smiley, but it is only used in a plain
    # substring check below, so it will rarely match literal text.
    keywords = ['möbliert', 'unmöbliert', 'bitte', 'leider', 'Skype',
                'besichtigung', 'xx', ':\)']
    # features to prepare:
    extra_sentiments = ['polarity_de', 'polarity_de_min', 'polarity_de_max',
                        'polarity_de_median']
    column_list = ['miete_delta', 'groesse', 'days_to_freiab', 'days_to_rent',
                   'popular_area', 'new_user'] + keywords + extra_sentiments

    # popular_area and miete_delta
    areas = ['kreuzberg', 'wedding', 'neukoelln', 'charlottenburg', 'mitte',
             'friedrichshain', 'prenzlauerberg', 'moabit']
    popular_area = 0
    miete_delta = miete - 436
    if clean_text(area) in areas:
        popular_area = 1
        miete_delta = miete - 470

    # days_to_freiab
    days_to_freiab = abs((freiab - eintrag).days)
    # days_to_rent
    days_to_rent = abs((freibis - freiab).days)

    # polarity_de: document-level polarity plus per-sentence statistics
    tb_obj = TextBlobDE(text)
    polarity_de = tb_obj.polarity
    sentences_polarity_de = []
    for sentence in tb_obj.sentences:
        sentences_polarity_de.append(TextBlobDE(str(sentence)).polarity)
    polarity_de_median = np.median(sentences_polarity_de)
    polarity_de_min = np.min(sentences_polarity_de)
    polarity_de_max = np.max(sentences_polarity_de)
    sentiment_features = [polarity_de, polarity_de_min, polarity_de_max,
                          polarity_de_median]

    # new_user
    new_user = 0
    if abs((eintrag - mitgliedseit).days) < 30:
        new_user = 1

    # keyword features
    keyword_features = []
    for word in keywords:
        if word in text:
            keyword_features.append(1)
        else:
            keyword_features.append(0)

    feature_list = [miete_delta, groesse, days_to_freiab, days_to_rent,
                    popular_area, new_user] + keyword_features + sentiment_features
    features = pd.DataFrame(feature_list).T
    features.columns = column_list
    return features
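# Hedged usage sketch. clean_text() is a project helper not shown here; the
# stand-in below (lowercasing) is a hypothetical placeholder.
from datetime import date
import numpy as np
import pandas as pd

def clean_text(s):  # hypothetical stand-in for the real helper
    return s.lower().replace(" ", "")

features = data_prep_to_predict(
    eintrag=date(2020, 1, 1),         # listing posted
    freiab=date(2020, 2, 1),          # available from
    freibis=date(2020, 8, 1),         # available until
    mitgliedseit=date(2019, 12, 20),  # member since
    miete=500, groesse=20, area='Kreuzberg',
    text='Schönes Zimmer. Bitte keine Anrufe.')
print(features.iloc[0])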
def word_translate(inputtext, language):
    Aufgabe = {
        "Kopfzeile": "Name: Klasse: Datum: \n ",
        "Titel": "",
        "1. Aufgabe": "Übersetze!\n",
        "Hinweise": "Hier ist die Wortliste: \n",
        "Rätselwörter": "Hier ein paar Rätselwörter aus dem Text: \n",
    }
    doc = docx.Document()  # initializing python-docx
    save_path = docxprint.docx_print(Doc=doc, save='word-translate')
    docxprint.docx_print(printText=Aufgabe["Kopfzeile"], Bold=True, Doc=doc)
    docxprint.docx_print(printText=Aufgabe["1. Aufgabe"], Bold=True, Doc=doc)

    nlp = languageload.language_load(language)
    docnlp = nlp(inputtext)  # load into spaCy

    # prepare for checking; still unsorted and unfiltered
    inputtext_prepared = []
    for token in docnlp:
        if str(token).isalpha():
            inputtext_prepared.append(str(token))
    inputtext_prepared = " \n ".join(inputtext_prepared)
    print(inputtext_prepared)

    blob = TextBlobDE(inputtext_prepared)
    translation = blob.translate(from_lang='en', to="de")  # bg - Bulgarian, de - German, en - English
    print(translation)

    wordlist_translated = translation.split("\n")
    inputtext_prepared = inputtext_prepared.split("\n")
    print(len(inputtext_prepared), len(wordlist_translated))
    print(inputtext_prepared)
    print(wordlist_translated)

    result = []
    for i in range(len(wordlist_translated)):
        result.append(inputtext_prepared[i].lower() + "\t-" + wordlist_translated[i])
    result = list(set(result))
    result.sort()

    docxprint.docx_print(printText=inputtext, Doc=doc)
    docxprint.docx_print(printText=Aufgabe["Hinweise"], Bold=True, Doc=doc)
    for pair in result:  # renamed from `translation`, which shadowed the blob result
        print(pair)
        docxprint.docx_print(printText=pair, Doc=doc)
    doc.save(save_path)
def test_word_lists_de():
    animals = TextBlobDE("katze hund octopus ocropus aktienführer stammaktien syndikus anwälte ")
    pluralized_words = animals.words.pluralize()
    lemmatized_words = animals.words.lemmatize()

    blob = TextBlobDE("das ist ein deutscher Text mit asbjaskfbjjn als fremdwort salut! space")
    # this does not detect foreign words as such
    # tag meanings: http://blog.thedigitalgroup.com/sagarg/wp-content/uploads/sites/12/2015/06/POS-Tags.png
    tags = blob.tags
    for word in blob.words:
        # each detect_language() call issues a Google Translate API request
        print(word, "language is: ", word.detect_language())
    print("done")
def analyse(comments):
    allcomments = []
    polarity = []
    for comment in comments:
        try:
            allcomments.append(comment)
            try:
                if detect(comment) == 'de':
                    text = TextBlobDE(comment)
                    polarity.append(text.sentiment.polarity)
                elif detect(comment) == 'fr':
                    blob = TextBlob(comment, pos_tagger=PatternTagger(),
                                    analyzer=PatternAnalyzer())
                    polarity.append(blob.sentiment[0])
                else:
                    text = TextBlob(comment)
                    polarity.append(text.sentiment.polarity)
            except Exception:
                # language detection failed; fall back to the English analyzer
                text = TextBlob(comment)
                polarity.append(text.sentiment.polarity)
        except Exception:
            pass
    return allcomments, polarity
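# Hedged usage sketch: the snippet above assumes these imports; detect() is
# most likely langdetect.detect, and the French tagger/analyzer come from
# the textblob-fr package.
from langdetect import detect
from textblob import TextBlob
from textblob_de import TextBlobDE
from textblob_fr import PatternTagger, PatternAnalyzer

comments = ["Das ist großartig!", "C'est terrible.", "This is fine."]
texts, scores = analyse(comments)
for t, s in zip(texts, scores):
    print(f"{s:+.2f}  {t}")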
def german_semantic(text):
    import re
    from nltk.corpus import stopwords
    from nltk.stem.cistem import Cistem

    stop_words = set(stopwords.words("german"))  # renamed to avoid shadowing the module
    stemmer = Cistem()
    liste = []
    wordlist = []

    # clean up the text
    # (str.replace() does not take a regex, so punctuation is stripped with re.sub)
    text = text.lower()
    text = re.sub(r"[^\w\s]", "", text)
    text = re.sub(r"\s+", " ", text)

    # delete stopwords
    for word in text.split():
        if word not in stop_words:
            liste.append(word)
    text = " ".join(liste)

    # stemming
    for word in text.split():
        wordlist.append(stemmer.segment(word)[0])
    text = " ".join(wordlist)

    # sentiment
    blob = TextBlobDE(text)
    return blob.sentiment.polarity, blob.sentiment.subjectivity
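# Minimal usage sketch; requires the NLTK German stopword corpus
# (nltk.download('stopwords')) and the textblob-de package.
from textblob_de import TextBlobDE

polarity, subjectivity = german_semantic(
    "Der Film war wirklich großartig, nur das Ende hat mich enttäuscht.")
print(polarity, subjectivity)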
def _determine_polarity_textblob(self, text_series: Series) -> Series:
    """
    For each paragraph (row in a series), the polarity is calculated with TextBlob.

    :param text_series: series containing the text whose polarity is to be determined
    :return: series containing the polarity for the corresponding text
    """
    tqdm.pandas(desc="Determine sentiment polarity with TextBlob")
    return text_series.progress_apply(lambda doc: TextBlobDE(doc).sentiment[0])
def blob_classify(text, id, collection):
    blob = TextBlobDE(text)
    # write the polarity back to the MongoDB document
    client.spiegel[collection].update(
        {'_id': id},
        {"$set": {'blobPolarity': float(blob.sentiment.polarity)}})
    print(blob.sentiment.polarity)
def complex_terms_satz(satz):
    word_list = TextBlobDE(satz).words
    for word in word_list:
        lemma = lemmatize_word(word)
        if find_word_status(lemma):  # the lemma is already an easy word
            print('Leichtes Wort: ' + word)
        elif lemma in basic_german:  # an easy alternative is known
            easy_variant = basic_german[lemma]
            print(word + " hat eine leichte Alternative: " + str(easy_variant))
def lex_vereinfache(satz):
    word_list = TextBlobDE(satz).words
    for word in word_list:
        lemma = lemmatize_word(word)
        syns = synonyms(lemma)
        if syns is not None:
            find_easy_syn(lemma, syns)
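# Hedged usage sketch for the two simplification helpers above:
# lemmatize_word(), find_word_status(), synonyms(), find_easy_syn() and the
# basic_german dictionary are project-specific pieces assumed to be defined
# elsewhere in this codebase.
satz = "Die Konferenz wurde wegen unvorhergesehener Umstände verschoben."
complex_terms_satz(satz)   # flags easy words / easy alternatives
lex_vereinfache(satz)      # proposes easy synonyms where available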
def get_de_tweet_sentiment(tweet):
    '''
    Utility function to score the sentiment of the passed tweet
    using TextBlob's sentiment method.
    '''
    # create a TextBlobDE object from the cleaned tweet text
    analysis = TextBlobDE(clean_tweet(tweet))
    # return the sentiment polarity
    return analysis.sentiment.polarity
def sentiment_textblobde(self):
    textblobde_score = [
        round(TextBlobDE(article).sentiment.polarity, 3)
        for article in self.feedbackcomment
    ]
    textblobde_category = [
        'positive' if score > 0 else 'negative' if score < 0 else 'neutral'
        for score in textblobde_score
    ]
    return textblobde_score, textblobde_category
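# Hedged usage sketch: sentiment_textblobde() only needs an object with a
# `feedbackcomment` iterable, so a simple namespace stands in here.
from types import SimpleNamespace

holder = SimpleNamespace(feedbackcomment=[
    "Das Produkt ist hervorragend.",
    "Leider eine große Enttäuschung.",
])
scores, categories = sentiment_textblobde(holder)
print(list(zip(scores, categories)))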
def analyze(self, text):
    # Analyze the polarity of each text in the appropriate language.
    # TextBlob is used mainly for its ease of implementation across languages.
    # The Dutch TextBlob uses the same engine as the English one, but with a
    # dedicated Pattern tagger and analyzer.
    if self.language == 'dutch':
        blob = TextBlob(text, pos_tagger=PatternTagger(), analyzer=PatternAnalyzer())
    elif self.language == 'english':
        blob = TextBlob(text)
    elif self.language == 'german':
        blob = TextBlobDE(text)
    else:
        # guard added: the original fell through here and raised
        # UnboundLocalError for unsupported languages
        raise ValueError("Unsupported language: " + self.language)
    return blob
def build_naive():
    with open('raw_num_labeled', 'rb') as fs:
        training_list = pickle.load(fs)
    cl = NaiveBayesClassifier(training_list[:3000])
    print(cl.classify('das ist echt toll'))
    blob = TextBlobDE("Das ist super schade. Das tut mir so leid.")
    for s in blob.sentences:
        print(s)
        print(cl.classify(str(s)))  # classify() expects a string, not a Sentence
    print(cl.accuracy(training_list[3000:]))
def get_sentiment(text, language):
    if isinstance(text, str):
        if language == 'DE':
            blob = TextBlobDE(text)
            return [blob.sentiment.polarity, blob.sentiment.subjectivity]
        elif language == 'FR':
            tb = Blobber(pos_tagger=PatternTaggerFR(), analyzer=PatternAnalyzerFR())
            blob = tb(text)
            return blob.sentiment
        else:
            blob = TextBlob(text)
            return [blob.sentiment.polarity, blob.sentiment.subjectivity]
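# Hedged usage sketch: Blobber comes from textblob, and PatternTaggerFR /
# PatternAnalyzerFR are presumably the textblob-fr classes under aliased
# names, e.g.:
# from textblob import Blobber
# from textblob_fr import PatternTagger as PatternTaggerFR, PatternAnalyzer as PatternAnalyzerFR
print(get_sentiment("Das Wetter ist herrlich.", "DE"))
print(get_sentiment("Ce film est magnifique.", "FR"))
print(get_sentiment("The weather is lovely.", "EN"))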
def get_de_tweet_sentiment(tweet):
    '''
    Utility function to classify the sentiment of the passed tweet
    using TextBlob's sentiment method.
    '''
    # create a TextBlobDE object from the cleaned tweet text
    analysis = TextBlobDE(clean_tweet(tweet))
    # map the polarity onto a three-way label
    if analysis.sentiment.polarity > 0:
        return 'positive'
    elif analysis.sentiment.polarity == 0:
        return 'neutral'
    else:
        return 'negative'
def get_article_sentiment(article):
    """
    Extracts sentiment analysis for an article.

    @param article: article dictionary (retrieved from the Data Lake)
    @returns: (article_level_polarity, article_level_subjectivity)
    """
    if language_dict[article['media']] == 'DE':
        blob = TextBlobDE(article['text'])
        polarity, subjectivity = (blob.sentiment.polarity, blob.sentiment.subjectivity)
    elif language_dict[article['media']] == 'FR':
        tb = Blobber(pos_tagger=PatternTaggerFR(), analyzer=PatternAnalyzerFR())
        blob = tb(article['text'])
        polarity, subjectivity = blob.sentiment
    else:
        # anything else defaults to the English analyzer (just for the PoC)
        blob = TextBlob(article['text'])
        polarity, subjectivity = (blob.sentiment.polarity, blob.sentiment.subjectivity)
    return polarity, subjectivity
def create_sen_indices(word2idx, data):
    """
    Convert each sentence to a list of indices.

    :param word2idx: word-to-index mapping dictionary
    :param data: list of sentences to be transformed
    :return: list of lists, one per sentence, where each token is replaced by
             its index from word2idx ('<UNK>' for out-of-vocabulary tokens)
    """
    sen_indices = []
    for sen in data:
        idx = []
        blob = TextBlobDE(sen)
        for w in blob.tokens:
            if w not in word2idx:
                idx.append(word2idx['<UNK>'])
            else:
                idx.append(word2idx[w])
        sen_indices.append(idx)
    return sen_indices
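# Minimal usage sketch with a toy vocabulary; '<UNK>' must be present in
# word2idx because the function falls back to it for out-of-vocabulary tokens.
word2idx = {'<UNK>': 0, 'Das': 1, 'ist': 2, 'gut': 3, '.': 4}
print(create_sen_indices(word2idx, ["Das ist gut.", "Das ist schlecht."]))
# expected (tokenizer-dependent): [[1, 2, 3, 4], [1, 2, 0, 4]]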
def nltk_parser(txt):
    myblob = TextBlobDE(txt)
    sent = [x[1] for x in myblob.tags]       # POS tag sequence
    sent_text = [x[0] for x in myblob.tags]  # corresponding tokens
    cfg_grammar = nltk.CFG.fromstring("""
    S -> NP VP | S CC S
    NP -> 'DT' N | 'DT' N PP | 'PRP' | N | 'PRP$'
    VP -> V NP | V NP PP | V ADJP
    ADJP -> 'RB' 'JJ' | 'JJ'
    PP -> P NP
    N -> 'NN' | 'NNP' | 'NNS' | 'FW'
    V -> 'VBN' | 'VB' | 'MD'
    P -> 'IN' | 'TO'
    CC -> 'CC'
    O -> 'RP' | 'WDT' | 'TRUNC' | 'CD'
    """)
    parser = nltk.parse.ChartParser(cfg_grammar)
    # parse the tag sequence (the grammar's terminals are POS tags, not words)
    for tree in parser.parse(sent):
        print(tree)
        tree.draw()
def from_dict(details):
    post = details["details"]
    blob = TextBlobDE(post["message"])
    return Post(_id=post["post_id"],
                timestamp=dateutil.parser.parse(post["created_at"]),
                message=post["message"],
                tags=extract_tags(post["message"]),
                image_url=post.get("image_url"),
                thumbnail_url=post.get("thumbnail_url"),
                child_count=post["child_count"],
                banned=details["banned"],
                deleted=False,
                from_home=details.get("from_home"),
                color=post["color"],
                distance=post["distance"],
                location_name=post["location"]["name"],
                pin_count=post["pin_count"],
                share_count=post["share_count"],
                vote_count=post["vote_count"],
                readonly=details["readonly"],
                polarity=blob.polarity)
def text_analytics(analysis_request):
    """
    Customer Service Text Analytics

    The Analytics endpoint returns both the sentiment and a suggested response
    for a customer service text.

    :param analysis_request: the customer service text in base64 encoding
    :type analysis_request: dict | bytes
    :rtype: AnalysisResponse
    """
    if connexion.request.is_json:
        analysis_request = AnalysisRequest.from_dict(connexion.request.get_json())
    response = AnalysisResponse()
    if analysis_request.language_code.upper() == "DE":
        blob = TextBlobDE(analysis_request.customer_text)
    else:
        blob = TextBlob(analysis_request.customer_text)
    # rescale polarity from [-1, 1] to [0, 1]
    response.sentiment_score = (blob.sentiment.polarity + 1) / 2
    return response
def from_dict(reply, post):
    blob = TextBlobDE(reply["message"])
    return Reply(_id=reply["post_id"],
                 timestamp=dateutil.parser.parse(reply["created_at"]),
                 post_id=reply["parent_id"],
                 post_timestamp=post.timestamp,
                 message=reply["message"],
                 tags=extract_tags(reply["message"]),
                 post_message=post.message,
                 post_tags=post.tags,
                 color=reply["color"],
                 post_color=post.color,
                 distance=reply["distance"],
                 got_thanks=reply["got_thanks"],
                 location_name=reply["location"]["name"],
                 from_home=reply.get("from_home"),
                 vote_count=reply["vote_count"],
                 replier=reply["replier"],
                 polarity=blob.polarity,
                 post_pin_count=post.pin_count,
                 post_share_count=post.share_count,
                 post_vote_count=post.vote_count,
                 post_polarity=post.polarity)
def termex(txt):
    terms = []  # was an undefined module-level list; made local and returned
    myblob = TextBlobDE(txt)
    pos = myblob.tags
    for item in pos:
        if item[1].startswith('N'):  # keep nouns (NN, NNS, NNP, ...)
            terms.append(item)
    return terms
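# Usage sketch for the noun extractor above (now returning its result instead
# of appending to an undefined module-level list):
print(termex("Die Katze jagt die Maus durch den Garten."))
# -> e.g. [('Katze', 'NN'), ('Maus', 'NN'), ('Garten', 'NN')]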
text_long = adidas_text.read()

#### Regular Expressions ####
# all words ending in "heit"
heitwords = re.findall(r"\w+heit", text_long)
##print ("HEIT:", heitwords)

#### Tokenizer ####
# first with TextBlob
blob = TextBlobDE(text_long)  # was TextBlobDE(text); `text` is undefined here
print("SENTENCE TOKENIZER (TextBlobDE)")
sentences = blob.sentences
print(sentences)
print("WORD TOKENIZER (TextBlobDE)")
tokens = blob.tokens
print(tokens)

# then with NLTK
print("SENTENCE TOKENIZER (NLTK)")
sent_detector = nltk.data.load('tokenizers/punkt/german.pickle')
# -*- coding: utf-8 -*-
from textblob import TextBlob
from textblob_de import TextBlobDE

text = TextBlob("Markus is angry because he never gets the biggest chocolate.")
print(text.tags)
print(text.sentiment.polarity)
print(text.sentiment)

# In Python 3, string literals are already Unicode, so the original Python 2
# style .decode('utf-8') calls are unnecessary (and would fail on str).
text = TextBlobDE("Markus ist wütend weil er nie die grösste Schokolade erhält.")
print(text.tags)
print(text.sentiment.polarity)
print(text.sentiment)

text = TextBlobDE("Markus ist glücklich weil er immer die grösste Schokolade erhält.")
print(text.tags)
print(text.sentiment.polarity)
print(text.sentiment)
# WIN: spacy.load('en') does not work, whereas spacy.load('en_core_web_sm') does...
# so always use nlp = spacy.load('en_core_web_sm')
# if you lack permissions: pip install de_core_news_sm-2.0.0.tar
# (must be in the same directory) => manual installation
# pip install a .tar.gz archive from a path or URL:
# ! pip install /Users/you/en_core_web_sm-2.0.0.tar.gz
# `import de_core_news_sm` works
import de_core_news_sm
from textblob_de import TextBlobDE
import pandas as pd

f1 = open("DHB.txt", mode="r", encoding="UTF8")
text1 = f1.read()
blob2 = TextBlobDE(text1)
f1.close()

blob2.sentences
sens = pd.DataFrame(blob2.sentences)
tgs = pd.DataFrame(blob2.tags)
print(sens)
#blob = TextBlobDE(text)

blob2.tags
# [('Der', 'DT'), ('Blob', 'NN'), ('macht', 'VB'),
#  ('in', 'IN'), ('seiner', 'PRP$'), ...]

blob2.noun_phrases
# WordList(['Der Blob', 'seiner unbekümmert-naiven Weise',
#           'den gewissen Charme', 'hölzerne Regie',
#           'konfuse Drehbuch'])
# NOTE: real credentials must never be committed; placeholders used here.
# (consumer_key is defined above this excerpt.)
consumer_secret = 'YOUR_CONSUMER_SECRET'
access_token = 'YOUR_ACCESS_TOKEN'
access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'

print('Starting...')
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
print('Authenticated.')

# Step 3 - Retrieve Tweets
# Twitter API: https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets
query = '@uwyss'
public_tweets = api.search(query, lang='de', count=1, result_type='recent')

# CHALLENGE - Instead of printing out each tweet, save each tweet to a CSV file
# and label each one as either 'positive' or 'negative', depending on the sentiment.
# You can decide the sentiment polarity threshold yourself.
for tweet in public_tweets:
    # Step 4 - Perform Sentiment Analysis on Tweets
    analysis = TextBlobDE(tweet.text)
    print(tweet.id, tweet.text)
    print(analysis.sentiment)
    print("")
def lemmatize_sentence(sentence):
    sblob = TextBlobDE(str(sentence))
    for w in sblob.words:
        w_new = lemmatize_word(w)
        sentence = sentence.replace(w, w_new)
    return sentence
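# Hedged usage sketch: lemmatize_word() is a project helper assumed to map an
# inflected German form to its lemma; the stand-in below is hypothetical.
def lemmatize_word(w):  # hypothetical stand-in
    return {"ging": "gehen", "Häusern": "Haus"}.get(str(w), str(w))

print(lemmatize_sentence("Er ging zu den Häusern ."))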
#allianz_text = open('allianz_JA_2012_Text.txt', 'w', encoding='utf-8')
#Axel_Springer_JA_2012
#todo_file = open('Vulcanic_Triatherm_JA_2012.htm', 'r', encoding='utf-8')
#todo_data = todo_file.read()
#todo_text = open('Vulcanic_Triatherm_JA_2012_Text.txt', 'w', encoding='utf-8')

#### HTML parser ####
class MyHTMLParser(HTMLParser):
    def handle_data(self, data):
        todo_text.write(data)

parser = MyHTMLParser()
## I only had to do this once:
#parser.feed(todo_data)

#text_long = adidas_text.read()

#### Initialize TextBlob ####
blob = TextBlobDE(text)

#### whatever you open, you also have to close again ####
#todo_file.close()
#todo_text.close()
def preprocess_tweet(data):
    try:
        created_at = datetime.strptime(data['created_at'], '%a %b %d %H:%M:%S +0000 %Y')

        # detect the language of the tweet or use the predefined language
        lang = classify(data['text'])[0] if 'lang' not in data else data['lang']

        # remove URLs using the Imme Emosol regex: https://mathiasbynens.be/demo/url-regex
        text = re.sub(r"http\S+", "", data['text'], flags=re.MULTILINE)

        # tokenize the text depending on the language
        if lang == 'en':
            blob = TextBlobEN(text)
        elif lang == 'de':
            blob = TextBlobDE(text)
        else:
            # avoid unknown languages
            raise UnknownLanguageException('Unknown language: ' + data['text'])

        # get the polarity of the tweet sentences and average them
        # NOTE: TextBlobDE is not as good as the English analyzer and is fairly
        # barebones. If the resulting polarity is inaccurate, one option is to
        # process English tweets only.
        polarity = 0
        polarity_count = 0
        for sentence in blob.sentences:
            # ignore zero sentiment: in most cases a failed detection or hashtag fragments
            if sentence.sentiment.polarity != 0.0:
                # accumulate (the original assigned here, which would have kept
                # only the last sentence before dividing by polarity_count)
                polarity += sentence.sentiment.polarity
                polarity_count += 1
        if polarity_count > 0:
            polarity /= polarity_count

        # extract _important_ words from the word tokens
        words = []
        is_hashtag = False
        is_tagged_user = False
        for tag in blob.tags:
            word = tag[0]
            kind = tag[1]
            # TODO: special behaviour for hashtags is possibly also necessary for @
            if word[0] == '#':
                # special case: the next word is a hashtag
                is_hashtag = True
            elif word[0] == '@':
                is_tagged_user = True
            else:
                if is_hashtag:
                    # the previous token was '#', so re-merge and save
                    words.append("#" + word)
                    is_hashtag = False
                elif is_tagged_user:
                    words.append("@" + word)
                    is_tagged_user = False
                elif any(word == s for s in BLACKLIST):
                    continue
                else:
                    # just a normal word of the tweet:
                    # check that the word is of an allowed grammatical type
                    if kind[0] in ALLOWED_WORD_TOKENS:
                        words.append(word)

        # find out where the tweet came from by either taking existing
        # coordinates or the center of the place
        # TODO: check if coordinates exist before using place
        # TODO: verify the structure of place coordinates
        coords = []
        if data['geo']:
            coords.append(data['geo']['coordinates'])
        elif data['coordinates']:
            coords.append(data['coordinates']['coordinates'])
        else:
            coords = data['place']['bounding_box']['coordinates'][0]
        loc = [0.0, 0.0]
        for coord in coords:
            loc[0] += coord[0]
            loc[1] += coord[1]
        loc[0] /= len(coords)
        loc[1] /= len(coords)

        # create the tweet object
        tweet = {
            "_id": data['_id'],  # reuse the same id
            "user": {
                "name": data['user']['name'],
                "screen_name": data['user']['screen_name'],
                "followers_count": data['user']['followers_count'],
                "friends_count": data['user']['friends_count'],
                "listed_count": data['user']['listed_count'],
                "statuses_count": data['user']['statuses_count'],
                "following": data['user']['following']
            },
            "created_at": created_at,
            "words": words,
            "loc": loc,
            "polarity": polarity,
            "retweet_count": data['retweet_count'],
            "favorite_count": data['favorite_count']
        }
        return tweet
    except UnknownLanguageException as error:
        # catch exceptions, usually a failed language detection
        logging.warning(repr(error))
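# Hedged usage sketch. The stand-ins below are hypothetical: the real module
# is assumed to define BLACKLIST, ALLOWED_WORD_TOKENS and
# UnknownLanguageException, and to import classify (probably langid.classify)
# alongside the TextBlobEN/TextBlobDE aliases.
import re
import logging
from datetime import datetime
from textblob import TextBlob as TextBlobEN
from textblob_de import TextBlobDE

class UnknownLanguageException(Exception):
    pass

BLACKLIST = {'rt'}                     # hypothetical stop-list
ALLOWED_WORD_TOKENS = {'N', 'V', 'J'}  # hypothetical POS initials to keep

sample = {
    '_id': 1,
    'created_at': 'Mon Jan 06 20:00:00 +0000 2020',
    'text': 'Der Schnee in Berlin ist wunderschön! http://t.co/x',
    'lang': 'de',
    'geo': {'coordinates': [52.52, 13.405]},
    'coordinates': None,
    'place': None,
    'retweet_count': 0,
    'favorite_count': 2,
    'user': {'name': 'demo', 'screen_name': 'demo', 'followers_count': 1,
             'friends_count': 1, 'listed_count': 0, 'statuses_count': 1,
             'following': False},
}
print(preprocess_tweet(sample))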