Example #1
import json

from cucco import Cucco


def load_data(filepath):
    captions = []
    tags = []

    cucco = Cucco()

    with open(filepath, 'r') as file:
        doc = json.loads(file.read())
    for obj in doc:
        for post in doc[obj]:
            hashtags = doc[obj][post]['tags']
            if len(hashtags) > 0:
                # Lowercase the caption and strip any emojis from it.
                capt = [
                    cucco.replace_emojis(
                        str(doc[obj][post]['caption']).lower(), '')
                ]
                tags += hashtags
                # Repeat the caption once per hashtag so both lists align.
                captions += capt * len(hashtags)
    return captions, tags
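
A minimal usage sketch, assuming the input JSON nests users -> posts -> {'caption', 'tags'} as the access pattern above implies (the file name is illustrative):

# posts.json: {"user1": {"post1": {"caption": "Sunset 🌅", "tags": ["sunset", "beach"]}}}
captions, tags = load_data("posts.json")
assert len(captions) == len(tags)  # captions repeat once per hashtag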
Example #2
import re

import facebook
import requests
from cucco import Cucco
from flask import jsonify
from html.parser import HTMLParser
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# Assumes project-local helpers defined elsewhere in the module: `token`,
# `read_dataset`, `get_full_path`, `init_tag`, `do_tag`, `jalanSpellCheck`
# and `slangWordCorrect`. Note: HTMLParser.unescape was removed in
# Python 3.9; html.unescape is the modern replacement.


def get_tasks(task_id):
    abc = []

    graph = facebook.GraphAPI(access_token=token, version=3.1)
    node = "/%s" % task_id

    video = graph.request(
        node + "/comments?fields=id,message,comment_count,"
        "reactions.type(LIKE).limit(0).summary(total_count).as(like),"
        "reactions.type(LOVE).limit(0).summary(total_count).as(love),"
        "reactions.type(WOW).limit(0).summary(total_count).as(wow),"
        "reactions.type(HAHA).limit(0).summary(total_count).as(haha),"
        "reactions.type(SAD).limit(0).summary(total_count).as(sad),"
        "reactions.type(ANGRY).limit(0).summary(total_count).as(angry)")

    # Wrap this block in a while loop so we can keep paginating requests until
    # finished.

    # Read the labeled emotion datasets
    joy_feel = read_dataset(get_full_path("dataset/cf/pp/filter/joy.txt"),
                            "joy")
    disgust_feel = read_dataset(
        get_full_path("dataset/cf/pp/filter/disgust.txt"), "disgust")
    sadness_feel = read_dataset(
        get_full_path("dataset/cf/pp/filter/sadness.txt"), "sadness")
    anger_feel = read_dataset(get_full_path("dataset/cf/pp/filter/anger.txt"),
                              "anger")
    fear_feel = read_dataset(get_full_path("dataset/cf/pp/filter/fear.txt"),
                             "fear")
    surprise_feel = read_dataset(
        get_full_path("dataset/cf/pp/filter/surpriseExtra.txt"), "surprise")

    # strip trailing whitespace from each line to form the training data
    dataku = []
    for (words, sentiment) in (joy_feel + disgust_feel + sadness_feel +
                               anger_feel + fear_feel + surprise_feel):
        dataku.append((words.rstrip(), sentiment))

    lines = []
    labels = []
    html_parser = HTMLParser()
    for words, sentiment in dataku:
        # Unescape HTML entities in the training text.
        lines.append(html_parser.unescape(words))
        labels.append(sentiment)

    headlines = lines

    pipeline = Pipeline([
        (
            "count_vectorizer",
            CountVectorizer(
                ngram_range=(2, 3),
                min_df=1,
                max_df=0.8,
                stop_words=frozenset([
                    "saya",
                    "sedang",
                    "lagi",
                    "adalah",
                    "di",
                    "dari",
                    "karena",
                    "dan",
                    "dengan",
                    "ke",
                    "yang",
                    "untuk",
                    "itu",
                    "orang",
                ]),
            ),
        ),
        ("tfidf_transformer", TfidfTransformer()),
        ("classifier", MultinomialNB()),
    ])
    pipeline.fit(headlines, labels)
    angerx = 0
    joyx = 0
    surprisex = 0
    sadnessx = 0
    fearx = 0
    disgustx = 0
    while True:
        try:
            # print("Get post comments data :")
            for each_video in video["data"]:
                if each_video["message"] != "":
                    # Initialise the POS tagger and text-cleaning helpers.
                    init_tag()
                    html_parser = HTMLParser()
                    spell_check = jalanSpellCheck()
                    koreksi_slang = slangWordCorrect()
                    cucco = Cucco()

                    # Unescape HTML entities.
                    kata = html_parser.unescape(each_video["message"])
                    kata = " ".join(kata.split())

                    # Remove emojis
                    kata = cucco.replace_emojis(kata)

                    normalizations = ["remove_extra_white_spaces"]

                    # Remove extra whitespace
                    kata = cucco.normalize(kata, normalizations)

                    kata = kata.replace("/", " ")

                    # Convert to lowercase
                    kata = kata.lower()

                    # Collapse runs of a repeated character down to two
                    kata = re.sub(r"(.)\1+", r"\1\1", kata)

                    # Collapse the doubled punctuation left by the previous step
                    kata = kata.replace("..", ".")
                    kata = kata.replace(",,", ",")
                    kata = kata.replace("!!", "!")
                    kata = kata.replace("??", "?")

                    # Add a space after any period followed by a non-space
                    rx = r"\.(?=\S)"
                    kata = re.sub(rx, ". ", kata)

                    # Slang correction
                    kata = koreksi_slang.jalan(kata)

                    # Spellcheck error
                    # tampung_kata_1 = []
                    # tampung_1 = kata.split()
                    # for word in tampung_1:
                    #    tampung_kata_1.append(spell_check.correctSpelling(word))
                    # kata = " ".join(tampung_kata_1)
                    asdqwe = kata  # keep a copy of the cleaned text before POS tagging

                    # Ensure the sentence ends with punctuation before tagging
                    if re.match(".*[^.?!]$", kata):
                        kata = kata + " ."

                    resultx = do_tag(kata)
                    kata = " ".join(resultx)


                    if kata != "":
                        linesz = []
                        linesz.append(kata)
                        words = []
                        for y in linesz:
                            lines = y.split()
                            for x in lines:
                                word = x.split("/")
                                chars_to_remove = set((
                                    ",",
                                    "IN",
                                    "CC",
                                    "SC",
                                    "CDO",
                                    "CDC",
                                    "CDP",
                                    "CDI",
                                    "DT",
                                    "MD",
                                    "OP",
                                    "CP",
                                    "SYM",
                                    ".",
                                ))
                                if word[1] not in chars_to_remove:
                                    words.append(word[0] + "_" + word[1])
                            resultx = "".join([" " + i for i in words]).strip()
                            # print(resultx)

                        # Predict the emotion of this single comment.
                        hasilx = pipeline.predict([resultx])[0]
                        if hasilx == "anger":
                            angerx = angerx + 1
                        elif hasilx == "joy":
                            joyx = joyx + 1
                        elif hasilx == "sadness":
                            sadnessx = sadnessx + 1
                        elif hasilx == "fear":
                            fearx = fearx + 1
                        elif hasilx == "disgust":
                            disgustx = disgustx + 1
                        elif hasilx == "surprise":
                            surprisex = surprisex + 1

                        comments_data = {
                            "id": each_video["id"],
                            "komen": each_video["message"],
                            "asdqwe": asdqwe,
                            "komen_edit": resultx,
                            "prediksi": hasilx,
                            "like_count": each_video["like"]["summary"]["total_count"],
                            "love_count": each_video["love"]["summary"]["total_count"],
                            "wow_count": each_video["wow"]["summary"]["total_count"],
                            "haha_count": each_video["haha"]["summary"]["total_count"],
                            "sad_count": each_video["sad"]["summary"]["total_count"],
                            "angry_count": each_video["angry"]["summary"]["total_count"],
                        }

                        abc.append(comments_data)
            # Attempt to make a request to the next page of data, if it exists.
            video = requests.get(video["paging"]["next"]).json()
        except KeyError:
            # When there are no more pages (['paging']['next']), break from the
            # loop and end the script.
            break

    ctrku = {
        "anger": angerx,
        "joy": joyx,
        "sadness": sadnessx,
        "fear": fearx,
        "surprise": surprisex,
        "disgust": disgustx,
    }


    return jsonify({"tasks": abc}, {"ASD": ctrku})
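
For reference, a minimal standalone sketch of the same classification pipeline trained on a toy corpus (the two training sentences are made-up placeholders):

from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

toy_pipeline = Pipeline([
    ("count_vectorizer", CountVectorizer(ngram_range=(1, 2))),
    ("tfidf_transformer", TfidfTransformer()),
    ("classifier", MultinomialNB()),
])
# Bag-of-ngrams counts -> tf-idf weights -> multinomial naive Bayes.
toy_pipeline.fit(["saya senang sekali", "saya marah besar"], ["joy", "anger"])
print(toy_pipeline.predict(["senang sekali"]))  # likely ['joy']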
Example #3
def test(task_id):
    video = []
    conn = create_connection("datafacebook/Kompas/" + str(task_id) + ".db")
    cursor = conn.execute(
        "SELECT comment_id, comment_content, like_count, love_count, wow_count, haha_count, sad_count, angry_count from Comments"
    )
    for row in cursor:
        video.append({
            "id": row[0],
            "message": row[1],
            "like": row[2],
            "love": row[3],
            "wow": row[4],
            "haha": row[5],
            "sad": row[6],
            "angry": row[7],
        })
    conn.close()

    abc = []

    joy_feel = read_dataset(get_full_path("dataset/cf/pp/filter/joy.txt"),
                            "joy")
    disgust_feel = read_dataset(
        get_full_path("dataset/cf/pp/filter/disgust.txt"), "disgust")
    sadness_feel = read_dataset(
        get_full_path("dataset/cf/pp/filter/sadness.txt"), "sadness")
    anger_feel = read_dataset(get_full_path("dataset/cf/pp/filter/anger.txt"),
                              "anger")
    fear_feel = read_dataset(get_full_path("dataset/cf/pp/filter/fear.txt"),
                             "fear")
    surprise_feel = read_dataset(
        get_full_path("dataset/cf/pp/filter/surpriseExtra.txt"), "surprise")

    dataku = []
    for (words, sentiment) in (joy_feel + disgust_feel + sadness_feel +
                               anger_feel + fear_feel + surprise_feel):
        dataku.append((words.rstrip(), sentiment))

    lines = []
    labels = []
    html_parser = HTMLParser()
    for words, sentiment in dataku:
        # Unescape HTML entities in the training text.
        lines.append(html_parser.unescape(words))
        labels.append(sentiment)

    headlines = lines

    pipeline = Pipeline([
        (
            "count_vectorizer",
            CountVectorizer(
                ngram_range=(2, 3),
                min_df=1,
                max_df=0.8,
                stop_words=frozenset([
                    "saya",
                    "sedang",
                    "lagi",
                    "adalah",
                    "di",
                    "dari",
                    "karena",
                    "dan",
                    "dengan",
                    "ke",
                    "yang",
                    "untuk",
                    "itu",
                    "orang",
                ]),
            ),
        ),
        ("tfidf_transformer", TfidfTransformer()),
        ("classifier", MultinomialNB()),
    ])
    pipeline.fit(headlines, labels)
    angerx = 0
    joyx = 0
    surprisex = 0
    sadnessx = 0
    fearx = 0
    disgustx = 0

    for each_video in video:
        if each_video["message"] != "":
            # Initialise the POS tagger and text-cleaning helpers.
            init_tag()
            html_parser = HTMLParser()
            spell_check = jalanSpellCheck()
            koreksi_slang = slangWordCorrect()
            cucco = Cucco()

            # Unescape HTML entities.
            kata = html_parser.unescape(each_video["message"])
            kata = " ".join(kata.split())

            # Remove emojis
            kata = cucco.replace_emojis(kata)

            normalizations = ["remove_extra_white_spaces"]

            # Remove extra whitespace
            kata = cucco.normalize(kata, normalizations)

            kata = kata.replace("/", " ")

            # Convert to lowercase
            kata = kata.lower()

            # Collapse runs of a repeated character down to two
            kata = re.sub(r"(.)\1+", r"\1\1", kata)

            # Collapse the doubled punctuation left by the previous step
            kata = kata.replace("..", ".")
            kata = kata.replace(",,", ",")
            kata = kata.replace("!!", "!")
            kata = kata.replace("??", "?")

            # Add a space after any period followed by a non-space
            rx = r"\.(?=\S)"
            kata = re.sub(rx, ". ", kata)

            # Slang correction
            kata = koreksi_slang.jalan(kata)

            # Spellcheck error
            # tampung_kata_1 = []
            # tampung_1 = kata.split()
            # for word in tampung_1:
            #    tampung_kata_1.append(spell_check.correctSpelling(word))
            # kata = " ".join(tampung_kata_1)
            asdqwe = kata  # keep a copy of the cleaned text before POS tagging

            # Ensure the sentence ends with punctuation before tagging
            if re.match(".*[^.?!]$", kata):
                kata = kata + " ."

            resultx = do_tag(kata)
            kata = " ".join(resultx)

            if kata != "":
                linesz = []
                linesz.append(kata)
                words = []

                for y in linesz:
                    lines = y.split()
                    for x in lines:
                        word = x.split("/")
                        chars_to_remove = set((
                            ",",
                            "IN",
                            "CC",
                            "SC",
                            "CDO",
                            "CDC",
                            "CDP",
                            "CDI",
                            "DT",
                            "MD",
                            "OP",
                            "CP",
                            "SYM",
                            ".",
                        ))
                        if word[1] not in chars_to_remove:
                            words.append(word[0] + "_" + word[1])
                    resultx = "".join([" " + i for i in words]).strip()

                # Predict the emotion of this single comment.
                hasilx = pipeline.predict([resultx])[0]
                if hasilx == "anger":
                    angerx = angerx + 1
                elif hasilx == "joy":
                    joyx = joyx + 1
                elif hasilx == "sadness":
                    sadnessx = sadnessx + 1
                elif hasilx == "fear":
                    fearx = fearx + 1
                elif hasilx == "disgust":
                    disgustx = disgustx + 1
                elif hasilx == "surprise":
                    surprisex = surprisex + 1

                comments_data = {
                    "id": each_video["id"],
                    "komen": each_video["message"],
                    "asdqwe": asdqwe,
                    "komen_edit": resultx,
                    "prediksi": hasilx,
                    "like_count": each_video["like"],
                    "love_count": each_video["love"],
                    "wow_count": each_video["wow"],
                    "haha_count": each_video["haha"],
                    "sad_count": each_video["sad"],
                    "angry_count": each_video["angry"],
                }

                abc.append(comments_data)

    ctrku = {
        "anger": angerx,
        "joy": joyx,
        "sadness": sadnessx,
        "fear": fearx,
        "surprise": surprisex,
        "disgust": disgustx,
    }

    return jsonify({"tasks": abc}, {"ASD": ctrku})
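
create_connection is not defined in these examples; a minimal sketch, assuming it is a plain sqlite3 wrapper:

import sqlite3


def create_connection(db_path):
    # Open (or create) the SQLite database file at db_path.
    return sqlite3.connect(db_path)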
Example #4
from cucco import Cucco

# TESTS_DATA is assumed to be defined at module level (a sketch follows
# this class).


class TestCucco(object):

    _cucco = None

    @staticmethod
    def _tests_generator(test):
        for test in TESTS_DATA['tests'][test[5:]]:
            yield (test['after'],
                   test['before'],
                   test['characters'] if 'characters' in test else '',
                   test['kwargs'] if 'kwargs' in test else dict(),
                   test['message'])

    def setup_method(self):
        self._cucco = Cucco()

    def test_normalize(self, request):
        for after, before, _, kwargs, message in self._tests_generator(request.node.name):
            assert self._cucco.normalize(before, **kwargs) == after, message

    def test_remove_accent_marks(self, request):
        for after, before, _, _, message in self._tests_generator(request.node.name):
            assert self._cucco.remove_accent_marks(before) == after, message

    def test_remove_stop_words(self, request):
        for after, before, _, kwargs, message in self._tests_generator(request.node.name):
            assert self._cucco.remove_stop_words(before, **kwargs) == after, message

        # Force language
        self._cucco = Cucco()
        for after, before, _, kwargs, message in self._tests_generator(request.node.name):
            kwargs['language'] = 'en'
            assert self._cucco.remove_stop_words(before, **kwargs) == after, message

        # Force invalid language
        self._cucco = Cucco()
        for after, before, _, kwargs, message in self._tests_generator(request.node.name):
            kwargs['language'] = 'invalid'
            assert self._cucco.remove_stop_words(before, **kwargs) == before, message

        # Test lazy load
        self._cucco = Cucco(lazy_load=True)
        for after, before, _, kwargs, message in self._tests_generator(request.node.name):
            kwargs['language'] = 'en'
            assert self._cucco.remove_stop_words(before, **kwargs) == after, message

    def test_replace_characters(self, request):
        for after, before, characters, kwargs, message in self._tests_generator(request.node.name):
            assert self._cucco.replace_characters(text=before, characters=characters, **kwargs) == after, message

    def test_replace_emails(self, request):
        for after, before, _, kwargs, message in self._tests_generator(request.node.name):
            assert self._cucco.replace_emails(text=before, **kwargs) == after, message

    def test_replace_emojis(self, request):
        for after, before, _, kwargs, message in self._tests_generator(request.node.name):
            assert self._cucco.replace_emojis(text=before, **kwargs) == after, message

    def test_remove_extra_white_spaces(self, request):
        for after, before, _, _, message in self._tests_generator(request.node.name):
            assert self._cucco.remove_extra_white_spaces(before) == after, message

    def test_replace_hyphens(self, request):
        for after, before, _, kwargs, message in self._tests_generator(request.node.name):
            assert self._cucco.replace_hyphens(text=before, **kwargs) == after, message

    def test_replace_punctuation(self, request):
        for after, before, _, kwargs, message in self._tests_generator(request.node.name):
            assert self._cucco.replace_punctuation(text=before, **kwargs) == after, message

    def test_replace_symbols(self, request):
        for after, before, _, kwargs, message in self._tests_generator(request.node.name):
            assert self._cucco.replace_symbols(text=before, **kwargs) == after, message

    def test_replace_urls(self, request):
        for after, before, _, kwargs, message in self._tests_generator(request.node.name):
            assert self._cucco.replace_urls(text=before, **kwargs) == after, message
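
TESTS_DATA is not defined in this example; a minimal sketch of a compatible fixture, assuming a JSON file (name and contents are illustrative). _tests_generator strips the "test_" prefix (5 characters) from the test name to look up its cases:

import json

with open("tests_data.json") as fixture:
    TESTS_DATA = json.load(fixture)

# Expected shape, keyed by test name minus the "test_" prefix:
# {"tests": {"normalize": [{"before": " foo  bar ", "after": "foo bar",
#                           "message": "extra spaces removed"}]}}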
Example #5
import re

sentence = "This is a phone number 672-123-456-9910"
pattern = r".*(phone).*?([\d-]+)"
match = re.match(pattern, sentence)
match.groups()     # ('phone', '672-123-456-9910')
match.group()      # the full match (here, the whole sentence)
match.group(0)     # same as match.group()
match.group(1)     # 'phone'
match.group(2)     # '672-123-456-9910'
match.group(1, 2)  # ('phone', '672-123-456-9910')

Write a Python program for searching for and replacing emojis.
# Cucco detects emojis using regular expressions and replaces them.
from cucco import Cucco

cucco = Cucco()
a = cucco.replace_emojis(':) :)) :( FSDFSDDFSDfv')
print(a)
Write the syntax and a simple program for a regular expression pattern in Python.

import re

text = 'You can try to find an ant in this string'
pattern = r'an?\w'  # 'a', an optional 'n', then one word character
for match in re.finditer(pattern, text):
    sStart = match.start()
    sEnd = match.end()
    sGroup = match.group()
    print('Match "{}" found at: [{},{}]'.format(sGroup, sStart, sEnd))

# Expected output:
# Match "an" found at: [5,7]
# Match "an" found at: [20,22]
# Match "ant" found at: [23,26]


from cucco import Cucco


def remove_emoji(text):
    # Replace any emojis in text with cucco's default replacement.
    cucco = Cucco()
    return cucco.replace_emojis(text)
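
A quick usage check for the helper above (the exact output depends on cucco's default emoji replacement):

print(remove_emoji("good morning 😀"))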
Example #7
class TestCucco(object):

    _cucco = None

    @staticmethod
    def _tests_generator(test):
        for test in TESTS_DATA['tests'][test[5:]]:
            yield (test['after'], test['before'],
                   test['characters'] if 'characters' in test else '',
                   test['kwargs'] if 'kwargs' in test else dict(),
                   test['message'])

    def setup_method(self):
        self._cucco = Cucco()

    def test_normalize(self, request):
        for after, before, _, kwargs, message in self._tests_generator(
                request.node.name):
            assert self._cucco.normalize(before, **kwargs) == after, message

    def test_remove_accent_marks(self, request):
        for after, before, _, _, message in self._tests_generator(
                request.node.name):
            assert self._cucco.remove_accent_marks(before) == after, message

    def test_remove_stop_words(self, request):
        for after, before, _, kwargs, message in self._tests_generator(
                request.node.name):
            assert self._cucco.remove_stop_words(before,
                                                 **kwargs) == after, message

        # Force language
        self._cucco = Cucco()
        for after, before, _, kwargs, message in self._tests_generator(
                request.node.name):
            kwargs['language'] = 'en'
            assert self._cucco.remove_stop_words(before,
                                                 **kwargs) == after, message

        # Force invalid language
        self._cucco = Cucco()
        for after, before, _, kwargs, message in self._tests_generator(
                request.node.name):
            kwargs['language'] = 'invalid'
            assert self._cucco.remove_stop_words(before,
                                                 **kwargs) == before, message

        # Test lazy load
        self._cucco = Cucco(lazy_load=True)
        for after, before, _, kwargs, message in self._tests_generator(
                request.node.name):
            kwargs['language'] = 'en'
            assert self._cucco.remove_stop_words(before,
                                                 **kwargs) == after, message

    def test_replace_characters(self, request):
        for after, before, characters, kwargs, message in self._tests_generator(
                request.node.name):
            assert self._cucco.replace_characters(text=before,
                                                  characters=characters,
                                                  **kwargs) == after, message

    def test_replace_emails(self, request):
        for after, before, _, kwargs, message in self._tests_generator(
                request.node.name):
            assert self._cucco.replace_emails(text=before,
                                              **kwargs) == after, message

    def test_replace_emojis(self, request):
        for after, before, _, kwargs, message in self._tests_generator(
                request.node.name):
            assert self._cucco.replace_emojis(text=before,
                                              **kwargs) == after, message

    def test_remove_extra_whitespaces(self, request):
        for after, before, _, _, message in self._tests_generator(
                request.node.name):
            assert self._cucco.remove_extra_whitespaces(
                before) == after, message

    def test_replace_hyphens(self, request):
        for after, before, _, kwargs, message in self._tests_generator(
                request.node.name):
            assert self._cucco.replace_hyphens(text=before,
                                               **kwargs) == after, message

    def test_replace_punctuation(self, request):
        for after, before, _, kwargs, message in self._tests_generator(
                request.node.name):
            assert self._cucco.replace_punctuation(text=before,
                                                   **kwargs) == after, message

    def test_replace_symbols(self, request):
        for after, before, _, kwargs, message in self._tests_generator(
                request.node.name):
            assert self._cucco.replace_symbols(text=before,
                                               **kwargs) == after, message

    def test_replace_urls(self, request):
        for after, before, _, kwargs, message in self._tests_generator(
                request.node.name):
            assert self._cucco.replace_urls(text=before,
                                            **kwargs) == after, message
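
These test classes follow pytest conventions (the built-in `request` fixture supplies each test's name), so they can also be run programmatically; the module path below is illustrative:

import pytest

pytest.main(["-v", "test_cucco.py"])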
Example #8
import json

from cucco import Cucco

# Assumes module-level setup elsewhere: an authenticated tweepy `api` client,
# the Firebase Admin `firestore` module, and the project-local helpers
# `regioes`, `ufbr` and `classify`.


def searchTweets(query):
    db = firestore.client()
    maxCount = 100
    max_id = -1
    count = 0

    obj = {
        query: {
            "regioes": {
                "Norte": {"tristeza": 0, "alegria": 0, "amor": 0, "raiva": 0},
                "Nordeste": {"tristeza": 0, "alegria": 0, "amor": 0, "raiva": 0},
                "Centro-Oeste": {"tristeza": 0, "alegria": 0, "amor": 0, "raiva": 0},
                "Sul": {"tristeza": 0, "alegria": 0, "amor": 0, "raiva": 0},
                "Sudeste": {"tristeza": 0, "alegria": 0, "amor": 0, "raiva": 0},
            }
        }
    }

    other_obj = {
        "regioes": {
            "Norte": {"tristeza": 0, "alegria": 0, "amor": 0, "raiva": 0, "count": 0},
            "Nordeste": {"tristeza": 0, "alegria": 0, "amor": 0, "raiva": 0, "count": 0},
            "Centro-Oeste": {"tristeza": 0, "alegria": 0, "amor": 0, "raiva": 0, "count": 0},
            "Sul": {"tristeza": 0, "alegria": 0, "amor": 0, "raiva": 0, "count": 0},
            "Sudeste": {"tristeza": 0, "alegria": 0, "amor": 0, "raiva": 0, "count": 0},
        }
    }

    users_ref = db.collection(query)
    docs = users_ref.stream()

    jsonT = ""
    for doc in docs:
        jsonT = doc.to_dict()["porcentagem"]
    if jsonT == "":
        while count < maxCount:
            if max_id <= 0:
                searched_tweets = api.search(q=query+" -filter:retweets", lang="pt-br", tweet_mode='extended', count=maxCount*5)
            else:
                searched_tweets = api.search(q=query+" -filter:retweets", lang="pt-br", tweet_mode='extended', count=maxCount*5, max_id=str(max_id - 1))
            if not searched_tweets:
                print("no tweets found for this query")
                break
            else:
                for tweet in searched_tweets:
                    if (tweet.place is not None) and (count < maxCount):
                        text = json.dumps(tweet._json['full_text'], sort_keys=True, indent=4, ensure_ascii=False).encode('utf8').decode()
                        finalText = text.split(" ")
                        text = ""
                        for aux in finalText:
                            if not '@' in aux and not 'https://' in aux:
                                text += aux + " "

                        count += 1
                        # replace_emojis is an instance method, so create a Cucco instance first.
                        text = Cucco().replace_emojis(text)
                        text = text.replace('"', '')
                        municipio = (json.dumps(tweet._json['place']['full_name'], sort_keys=True, indent=4, ensure_ascii=False).encode('utf8')).decode().split(",")[0].replace('"', "")
                        try:
                            if municipio == 'Sao Paulo':
                                municipio = 'São Paulo'
                            regiao = regioes.getRegion(ufbr.get_cidade(municipio).codigo)
                            em = classify(text)
                            other_obj["regioes"][regiao][em] += 1
                            other_obj["regioes"][regiao]["count"] += 1
                        except Exception:
                            # Unknown city or failed classification: skip this tweet.
                            count -= 1

            max_id = searched_tweets[-1].id

        arr_reg = ["Norte", "Nordeste", "Centro-Oeste", "Sul", "Sudeste"]
        arr_emo = ["tristeza", "alegria", "amor", "raiva"]
        for i in arr_reg:
            for j in arr_emo:
                total = other_obj["regioes"][i]["count"]
                if total == 0:
                    obj[query]["regioes"][i][j] = 0
                else:
                    obj[query]["regioes"][i][j] = round((other_obj["regioes"][i][j] / total) * 100, 2)

        db.collection(query).add({ "tweets_classificados": json.dumps(other_obj), "porcentagem" : json.dumps(obj) })
        objs = [obj, other_obj]
        return objs

    else:
        users_ref = db.collection(query)
        docs = users_ref.stream()

        jsonP = ""
        for doc in docs:
            jsonP = doc.to_dict()["porcentagem"]
            jsonT = doc.to_dict()["tweets_classificados"]

            arr = [json.loads(jsonP), json.loads(jsonT)]

        return arr
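
A usage sketch: the first call for a query classifies fresh tweets and caches the result in Firestore; later calls return the cached percentages and counts. The query string below is illustrative:

percentages, counts = searchTweets("eleicoes")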