Example #1
def test_tweet_delete(self):
    # Seed the database with a single tweet
    first_tweet = Tweet()
    first_tweet.text = "First tweet"
    db.session.add(first_tweet)
    db.session.commit()
    # Deleting it through the API should remove it from the database
    self.client.delete("/tweets/1")
    self.assertIsNone(db.session.query(Tweet).get(1))
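The DELETE handler this test exercises is not part of the listing; a minimal sketch, assuming Flask-RESTX and the same `db` session and `Tweet` model used in these snippets (the route shape and the 404 behaviour are assumptions):

from flask_restx import Resource, abort

@api.route("/tweets/<int:id>")
class TweetResource(Resource):
    def delete(self, id):
        # Assumed handler: look up the tweet, 404 if missing, else delete it
        tweet = db.session.query(Tweet).get(id)
        if tweet is None:
            abort(404, "Tweet not found")
        db.session.delete(tweet)
        db.session.commit()
        return "", 204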
Example #2
    def test_tweets_show(self):
        first_tweet = Tweet()
        first_tweet.id = 1
        first_tweet.text = "First tweet"
        db.session.add(first_tweet)

        second_tweet = Tweet()
        second_tweet.id = 2
        second_tweet.text = "Second tweet"
        db.session.add(second_tweet)

        db.session.commit()

        response = self.client.get("/tweets")
        response_tweets = response.json
        print(response_tweets)

        self.assertEqual(len(response_tweets), 2)
Example #3
def post(self):
    """
    Create a new tweet
    """
    tweet_to_add = Tweet()
    tweet_to_add.text = api.payload['text']
    db.session.add(tweet_to_add)
    db.session.commit()
    return tweet_to_add, 201
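Returning the model instance with a 201 only serialises correctly when the method is wrapped in a marshalling schema; a hedged sketch of that wiring (the `tweet_fields` model name and the field list are illustrative, not from the source):

from flask_restx import Resource, fields

tweet_fields = api.model("Tweet", {
    "id": fields.Integer,
    "text": fields.String,
    "created_at": fields.DateTime,
})

@api.route("/tweets")
class TweetsResource(Resource):
    @api.marshal_with(tweet_fields, code=201)
    def post(self):
        tweet_to_add = Tweet()
        tweet_to_add.text = api.payload["text"]
        db.session.add(tweet_to_add)
        db.session.commit()
        return tweet_to_add, 201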
Example #4
def test_tweet_update(self):
    first_tweet = Tweet()
    first_tweet.text = "First tweet"
    db.session.add(first_tweet)
    db.session.commit()
    response = self.client.patch("/tweets/1", json={'text': 'New text'})
    updated_tweet = response.json
    self.assertEqual(response.status_code, 200)
    self.assertEqual(updated_tweet["id"], 1)
    self.assertEqual(updated_tweet["text"], "New text")
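The PATCH endpoint this update test calls is likewise not shown; a minimal sketch under the same assumptions (Flask-RESTX resource method, marshalled `Tweet` response; the 404 branch is an assumption):

def patch(self, id):
    tweet = db.session.query(Tweet).get(id)
    if tweet is None:
        abort(404, "Tweet not found")  # assumed behaviour for unknown ids
    tweet.text = api.payload["text"]
    db.session.commit()
    return tweet, 200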
Example #5
def test_tweet_show(self):
    first_tweet = Tweet()
    first_tweet.text = "First tweet"
    db.session.add(first_tweet)
    db.session.commit()  # commit so the request can see the new row
    response = self.client.get("/tweets/1")
    response_tweet = response.json
    print(response_tweet)
    self.assertEqual(response_tweet["id"], 1)
    self.assertEqual(response_tweet["text"], "First tweet")
    self.assertIsNotNone(response_tweet["created_at"])
Example #6
def post(self):
    text = api.payload["text"]
    if len(text) > 0:
        tweet = Tweet()
        tweet.text = text
        db.session.add(tweet)
        db.session.commit()
        return tweet, 201
    else:
        # abort() raises an HTTPException, so no return is needed
        abort(422, "Tweet text can't be empty")
Example #7
def post(self):
    text = api.payload["text"]
    username = api.payload["user"]
    if len(text) > 0:
        tweet = Tweet()
        tweet.text = text
        # look up the author by name instead of reusing the payload variable
        user = db.session.query(User).filter_by(name=username).first()
        tweet.user_id = user.id
        db.session.add(tweet)
        db.session.commit()
        return tweet, 201
    else:
        abort(422, "Tweet text can't be empty")
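One caveat with the snippet above: `first()` returns `None` when no user matches, so `user.id` would raise an `AttributeError`. A hedged guard (the 404 response is an assumption, not in the source):

user = db.session.query(User).filter_by(name=username).first()
if user is None:
    abort(404, "Unknown user '{}'".format(username))  # assumed error handling
tweet.user_id = user.id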
Example #8
    def save_tweet(self, tweet):
        if not User.objects.filter(id_str=tweet["user"]["id_str"]).exists():
            user_obj = User()
            user_obj.id_str = tweet["user"]["id_str"]
            user_obj.name = tweet["user"]["name"]
            user_obj.screen_name = tweet["user"]["screen_name"]
            user_obj.location = tweet["user"]["location"]
            user_obj.url = tweet["user"]["url"]
            user_obj.description = tweet["user"]["description"]
            user_obj.verified = tweet["user"]["verified"]
            user_obj.follower = tweet["user"]["followers_count"]
            user_obj.statuses_count = tweet["user"]["statuses_count"]
            user_obj.created_at = self.str_to_date(tweet["user"]["created_at"])
            user_obj.profile_image_url = tweet["user"]["profile_image_url"]
            user_obj.save()

            user = user_obj
        else:
            user = User.objects.filter(id_str=tweet["user"]["id_str"]).first()
        
        if not Tweet.objects.filter(id_str=tweet["id_str"]).exists():
            tweet_obj = Tweet()
            tweet_obj.id_str = tweet["id_str"]
            tweet_obj.created_at = self.str_to_date(tweet["created_at"])
            tweet_obj.tag = self.tag
            tweet_obj.user = user
            tweet_obj.text = tweet["text"]
            tweet_obj.in_reply_to_status_id = tweet["in_reply_to_status_id"]
            tweet_obj.in_reply_to_user_id = tweet["in_reply_to_user_id"]
            tweet_obj.in_reply_to_screen_name = tweet["in_reply_to_screen_name"]
            tweet_obj.geo = tweet["geo"]
            tweet_obj.coordinates = tweet["coordinates"]
            tweet_obj.place = tweet["place"]

            if tweet["is_quote_status"] and "quoted_status" in tweet:
                if not Tweet.objects.filter(id_str=tweet["quoted_status"]["id_str"]).exists():
                    if not User.objects.filter(id_str=tweet["quoted_status"]["user"]["id_str"]).exists():
                        quoted_user_obj = User()
                        quoted_user_obj.id_str = tweet["quoted_status"]["user"]["id_str"]
                        quoted_user_obj.name = tweet["quoted_status"]["user"]["name"]
                        quoted_user_obj.screen_name = tweet["quoted_status"]["user"]["screen_name"]
                        quoted_user_obj.location = tweet["quoted_status"]["user"]["location"]
                        quoted_user_obj.url = tweet["quoted_status"]["user"]["url"]
                        quoted_user_obj.description = tweet["quoted_status"]["user"]["description"]
                        quoted_user_obj.verified = tweet["quoted_status"]["user"]["verified"]
                        quoted_user_obj.follower = tweet["quoted_status"]["user"]["followers_count"]
                        quoted_user_obj.statuses_count = tweet["quoted_status"]["user"]["statuses_count"]
                        quoted_user_obj.created_at = self.str_to_date(tweet["quoted_status"]["user"]["created_at"])
                        quoted_user_obj.profile_image_url = tweet["quoted_status"]["user"]["profile_image_url"]
                        quoted_user_obj.save()
                    else:
                        quoted_user_obj = User.objects.filter(id_str=tweet["quoted_status"]["user"]["id_str"]).first()
                    
                    quoted_obj = Tweet()
                    quoted_obj.id_str = tweet["quoted_status"]["id_str"]
                    quoted_obj.created_at = self.str_to_date(tweet["quoted_status"]["created_at"])
                    quoted_obj.user = quoted_user_obj
                    quoted_obj.tag = self.tag
                    quoted_obj.text = tweet["quoted_status"]["text"]
                    quoted_obj.in_reply_to_status_id = tweet["quoted_status"]["in_reply_to_status_id"]
                    quoted_obj.in_reply_to_user_id = tweet["quoted_status"]["in_reply_to_user_id"]
                    quoted_obj.in_reply_to_screen_name = tweet["quoted_status"]["in_reply_to_screen_name"]
                    quoted_obj.geo = tweet["quoted_status"]["geo"]
                    quoted_obj.coordinates = tweet["quoted_status"]["coordinates"]
                    quoted_obj.place = tweet["quoted_status"]["place"]

                    if "quote_count" in tweet:
                        quoted_obj.quote_count = tweet["quote_count"]
                    if "reply_count" in tweet:
                        quoted_obj.reply_count = tweet["reply_count"]
                    if "retweet_count" in tweet:
                        quoted_obj.retweet_count = tweet["retweet_count"]
                    if "favorite_count" in tweet:
                        quoted_obj.favorite_count = tweet["favorite_count"]

                    quoted_obj.entities = tweet["quoted_status"]["entities"]
                    quoted_obj.save()
                else:
                    quoted_obj = Tweet.objects.filter(id_str=tweet["quoted_status"]["id_str"]).first()
                
                tweet_obj.quoted_status_id = tweet["quoted_status_id_str"]
                tweet_obj.is_quote_status = tweet["is_quote_status"]
                tweet_obj.quoted_status = quoted_obj
                if "quoted_status_permalink" in tweet:
                    tweet_obj.quoted_status_permalink = tweet["quoted_status_permalink"]["url"]
            
            if "quote_count" in tweet:
                tweet_obj.quote_count = tweet["quote_count"]
            if "reply_count" in tweet:
                tweet_obj.reply_count = tweet["reply_count"]
            if "retweet_count" in tweet:
                tweet_obj.retweet_count = tweet["retweet_count"]
            if "favorite_count" in tweet:
                tweet_obj.favorite_count = tweet["favorite_count"]
            tweet_obj.entities = tweet["entities"]
            tweet_obj.save()

            return tweet_obj
Example #9
def result():
    selectedChoices = ChoiceObj('attractions', session.get('selected'))
    form_splace = SearchPlaceForm()
    form_stweets = SearchTweetsForm(obj=selectedChoices)

    if form_stweets.validate_on_submit():
        session['selected'] = form_stweets.multi_attractions.data
        place_name = form_stweets.place.data
        latitude = form_stweets.latitude.data
        longitude = form_stweets.longitude.data
        attractions = session.get('selected')
        range_dist = form_stweets.range_dist.data
        days_before = form_stweets.days_before.data

        # CRAWLING
        twitter_crawler = TwitterCrawler(current_app)
        df_attractions = twitter_crawler.fetch_tweets_from_attractions(
            attractions, int(days_before), float(latitude), float(longitude),
            int(range_dist), place_name)

        # if crawling returned fewer than 20 tweets, show a notification
        if len(df_attractions) < 20:
            return render_template('notification.html')

        # insert into crawler table
        crawler = Crawler()
        crawler.timestamp = datetime.now(pytz.timezone('Asia/Jakarta'))
        db.session.add(crawler)
        db.session.commit()

        # insert into attractions table
        attractions_lower = [x.lower() for x in attractions]
        att = Attractions()
        att.attractions = ','.join(attractions_lower)
        att.crawler_id = crawler.id
        db.session.add(att)
        db.session.commit()

        # insert into tweet table
        for _, row in df_attractions.iterrows():
            tweet = Tweet()
            tweet.user_id = row['user_id']
            tweet.username = row['username']
            tweet.created = row['created_at']
            tweet.text = row['text']
            tweet.latitude = row['latitude']
            tweet.longitude = row['longitude']
            tweet.crawler_id = crawler.id
            db.session.add(tweet)
            db.session.commit()

        # PREPROCESSING
        tweets = Tweet.query.filter_by(crawler_id=crawler.id)
        attractions = Attractions.query.filter_by(crawler_id=crawler.id)

        # change attractions into list
        list_attractions = []
        for a in attractions:
            list_attractions.append(a.attractions)

        list_attractions = ''.join(list_attractions).split(',')

        # separate text into list
        list_tweets = []
        for t in tweets:
            id_tweet = [t.id, t.text]
            list_tweets.append(id_tweet)

        # define
        normalizer = Normalize()
        tokenizer = Tokenize()
        symspell = SymSpell(max_dictionary_edit_distance=3)
        SITE_ROOT = os.path.abspath(os.path.dirname(__file__))
        json_url = os.path.join(SITE_ROOT, "..", "data",
                                "corpus_complete_model.json")
        symspell.load_complete_model_from_json(json_url, encoding="ISO-8859-1")

        # do preprocess
        result = []
        for tweet in list_tweets:
            tweet_id, text = tweet[0], tweet[1]

            # normalize
            tweet_norm = normalizer.remove_ascii_unicode(text)
            tweet_norm = normalizer.remove_rt_fav(tweet_norm)
            tweet_norm = normalizer.lower_text(tweet_norm)
            tweet_norm = normalizer.remove_newline(tweet_norm)
            tweet_norm = normalizer.remove_url(tweet_norm)
            tweet_norm = normalizer.remove_emoticon(tweet_norm)
            tweet_norm = normalizer.remove_hashtag_mention(tweet_norm)
            tweet_norm = normalizer.remove_punctuation(tweet_norm)

            # tokenize
            tweet_tok = tokenizer.WordTokenize(tweet_norm, removepunct=True)

            # spell correction
            temp = []
            for token in tweet_tok:
                suggestion = symspell.lookup(phrase=token,
                                             verbosity=1,
                                             max_edit_distance=3)

                # option if there is no suggestion
                if len(suggestion) > 0:
                    get_suggestion = str(suggestion[0]).split(':')[0]
                    temp.append(get_suggestion)
                else:
                    temp.append(token)
            tweet_prepared = ' '.join(temp)

            # join attraction with strip
            tweet_prepared = normalizer.join_attraction(
                tweet_prepared, list_attractions)

            id_tweet_prepared = [tweet_id, tweet_prepared]
            result.append(id_tweet_prepared)

        # insert into table preprocess
        for res in result:
            tweet_id, text = res[0], res[1]

            tb_preprocess = Preprocess()
            tb_preprocess.text = text
            tb_preprocess.tweet_id = tweet_id
            tb_preprocess.crawler_id = crawler.id
            db.session.add(tb_preprocess)
            db.session.commit()

        # POS TAGGING
        tweets_preprocessed = Preprocess.query.filter_by(crawler_id=crawler.id)

        # get text from table Preprocess
        list_tweets = []
        for t in tweets_preprocessed:
            tid_tweet = [t.tweet_id, t.text]
            list_tweets.append(tid_tweet)

        # path
        SITE_ROOT = os.path.abspath(os.path.dirname(__file__))
        lexicon_url = os.path.join(SITE_ROOT, "..", "data", "Lexicon.trn")
        ngram_url = os.path.join(SITE_ROOT, "..", "data", "Ngram.trn")

        # initialize
        tagger = MainTagger(lexicon_url, ngram_url, 0, 3, 3, 0, 0, False, 0.2,
                            0, 500.0, 1)
        tokenize = Tokenization()

        # do pos tagging
        result = []
        for tweet in list_tweets:
            tweet_id, text = tweet[0], tweet[1]

            if len(text) == 0:
                tid_text = [tweet_id, text]
                result.append(tid_text)
            else:
                if len(text.split(' ')) == 1:
                    text = text + ' ini'
                out = tokenize.sentence_extraction(tokenize.cleaning(text))
                join_word = []
                for o in out:
                    strtag = " ".join(tokenize.tokenisasi_kalimat(o)).strip()
                    join_word += [" ".join(tagger.taggingStr(strtag))]
                tid_text = [tweet_id, join_word]
                result.append(tid_text)

        # insert into table preprocess
        for tweet in result:
            tweet_id, text = tweet[0], tweet[1]
            tweet_str = ''.join(text)

            tb_postag = PosTag()
            tb_postag.text = tweet_str
            tb_postag.tweet_id = tweet_id
            tb_postag.crawler_id = crawler.id
            db.session.add(tb_postag)
            db.session.commit()

        # PENENTUAN KELAS
        Ccon = ['JJ', 'NN', 'NNP', 'NNG', 'VBI', 'VBT']
        Cfunc = [
            'OP', 'CP', 'GM', ';', ':', '"', '.', ',', '-', '...', 'RB', 'IN',
            'MD', 'CC', 'SC', 'DT', 'UH', 'CDO', 'CDC', 'CDP', 'CDI', 'PRP',
            'WP', 'PRN', 'PRL', 'NEG', 'SYM', 'RP', 'FW'
        ]
        tweets_tagged = PosTag.query.filter_by(crawler_id=crawler.id)

        # get text from table PostTag
        list_tweets = []
        for t in tweets_tagged:
            tid_tweet = [t.tweet_id, t.text]
            list_tweets.append(tid_tweet)

        # do penentuan kelas
        result = []
        for tweet in list_tweets:
            tweet_id, text = tweet[0], tweet[1]

            # define doc_complete up front so the else-branch can use it too
            doc_complete = {"con": [], "func": []}

            if len(text) > 0:
                con = []
                func = []

                for word in text.split(' '):
                    # keep the POS tag that follows the first '/'
                    tag = word.split('/', 1)[1]
                    if tag in Ccon:
                        con.append(word)
                    elif tag in Cfunc:
                        func.append(word)
                doc_complete["con"].append(' '.join(con))
                doc_complete["func"].append(' '.join(func))
            else:
                doc_complete["con"].append(text)
                doc_complete["func"].append(text)

            result.append([tweet_id, doc_complete])

        # insert into table penentuan kelas
        for tweet in result:
            tweet_id, text = tweet[0], tweet[1]
            content, function = ''.join(text["con"]), ''.join(text["func"])

            tb_penentuan_kelas = PenentuanKelas()
            tb_penentuan_kelas.content = content
            tb_penentuan_kelas.function = function
            tb_penentuan_kelas.tweet_id = tweet_id
            tb_penentuan_kelas.crawler_id = crawler.id
            db.session.add(tb_penentuan_kelas)
            db.session.commit()

        # LDA
        tweets_penentuan_kelas = PenentuanKelas.query.filter_by(
            crawler_id=crawler.id)

        # get tweets content
        tweets_content_tagged = []
        for tweet in tweets_penentuan_kelas:
            tweets_content_tagged.append(tweet.content)

        # separate word and tag
        documents = []
        for tweet in tweets_content_tagged:
            tweet_split = tweet.split(' ')
            temp = []
            for word in tweet_split:
                w = word.split("/", 1)[0]
                temp.append(w)
            documents.append(temp)

        # do process lda
        lda = LdaModel(documents, 4, 0.001, 0.001, 1000)
        result = lda.get_topic_word_pwz(tweets_content_tagged)

        # insert into table ldapwz
        for r in result:
            topic, word, pwz = r[0], r[1], r[2]

            tb_ldapwz = LdaPWZ()
            tb_ldapwz.topic = topic
            tb_ldapwz.word = word
            tb_ldapwz.pwz = pwz
            tb_ldapwz.crawler_id = crawler.id
            db.session.add(tb_ldapwz)
            db.session.commit()

        # GRAMMAR STORY
        ldapwz = LdaPWZ.query.filter_by(crawler_id=crawler.id)

        # get topic with words in dictionary
        dict_ldapwz = defaultdict(list)
        for data in ldapwz:
            dict_ldapwz[data.topic].append([data.word, data.pwz])

        # initialize
        cfg_informasi = CFG_Informasi()
        cfg_cerita = CFG_Cerita()

        # create sentence for story
        dict_story_informasi = cfg_informasi.create_sentences_from_data(
            dict(dict_ldapwz))
        dict_story_cerita = cfg_cerita.create_sentences_from_data(
            dict(dict_ldapwz))

        # join into dict_story
        dict_story = defaultdict(list)
        for d in (dict_story_informasi, dict_story_cerita):
            for key, value in d.items():
                dict_story[key].append('. '.join(i.capitalize()
                                                 for i in value))

        # insert into table GrammarStory
        for topic, stories in dict_story.items():
            # insert informasi
            tb_grammar_story = GrammarStory()
            tb_grammar_story.topic = topic
            tb_grammar_story.rules = 'informasi'
            tb_grammar_story.story = stories[0]
            tb_grammar_story.crawler_id = crawler.id
            db.session.add(tb_grammar_story)
            db.session.commit()

            # insert cerita
            tb_grammar_story = GrammarStory()
            tb_grammar_story.topic = topic
            tb_grammar_story.rules = 'cerita'
            tb_grammar_story.story = stories[1]
            tb_grammar_story.crawler_id = crawler.id
            db.session.add(tb_grammar_story)
            db.session.commit()

    c = Crawler.query.order_by(Crawler.id.desc()).all()

    return render_template("stories.html",
                           crawler=c,
                           form_stweets=form_stweets)
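The view above calls `db.session.commit()` inside each of its insert loops, one commit per row. A hedged refactor of the tweet-insert loop (same models and session as in the view) batches the writes into a single commit:

tweets = []
for _, row in df_attractions.iterrows():
    tweet = Tweet()
    tweet.user_id = row['user_id']
    tweet.username = row['username']
    tweet.created = row['created_at']
    tweet.text = row['text']
    tweet.latitude = row['latitude']
    tweet.longitude = row['longitude']
    tweet.crawler_id = crawler.id
    tweets.append(tweet)
db.session.add_all(tweets)
db.session.commit()  # one commit for the whole batch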
Example #10
def test_instance_variables(self):
    # Create an instance of the `Tweet` class and assign its text
    tweet = Tweet()
    tweet.text = "my first tweet"
    # Check that `text` holds the content of the tweet
    self.assertEqual(tweet.text, "my first tweet")
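A minimal sketch of a `Tweet` class that would satisfy this test (hedged; the other examples imply a fuller SQLAlchemy model with `id` and `created_at` columns):

class Tweet:
    def __init__(self):
        # `text` starts empty and is assigned after construction, as in the test
        self.text = None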