def test_tweet_delete(self):
    first_tweet = Tweet()
    first_tweet.text = "First tweet"
    db.session.add(first_tweet)
    db.session.commit()
    self.client.delete("/tweets/1")
    self.assertIsNone(db.session.query(Tweet).get(1))
def test_tweets_show(self):
    first_tweet = Tweet()
    first_tweet.id = 1
    first_tweet.text = "First tweet"
    db.session.add(first_tweet)
    second_tweet = Tweet()
    second_tweet.id = 2
    second_tweet.text = "Second tweet"
    db.session.add(second_tweet)
    db.session.commit()
    response = self.client.get("/tweets")
    response_tweets = response.json
    self.assertEqual(len(response_tweets), 2)
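# The tests in this section assume a SQLAlchemy `Tweet` model and a test case
# that binds the Flask app to a throwaway database. A minimal sketch of such a
# fixture using flask_testing; `create_app` and the module path are
# assumptions, not taken from the original code:
from flask_testing import TestCase
from app import create_app, db  # assumed application factory module

class TestTweetViews(TestCase):
    def create_app(self):
        app = create_app()
        # in-memory SQLite keeps each test run isolated (assumed config)
        app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite://"
        return app

    def setUp(self):
        db.create_all()

    def tearDown(self):
        db.session.remove()
        db.drop_all()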
def post(self): """ Create a new tweet """ tweet_to_add = Tweet() tweet_to_add.text = api.payload['text'] db.session.add(tweet_to_add) db.session.commit() return tweet_to_add, 201
def test_tweet_update(self):
    first_tweet = Tweet()
    first_tweet.text = "First tweet"
    db.session.add(first_tweet)
    db.session.commit()
    response = self.client.patch("/tweets/1", json={"text": "New text"})
    updated_tweet = response.json
    self.assertEqual(response.status_code, 200)
    self.assertEqual(updated_tweet["id"], 1)
    self.assertEqual(updated_tweet["text"], "New text")
def test_tweet_show(self):
    first_tweet = Tweet()
    first_tweet.text = "First tweet"
    db.session.add(first_tweet)
    db.session.commit()  # persist so the request can read the tweet back
    response = self.client.get("/tweets/1")
    response_tweet = response.json
    self.assertEqual(response_tweet["id"], 1)
    self.assertEqual(response_tweet["text"], "First tweet")
    self.assertIsNotNone(response_tweet["created_at"])
def post(self):
    text = api.payload["text"]
    if len(text) > 0:
        tweet = Tweet()
        tweet.text = text
        db.session.add(tweet)
        db.session.commit()
        return tweet, 201
    else:
        return abort(422, "Tweet text can't be empty")
def post(self):
    text = api.payload["text"]
    user_name = api.payload["user"]
    if len(text) > 0:
        user = db.session.query(User).filter_by(name=user_name).first()
        if user is None:
            # avoid an AttributeError (HTTP 500) on an unknown user
            return abort(404, f"No user named '{user_name}'")
        tweet = Tweet()
        tweet.text = text
        tweet.user_id = user.id
        db.session.add(tweet)
        db.session.commit()
        return tweet, 201
    else:
        return abort(422, "Tweet text can't be empty")
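# The `post` handlers above return a bare SQLAlchemy object, which only
# serializes if the resource is wrapped with a flask-restx marshalling model.
# A minimal sketch of the wiring they appear to assume; the exact model and
# route are assumptions, not taken from the source:
from flask_restx import Api, Resource, fields

api = Api()

tweet_model = api.model("Tweet", {
    "id": fields.Integer,
    "text": fields.String,
    "created_at": fields.DateTime,
})

@api.route("/tweets")
class TweetsResource(Resource):
    @api.expect(tweet_model, validate=True)
    @api.marshal_with(tweet_model, code=201)
    def post(self):
        # api.payload is the parsed JSON body; marshal_with serializes the
        # returned Tweet instance into the fields declared above
        ...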
def save_tweet(self, tweet):
    # Get or create the author of the tweet.
    user = User.objects.filter(id_str=tweet["user"]["id_str"]).first()
    if user is None:
        user = User()
        user.id_str = tweet["user"]["id_str"]
        user.name = tweet["user"]["name"]
        user.screen_name = tweet["user"]["screen_name"]
        user.location = tweet["user"]["location"]
        user.url = tweet["user"]["url"]
        user.description = tweet["user"]["description"]
        user.verified = tweet["user"]["verified"]
        user.follower = tweet["user"]["followers_count"]
        user.statuses_count = tweet["user"]["statuses_count"]
        user.created_at = self.str_to_date(tweet["user"]["created_at"])
        user.profile_image_url = tweet["user"]["profile_image_url"]
        user.save()

    # Reuse the stored row if this tweet was already saved.
    existing = Tweet.objects.filter(id_str=tweet["id_str"]).first()
    if existing is not None:
        return existing

    tweet_obj = Tweet()
    tweet_obj.id_str = tweet["id_str"]
    tweet_obj.created_at = self.str_to_date(tweet["created_at"])
    tweet_obj.tag = self.tag
    tweet_obj.user = user
    tweet_obj.text = tweet["text"]
    tweet_obj.in_reply_to_status_id = tweet["in_reply_to_status_id"]
    tweet_obj.in_reply_to_user_id = tweet["in_reply_to_user_id"]
    tweet_obj.in_reply_to_screen_name = tweet["in_reply_to_screen_name"]
    tweet_obj.geo = tweet["geo"]
    tweet_obj.coordinates = tweet["coordinates"]
    tweet_obj.place = tweet["place"]

    if tweet["is_quote_status"] and "quoted_status" in tweet:
        quoted = tweet["quoted_status"]
        quoted_obj = Tweet.objects.filter(id_str=quoted["id_str"]).first()
        if quoted_obj is None:
            # Get or create the author of the quoted tweet.
            quoted_user = User.objects.filter(id_str=quoted["user"]["id_str"]).first()
            if quoted_user is None:
                quoted_user = User()
                quoted_user.id_str = quoted["user"]["id_str"]
                quoted_user.name = quoted["user"]["name"]
                quoted_user.screen_name = quoted["user"]["screen_name"]
                quoted_user.location = quoted["user"]["location"]
                quoted_user.url = quoted["user"]["url"]
                quoted_user.description = quoted["user"]["description"]
                quoted_user.verified = quoted["user"]["verified"]
                quoted_user.follower = quoted["user"]["followers_count"]
                quoted_user.statuses_count = quoted["user"]["statuses_count"]
                quoted_user.created_at = self.str_to_date(quoted["user"]["created_at"])
                quoted_user.profile_image_url = quoted["user"]["profile_image_url"]
                quoted_user.save()

            quoted_obj = Tweet()
            quoted_obj.id_str = quoted["id_str"]
            quoted_obj.created_at = self.str_to_date(quoted["created_at"])
            quoted_obj.user = quoted_user
            quoted_obj.tag = self.tag
            quoted_obj.text = quoted["text"]
            quoted_obj.in_reply_to_status_id = quoted["in_reply_to_status_id"]
            quoted_obj.in_reply_to_user_id = quoted["in_reply_to_user_id"]
            quoted_obj.in_reply_to_screen_name = quoted["in_reply_to_screen_name"]
            quoted_obj.geo = quoted["geo"]
            quoted_obj.coordinates = quoted["coordinates"]
            quoted_obj.place = quoted["place"]
            # Engagement counts of the quoted tweet itself.
            if "quote_count" in quoted:
                quoted_obj.quote_count = quoted["quote_count"]
            if "reply_count" in quoted:
                quoted_obj.reply_count = quoted["reply_count"]
            if "retweet_count" in quoted:
                quoted_obj.retweet_count = quoted["retweet_count"]
            if "favorite_count" in quoted:
                quoted_obj.favorite_count = quoted["favorite_count"]
            quoted_obj.entities = quoted["entities"]
            quoted_obj.save()

        tweet_obj.is_quote_status = tweet["is_quote_status"]
        tweet_obj.quoted_status = quoted_obj
        tweet_obj.quoted_status_id = tweet["quoted_status_id"]
        if "quoted_status_permalink" in tweet:
            tweet_obj.quoted_status_permalink = tweet["quoted_status_permalink"]["url"]

    # Engagement counts of the tweet itself.
    if "quote_count" in tweet:
        tweet_obj.quote_count = tweet["quote_count"]
    if "reply_count" in tweet:
        tweet_obj.reply_count = tweet["reply_count"]
    if "retweet_count" in tweet:
        tweet_obj.retweet_count = tweet["retweet_count"]
    if "favorite_count" in tweet:
        tweet_obj.favorite_count = tweet["favorite_count"]
    tweet_obj.entities = tweet["entities"]
    tweet_obj.save()
    return tweet_obj
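# A hedged usage sketch for save_tweet: replaying archived statuses from a
# newline-delimited JSON dump. The crawler class name, constructor, and file
# path are hypothetical, not from the original code.
import json

crawler = TweetCrawler(tag="jakarta")  # hypothetical constructor
with open("statuses.ndjson", encoding="utf-8") as fh:
    for line in fh:
        status = json.loads(line)
        # safe to call repeatedly: rows with an existing id_str are reused
        crawler.save_tweet(status)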
def result():
    selectedChoices = ChoiceObj('attractions', session.get('selected'))
    form_splace = SearchPlaceForm()
    form_stweets = SearchTweetsForm(obj=selectedChoices)

    if form_stweets.validate_on_submit():
        session['selected'] = form_stweets.multi_attractions.data
        place_name = form_stweets.place.data
        latitude = form_stweets.latitude.data
        longitude = form_stweets.longitude.data
        attractions = session.get('selected')
        range_dist = form_stweets.range_dist.data
        days_before = form_stweets.days_before.data

        # CRAWLING
        twitter_crawler = TwitterCrawler(current_app)
        df_attractions = twitter_crawler.fetch_tweets_from_attractions(
            attractions, int(days_before), float(latitude), float(longitude),
            int(range_dist), place_name)

        # if the crawl returned fewer than 20 tweets, show a notification instead
        if len(df_attractions) < 20:
            return render_template('notification.html')

        # insert into crawler table
        crawler = Crawler()
        crawler.timestamp = datetime.now(pytz.timezone('Asia/Jakarta'))
        db.session.add(crawler)
        db.session.commit()

        # insert into attractions table
        attractions_lower = [x.lower() for x in attractions]
        att = Attractions()
        att.attractions = ','.join(attractions_lower)
        att.crawler_id = crawler.id
        db.session.add(att)
        db.session.commit()

        # insert into tweet table
        for _, row in df_attractions.iterrows():
            tweet = Tweet()
            tweet.user_id = row['user_id']
            tweet.username = row['username']
            tweet.created = row['created_at']
            tweet.text = row['text']
            tweet.latitude = row['latitude']
            tweet.longitude = row['longitude']
            tweet.crawler_id = crawler.id
            db.session.add(tweet)
            db.session.commit()

        # PREPROCESSING
        tweets = Tweet.query.filter_by(crawler_id=crawler.id)
        attractions = Attractions.query.filter_by(crawler_id=crawler.id)

        # flatten the stored attractions into a list of names
        list_attractions = []
        for a in attractions:
            list_attractions.append(a.attractions)
        list_attractions = ''.join(list_attractions).split(',')

        # pair every tweet id with its text
        list_tweets = []
        for t in tweets:
            list_tweets.append([t.id, t.text])

        # initialize normalizer, tokenizer, and spell checker
        normalizer = Normalize()
        tokenizer = Tokenize()
        symspell = SymSpell(max_dictionary_edit_distance=3)
        SITE_ROOT = os.path.abspath(os.path.dirname(__file__))
        json_url = os.path.join(SITE_ROOT, "..", "data", "corpus_complete_model.json")
        symspell.load_complete_model_from_json(json_url, encoding="ISO-8859-1")

        # preprocess every tweet
        result = []
        for tweet_id, text in list_tweets:
            # normalize
            tweet_norm = normalizer.remove_ascii_unicode(text)
            tweet_norm = normalizer.remove_rt_fav(tweet_norm)
            tweet_norm = normalizer.lower_text(tweet_norm)
            tweet_norm = normalizer.remove_newline(tweet_norm)
            tweet_norm = normalizer.remove_url(tweet_norm)
            tweet_norm = normalizer.remove_emoticon(tweet_norm)
            tweet_norm = normalizer.remove_hashtag_mention(tweet_norm)
            tweet_norm = normalizer.remove_punctuation(tweet_norm)

            # tokenize
            tweet_tok = tokenizer.WordTokenize(tweet_norm, removepunct=True)

            # spell correction; keep the original token when there is no suggestion
            temp = []
            for token in tweet_tok:
                suggestion = symspell.lookup(phrase=token, verbosity=1,
                                             max_edit_distance=3)
                if len(suggestion) > 0:
                    get_suggestion = str(suggestion[0]).split(':')[0]
                    temp.append(get_suggestion)
                else:
                    temp.append(token)
            tweet_prepared = ' '.join(temp)

            # join multi-word attraction names into single tokens
            tweet_prepared = normalizer.join_attraction(tweet_prepared, list_attractions)
            result.append([tweet_id, tweet_prepared])

        # insert into preprocess table
        for tweet_id, text in result:
            tb_preprocess = Preprocess()
            tb_preprocess.text = text
            tb_preprocess.tweet_id = tweet_id
            tb_preprocess.crawler_id = crawler.id
            db.session.add(tb_preprocess)
            db.session.commit()

        # POS TAGGING
        tweets_preprocessed = Preprocess.query.filter_by(crawler_id=crawler.id)

        # get text from the Preprocess table
        list_tweets = []
        for t in tweets_preprocessed:
            list_tweets.append([t.tweet_id, t.text])

        # paths to the tagger training data
        SITE_ROOT = os.path.abspath(os.path.dirname(__file__))
        lexicon_url = os.path.join(SITE_ROOT, "..", "data", "Lexicon.trn")
        ngram_url = os.path.join(SITE_ROOT, "..", "data", "Ngram.trn")

        # initialize the tagger
        tagger = MainTagger(lexicon_url, ngram_url, 0, 3, 3, 0, 0, False, 0.2, 0, 500.0, 1)
        tokenize = Tokenization()

        # tag every tweet
        result = []
        for tweet_id, text in list_tweets:
            if len(text) == 0:
                result.append([tweet_id, text])
            else:
                # the tagger needs more than one token, so pad one-word tweets
                if len(text.split(' ')) == 1:
                    text = text + ' ini'
                out = tokenize.sentence_extraction(tokenize.cleaning(text))
                join_word = []
                for o in out:
                    strtag = " ".join(tokenize.tokenisasi_kalimat(o)).strip()
                    join_word += [" ".join(tagger.taggingStr(strtag))]
                result.append([tweet_id, join_word])

        # insert into postag table
        for tweet_id, text in result:
            tweet_str = ''.join(text)
            tb_postag = PosTag()
            tb_postag.text = tweet_str
            tb_postag.tweet_id = tweet_id
            tb_postag.crawler_id = crawler.id
            db.session.add(tb_postag)
            db.session.commit()

        # CLASS DETERMINATION (penentuan kelas): split tokens into content and function words
        Ccon = ['JJ', 'NN', 'NNP', 'NNG', 'VBI', 'VBT']
        Cfunc = ['OP', 'CP', 'GM', ';', ':', '"', '.', ',', '-', '...', 'RB', 'IN',
                 'MD', 'CC', 'SC', 'DT', 'UH', 'CDO', 'CDC', 'CDP', 'CDI', 'PRP',
                 'WP', 'PRN', 'PRL', 'NEG', 'SYM', 'RP', 'FW']
        tweets_tagged = PosTag.query.filter_by(crawler_id=crawler.id)

        # get text from the PosTag table
        list_tweets = []
        for t in tweets_tagged:
            list_tweets.append([t.tweet_id, t.text])

        result = []
        for tweet_id, text in list_tweets:
            doc_complete = {"con": [], "func": []}
            if len(text) > 0:
                con = []
                func = []
                for word in text.split(' '):
                    tag = word.split('/', 1)[1]
                    if tag in Ccon:
                        con.append(word)
                    elif tag in Cfunc:
                        func.append(word)
                doc_complete["con"].append(' '.join(con))
                doc_complete["func"].append(' '.join(func))
            else:
                doc_complete["con"].append(text)
                doc_complete["func"].append(text)
            result.append([tweet_id, doc_complete])

        # insert into the PenentuanKelas table
        for tweet_id, text in result:
            content, function = ''.join(text["con"]), ''.join(text["func"])
            tb_penentuan_kelas = PenentuanKelas()
            tb_penentuan_kelas.content = content
            tb_penentuan_kelas.function = function
            tb_penentuan_kelas.tweet_id = tweet_id
            tb_penentuan_kelas.crawler_id = crawler.id
            db.session.add(tb_penentuan_kelas)
            db.session.commit()

        # LDA
        tweets_penentuan_kelas = PenentuanKelas.query.filter_by(crawler_id=crawler.id)

        # collect the content words of every tweet
        tweets_content_tagged = []
        for tweet in tweets_penentuan_kelas:
            tweets_content_tagged.append(tweet.content)

        # strip the POS tags, keeping only the words
        documents = []
        for tweet in tweets_content_tagged:
            temp = []
            for word in tweet.split(' '):
                temp.append(word.split("/", 1)[0])
            documents.append(temp)

        # run LDA over the content-word documents
        lda = LdaModel(documents, 4, 0.001, 0.001, 1000)
        result = lda.get_topic_word_pwz(tweets_content_tagged)

        # insert into ldapwz table
        for topic, word, pwz in result:
            tb_ldapwz = LdaPWZ()
            tb_ldapwz.topic = topic
            tb_ldapwz.word = word
            tb_ldapwz.pwz = pwz
            tb_ldapwz.crawler_id = crawler.id
            db.session.add(tb_ldapwz)
            db.session.commit()

        # GRAMMAR STORY
        ldapwz = LdaPWZ.query.filter_by(crawler_id=crawler.id)

        # group the words (with their P(w|z)) by topic
        dict_ldapwz = defaultdict(list)
        for data in ldapwz:
            dict_ldapwz[data.topic].append([data.word, data.pwz])

        # initialize the grammars
        cfg_informasi = CFG_Informasi()
        cfg_cerita = CFG_Cerita()

        # generate the story sentences for each topic
        dict_story_informasi = cfg_informasi.create_sentences_from_data(dict(dict_ldapwz))
        dict_story_cerita = cfg_cerita.create_sentences_from_data(dict(dict_ldapwz))

        # merge both story types: index 0 holds 'informasi', index 1 'cerita'
        dict_story = defaultdict(list)
        for d in (dict_story_informasi, dict_story_cerita):
            for key, value in d.items():
                dict_story[key].append('. '.join(i.capitalize() for i in value))

        # insert into GrammarStory table
        for topic, stories in dict_story.items():
            # 'informasi' story
            tb_grammar_story = GrammarStory()
            tb_grammar_story.topic = topic
            tb_grammar_story.rules = 'informasi'
            tb_grammar_story.story = stories[0]
            tb_grammar_story.crawler_id = crawler.id
            db.session.add(tb_grammar_story)
            db.session.commit()
            # 'cerita' story
            tb_grammar_story = GrammarStory()
            tb_grammar_story.topic = topic
            tb_grammar_story.rules = 'cerita'
            tb_grammar_story.story = stories[1]
            tb_grammar_story.crawler_id = crawler.id
            db.session.add(tb_grammar_story)
            db.session.commit()

    c = Crawler.query.order_by(Crawler.id.desc()).all()
    return render_template("stories.html", crawler=c, form_stweets=form_stweets)
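# The dict_story merge above relies on tuple order: iterating
# (dict_story_informasi, dict_story_cerita) guarantees stories[0] is the
# 'informasi' text and stories[1] the 'cerita' text. A toy illustration of
# that pattern (the sample sentences are made up):
from collections import defaultdict

informasi = {0: ["wisata pantai ramai", "pantai indah"]}
cerita = {0: ["saya pergi ke pantai"]}

dict_story = defaultdict(list)
for d in (informasi, cerita):
    for key, value in d.items():
        dict_story[key].append('. '.join(i.capitalize() for i in value))

print(dict_story[0])
# ['Wisata pantai ramai. Pantai indah', 'Saya pergi ke pantai']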
def test_instance_variables(self):
    # Create an instance of the `Tweet` class and set its text
    tweet = Tweet()
    tweet.text = "my first tweet"
    # Check that `text` holds the content of the tweet
    self.assertEqual(tweet.text, "my first tweet")