def collect_todays_tweets(entry):
    """Summarise the tweets held in ``<entry>-latest`` into one document.

    Nothing happens when ``entry`` contains "-latest" or "median", when the
    ``<entry>-latest`` index does not exist, or when that index yields no
    documents. Otherwise every document returned by the search is read; the
    hour of its "created" timestamp is counted and the filtered words of its
    "text" are tallied. The resulting summary is passed to
    ``elastic_utils.add_entry`` for ``entry`` with ``last_id(entry) + 1``,
    after which ``<entry>-latest`` is deleted and re-created.

    Args:
        entry: Name of the topic index to summarise.

    Returns:
        None.
    """
    # Derived indexes are not topics themselves, so skip them.
    if "-latest" in entry or "median" in entry:
        return

    latest_index = entry + "-latest"

    # We first need to collect all of today's tweets.
    entry_total = elastic_utils.last_id(entry)
    if elastic_utils.check_index_exists(latest_index) is not True:
        return

    total = elastic_utils.last_id(latest_index)
    day_res = elastic_utils.iterate_search(latest_index, query={
        "query": {
            "match_all": {}
        },
        "sort": [{
            "last_time": {
                "order": "desc"
            }
        }]
    })

    word_counter = Counter()
    hour_break_dict = {}
    dateobj = None
    created_at = None
    for tweet in day_res:
        source = tweet["_source"]
        datetime_object = datetime.strptime(
            source["created"], '%Y-%m-%d %H:%M:%S')
        # The summary is stamped with the values of the last tweet iterated.
        dateobj = datetime_object.strftime("%Y-%m-%d")
        created_at = datetime_object.strftime("%Y-%m-%dT%H:%M:%S")

        hour = str(datetime_object.hour)
        hour_break_dict[hour] = hour_break_dict.get(hour, 0) + 1

        words = preprocessor.filter_multiple(
            str(source["text"]), ats=True, hashtags=True, stopwords=True,
            stemming=False, urls=True, singles=True)
        word_counter.update(list(words))

    if created_at is None:
        # No tweets were returned: there is nothing to summarise, and the
        # summary's date fields would be undefined. Leave the index alone.
        return

    freq_obj = {
        "hour_breakdown": hour_break_dict,
        "words": json.dumps(word_counter.most_common(400)),
        "total": total,
        "date": dateobj,
        "last_time": created_at
    }
    elastic_utils.add_entry(entry, entry_total + 1, freq_obj)
    elastic_utils.delete_index(latest_index)
    try:
        elastic_utils.create_index(latest_index)
    except Exception:
        # Best effort: a failure here is reported but not treated as fatal.
        print(
            "Todays index already exists! This is an exception, but it's probably ok"
        )
def test_list_all_index(self):
    """Check that the index listing mentions both 'test' and 'list_all'."""
    es.create_index("list_all")
    listing = es.list_all_indexes()

    # The "test" index is not created here; presumably the fixture makes it.
    for expected in ("\'test\'", "\'list_all\'"):
        self.assertIn(expected, listing)

    for index_name in ("list_all", "test"):
        es.delete_index(index_name)

    # Checked against the listing captured before the deletions above.
    self.assertIn("{}", listing)
def test_list_all_index(self):
    """Check that the index listing contains both 'test' and 'list_all'."""
    es.create_index("list_all")
    listing = es.list_all_indexes()
    # NOTE(review): presumably a pause to let the backend settle - confirm.
    time.sleep(1)

    # The "test" index is not created here; presumably the fixture makes it.
    for expected in ('test', 'list_all'):
        self.assertIn(expected, listing)

    for index_name in ("list_all", "test"):
        es.delete_index(index_name)
    time.sleep(1)
def test_search_index(self):
    """Index one document and check that a search returns it first."""
    # Create the index and add the entry before searching.
    es.create_index("searching")
    es.add_entry(index_name="searching", id=1, body={"name": "test"})
    # NOTE(review): presumably waits for the entry to become searchable.
    time.sleep(1)

    response = es.search_index(index_name="searching")
    first_source = response['hits']['hits'][0]['_source']
    print(first_source)
    self.assertIn('test', first_source['name'])

    es.delete_index("searching")
def twittercat_delete(
        request, pk,
        template_name='fyp/Category/twittercat_confirm_delete.html'):
    """Confirm and perform deletion of a ``TwitterCat``.

    Any non-POST request renders ``template_name`` with the category as
    ``object``. A POST deletes the category, deletes its
    ``<category_name>-latest`` index and redirects to the category list.
    A missing ``pk`` is handled by ``get_object_or_404``.
    """
    category = get_object_or_404(TwitterCat, pk=pk)
    if request.method != 'POST':
        return render(request, template_name, {'object': category})

    # Build the index name before the row is deleted.
    latest_index = category.category_name + "-latest"
    category.delete()
    elastic_utils.delete_index(latest_index)
    return redirect('fyp_webapp:twittercat_list')
def tearDown(self):
    """Delete the "test" index after each test."""
    index_name = "test"
    es.delete_index(index_name)
def tearDown(self):
    """Pause for one second, then delete the "test" index."""
    index_name = "test"
    # NOTE(review): presumably lets pending index operations finish - confirm.
    time.sleep(1)
    es.delete_index(index_name)
def test_delete_index(self):
    """Check that deleting the "test" index reports acknowledgement."""
    response = es.delete_index("test")
    self.assertIn("\'acknowledged\': True", response)
def test_create_index(self):
    """Check that creating "testcase" reports the new index name."""
    response = es.create_index("testcase")
    # An index is made, so the response should name it.
    self.assertIn(" \'index\': \'testcase\'", response)

    time.sleep(1)
    es.delete_index("testcase")