def test_search_index(self):
    """Index one document into "searching" and check that a search returns it.

    The index is created for this test and removed again in a ``finally``
    clause, so a failing assertion (or a failing search) does not leave the
    "searching" index behind for later test runs.
    """
    doc = {"name": "test"}
    es.create_index("searching")
    try:
        es.add_entry(index_name="searching", id=1, body=doc)
        # NOTE(review): presumably waits for Elasticsearch to make the new
        # document searchable before querying -- confirm.
        time.sleep(1)
        res = es.search_index(index_name="searching")
        first_source = res['hits']['hits'][0]['_source']
        print(first_source)
        self.assertIn('test', first_source['name'])
    finally:
        # Always clean up the index created above.
        es.delete_index("searching")
def word_cloud(id, topic):
    """Count, per user category, how many documents in *topic* match it.

    Each ``TwitterCat`` row of the user is preprocessed, stemmed and stripped
    of ``[``, ``]`` and ``'`` characters; the resulting term is then searched
    as a prefix (``term*``) in the ``text`` field of the *topic* index.
    After every category the running results are published through
    ``current_task.update_state`` with state ``'PROGRESS'``.

    Args:
        id: value matched against ``TwitterCat.user_id``.  (The name shadows
            the ``id`` builtin but is kept for caller compatibility.)
        topic: index name handed to ``elastic_utils.search_index``.

    Returns:
        A tuple ``(category, jsonData)``: the list of processed terms in
        iteration order, and a JSON string mapping each term to the value
        of ``res['hits']['total']`` for its search.
    """
    item = {}
    category = []
    for cat_entry in TwitterCat.objects.filter(user_id=id):
        entry = preprocessor.preprocess(cat_entry.category_name)
        entry = preprocessor.porter_stemming(entry)
        entry = ''.join(c for c in entry if c not in '[]\'')

        # Serialize the query with json.dumps rather than string formatting
        # so that quotes or backslashes in the term cannot break (or alter)
        # the JSON document.  Compact separators and ensure_ascii=False keep
        # the output byte-identical to the old template for ordinary terms.
        # NOTE(review): query_string syntax characters in the term are still
        # passed through unescaped, as before.
        query = json.dumps(
            {
                "query": {
                    "query_string": {
                        "fields": ["text"],
                        "query": str(entry) + "*",
                    }
                }
            },
            separators=(',', ':'),
            ensure_ascii=False)
        res = elastic_utils.search_index(topic, query=query)

        item[entry] = res['hits']['total']
        category.append(entry)
        current_task.update_state(state='PROGRESS',
                                  meta={
                                      'current_categories': category,
                                      'current_results': item
                                  })
    jsonData = json.dumps(item)
    return (category, jsonData)
def elastic_info(index_list):
    """Collect display statistics for every topic index in *index_list*.

    For each index name three lookups are made through ``elastic_utils``:

    * the index itself -- up to 10 documents sorted by ``date`` descending,
      reduced to ``date`` / ``total`` / ``last_collected``;
    * ``<name>-median`` -- every hit yielded by ``iterate_search``, reduced
      to the day / hour / minute medians;
    * ``<name>-latest`` -- up to 5 documents sorted by ``created.keyword``
      descending, reduced to ``created`` / ``text`` / ``image`` / ``name``.

    Progress is reported via ``current_task.update_state`` with state
    ``'PROGRESS'``: once before the first index is queried (0 percent) and,
    for every later index, after its queries have finished (including the
    results gathered for the preceding indices).

    Args:
        index_list: sized iterable of index names.

    Returns:
        A list with one dict per index of the form
        ``{"name": {"current": [...], "median": [...], "latest": [...]},
        "current_entry": <index name>}``.
    """
    final_res = []
    # enumerate replaces the hand-maintained counter; comparisons use == /
    # != because identity tests against int literals are not reliable.
    for position, entry in enumerate(index_list):
        if position == 0:
            current_task.update_state(state='PROGRESS',
                                      meta={
                                          'current_percentage': 0,
                                          "current_entry": entry
                                      })

        res = elastic_utils.search_index(entry,
                                         query={
                                             "query": {
                                                 "match_all": {}
                                             },
                                             "sort": [{
                                                 "date": {
                                                     "order": "desc"
                                                 }
                                             }],
                                             "size": 10
                                         })
        current_array = [{
            "date": hit["_source"]["date"],
            "total": hit["_source"]["total"],
            "last_collected": hit["_source"]["last_time"],
        } for hit in res["hits"]["hits"]]

        median_array = [{
            "day_median": hit["_source"]["day_median"],
            "hour_median": hit["_source"]["hour_median"],
            "minute_median": hit["_source"]["minute_median"],
        } for hit in elastic_utils.iterate_search(entry + "-median")]

        res_latest = elastic_utils.search_index(entry + "-latest",
                                                query={
                                                    "query": {
                                                        "match_all": {}
                                                    },
                                                    "sort": [{
                                                        "created.keyword": {
                                                            "order": "desc"
                                                        }
                                                    }],
                                                    "size": 5
                                                })
        latest_array = [{
            "created": hit["_source"]["created"],
            "text": hit["_source"]["text"],
            "image": hit["_source"]["profile_picture"],
            "name": hit["_source"]["name"],
        } for hit in res_latest["hits"]["hits"]]

        index_dict = {
            "name": {
                "current": current_array,
                "median": median_array,
                "latest": latest_array,
            },
            "current_entry": entry,
        }

        if position != 0:
            # final_res does not yet contain this index's own result here.
            current_task.update_state(
                state='PROGRESS',
                meta={
                    'current_percentage':
                    (position / len(index_list)) * 100,
                    'current_entry': entry,
                    'final_res': final_res
                })
        final_res.append(index_dict)
    print(len(final_res))
    return final_res
Exemplo n.º 4
0
# Import the necessary methods from the tweepy library
import tensorflow as tf
import tweepy
import json
from textblob import TextBlob
from nltk.tokenize import word_tokenize
import re
import preprocessor
import fyp_webapp.config as cfg
from fyp_webapp.ElasticSearch import elastic_utils as es

# Seed the module-level running id from the hit total reported for the
# configured topic; start from 0 when no total is reported.
res = es.search_index(cfg.twitter_credentials['topic'])
id = res['hits']['total']
if id is None:
    id = 0


class StreamListener(tweepy.StreamListener):
    def on_status(self, status):

        if hasattr(status, 'retweeted_status'):
            return  #this filters out retweets
        else:
            global id
            id += 1
            dict = {
                "description": str(status.user.description),
                "loc": str(status.user.location),
                "text": str(status.text),
                "coords": str(status.coordinates),
Exemplo n.º 5
0
def timeline(request):
    """Render the hourly timeline page for a single Twitter category.

    When the request carries POST data the category is looked up by the
    name submitted in the ``dropdown`` field; otherwise the categories of
    ``request.user`` are used.  In both cases only the first matching
    category is displayed.

    For that category up to 20 entries from ``elastic_utils.iterate_search``
    (sorted by ``date`` descending) are turned into
    ``{date: {hour (int): value}}``, and the first hit of the
    ``<name>-median`` index supplies the medians the thresholds are derived
    from (day * 1.5, hour * 2, minute * 2, rounded to 2 decimals).

    If no category matches, the page is rendered with empty data, an empty
    name and thresholds of 0 instead of failing on unbound variables.

    Args:
        request: the incoming request; ``POST`` and ``user`` are read.

    Returns:
        The result of ``render`` for "fyp/timeline/index.html".
    """
    if request.POST:
        matches = TwitterCat.objects.filter(
            category_name=request.POST['dropdown'])
    else:
        matches = TwitterCat.objects.filter(user=request.user)
    # Only the first matching category is shown.
    selected = next(iter(matches), None)

    name = ""
    data = {}
    day_med_tresh = hour_med_tresh = minute_med_tresh = 0

    if selected is not None:
        name = selected.category_name
        res = elastic_utils.iterate_search(index_name=name,
                                           query={
                                               "size":
                                               20,
                                               "query": {
                                                   "match_all": {}
                                               },
                                               "sort": [{
                                                   "date": {
                                                       "order": "desc"
                                                   }
                                               }],
                                           })
        med = elastic_utils.search_index(index_name=name + "-median")

        # Stop after 20 entries even if the search yields more.
        for count, entry in enumerate(res, start=1):
            source = entry["_source"]
            breakdown = source["hour_breakdown"]
            data[source["date"]] = {
                int(hour): breakdown[hour]
                for hour in breakdown
            }
            if count == 20:
                break

        medians = med["hits"]["hits"][0]["_source"]
        day_med_tresh = round(medians["day_median"] * 1.5, 2)
        hour_med_tresh = round(medians["hour_median"] * 2, 2)
        minute_med_tresh = round(medians["minute_median"] * 2, 2)

    # The category list offered by the page always belongs to the
    # requesting user, regardless of which category is displayed.
    cat = TwitterCat.objects.filter(user=request.user)
    return render(
        request, "fyp/timeline/index.html", {
            "data": data,
            "name": name,
            "cats": cat,
            "hour_med_tresh": hour_med_tresh,
            "minute_med_tresh": minute_med_tresh,
            "day_med_tresh": day_med_tresh
        })