Пример #1
0
    def save_to_db(self, const):
        """Persist the scraped data held in ``self.data`` to the database.

        Iterates over the Extract objects in ``self.data`` and, for the
        ``"search_term"`` mode, writes one Search row plus its associated
        Links, Location and Results rows.

        :param const: selects what to store; only ``"search_term"`` is handled.
        """
        # '==' instead of 'is': identity comparison against a string literal
        # is implementation-dependent and only worked by accident.
        if const == "search_term":
            s_db = Search(date=timezone.now(), term=self.data[0].search_term)
            print("Adding %s data into db." % s_db)
            s_db.save()
            for q in self.data:
                print(q)
                # Reset per Extract so a failure cannot silently reuse the
                # previous iteration's Location, and so the fallback below
                # cannot hit a NameError when no location was ever created.
                loc_db = None
                try:
                    # Each Extract has multiple links; associate them all
                    # with the Search row created above.
                    for url in q.job_urls:
                        l_db = Links(search=s_db, link=url)
                        l_db.save()
                    # Each Extract has a single location; associate it too.
                    if q.loc != "":
                        loc_db = Location(city=q.city, state=q.state)
                        loc_db.save()
                    # Pool the summary data according to the user-specified
                    # parameters and store each (word, count, pos) row in a
                    # Results table tied to its Search table.
                    summary = q.pool_summary(
                        pos=self.pos, with_filter=self.with_filter, lower=self.lower, with_bigrams=self.with_bigrams
                    )
                    data = summary[("Word", "Word_Count", "POS_Tag")]
                    for tup in data:
                        w = str(tup[0])
                        c = tup[1]
                        try:
                            p = str(tup[2])
                        except IndexError:
                            p = ""  # row carries no POS tag
                        r_db = Results(
                            search=s_db, location=loc_db, word=w, count=c, pos=p, is_bigram=self.with_bigrams
                        )
                        r_db.save()
                except Exception:
                    # Best-effort fallback: record a placeholder Results row
                    # so the Search still has an entry for this Extract.
                    if q.loc != "" and loc_db is None:
                        loc_db = Location(city=q.city, state=q.state)
                        loc_db.save()
                    r_db = Results(search=s_db, location=loc_db, word="N/A", count=0, pos="", is_bigram=False)
                    r_db.save()
Пример #2
0
 def save_to_db(self, const):
     """Persist the scraped data held in ``self.data`` to the database.

     Iterates over the Extract objects in ``self.data`` and, for the
     ``"search_term"`` mode, writes one Search row plus its associated
     Links, Location and Results rows.

     :param const: selects what to store; only ``"search_term"`` is handled.
     """
     # '==' instead of 'is': identity comparison against a string literal
     # is implementation-dependent and only worked by accident.
     if const == "search_term":
         s_db = Search(date=timezone.now(), term=self.data[0].search_term)
         print("Adding %s data into db." % s_db)
         s_db.save()
         for q in self.data:
             print(q)
             # Reset per Extract so a failure cannot silently reuse the
             # previous iteration's Location, and so the fallback below
             # cannot hit a NameError when no location was ever created.
             loc_db = None
             try:
                 # Each Extract has multiple links; associate them all
                 # with the Search row created above.
                 for url in q.job_urls:
                     l_db = Links(search=s_db, link=url)
                     l_db.save()
                 # Each Extract has a single location; associate it too.
                 if q.loc != "":
                     loc_db = Location(city=q.city, state=q.state)
                     loc_db.save()
                 # Pool the summary data according to the user-specified
                 # parameters and store each (word, count, pos) row in a
                 # Results table tied to its Search table.
                 summary = q.pool_summary(pos=self.pos, with_filter=self.with_filter, lower=self.lower, with_bigrams=self.with_bigrams)
                 data = summary[('Word', 'Word_Count', 'POS_Tag')]
                 for tup in data:
                     w = str(tup[0])
                     c = tup[1]
                     try:
                         p = str(tup[2])
                     except IndexError:
                         p = ""  # row carries no POS tag
                     r_db = Results(search=s_db, location=loc_db, word=w, count=c, pos=p, is_bigram=self.with_bigrams)
                     r_db.save()
             except Exception:
                 # Best-effort fallback: record a placeholder Results row
                 # so the Search still has an entry for this Extract.
                 if q.loc != "" and loc_db is None:
                     loc_db = Location(city=q.city, state=q.state)
                     loc_db.save()
                 r_db = Results(search=s_db, location=loc_db, word="N/A", count=0, pos="", is_bigram=False)
                 r_db.save()
Пример #3
0
def results(request, query_id):
    """
    Render the results page for a stored query.

    Loads the Query and its stored properties, then either reuses the cached
    Results row or builds an Elasticsearch-style JSON query (query_string,
    match_phrase, twitter and facebook clauses) and runs it via
    parse_query_for_sentiments, caching the response. Afterwards it tallies
    keyword occurrences per category and per-message sentiment counts for
    the template. Anonymous users are redirected to "/".

    :param request: incoming HTTP request; user must be authenticated.
    :param query_id: primary key of the Query to display.
    :return: rendered "results.html" response (or redirect to "/").
    :raise Http404: when processing raises a ValueError.
    """
    if request.user.is_authenticated():
        reponseToPresent = []
        categories_counter = []
        positive_counter = 0
        negative_counter = 0
        neutral_counter = 0
        try:
            ## Must store the response, if there is no response, otherwise return the stored one.
            ## IF NOT STORED
            query = Query.objects.get(id=query_id)
            query_params = Query_properties.objects.filter(query=query)
            results = Results.objects.filter(query=query)
            #run for all categories
            list_properties = get_query_properties(query)
            properties = list_properties["Properties"]  # all the available properties, e.g. keywords, twitter, facebook
            #print "properties: %s" %properties
            phrases = list_properties["Phrases"]
            #print "phrases: %s" %phrases
            keywords = list_properties["Keywords"]
            #print "keywords: %s" %keywords
            twitter_usernames = list_properties["Twitter"]
            facebook_pages = list_properties["Facebook"]
            query_properties = ''  # This is the string that forms the properties query (query_string)
            phrase_properties = ''  # This is the string that forms the phrase query (match_phrase)'
            twitter_properties = ''
            facebook_properties = ''

            ## Run the query or bring the results from the Database
            if results:  #bring it from the database
                # Cached path: the first Results row holds the JSON-serialized response.
                response = results.__getitem__(0).results
                response = json.loads(response)
            else:  #make a new query
                lang = Query_languages.objects.get(query=query_id)

                #####
                # Get all the properties, keywords, phrases, twitter usernames
                #####
                # Build a comma-separated keyword expression; AND ('venn' != OR)
                # wraps each keyword group in a required '+(...)' clause.
                for kwrd in keywords.keys():
                    temp = ''
                    for keyword_prop in keywords[kwrd]:
                        temp += "%s," % keyword_prop
                    if query.venn == 'OR':
                        query_properties += '%s,' % remove_comma_at_the_end(temp)
                    else:
                        query_properties += '+(%s)' % remove_comma_at_the_end(temp)
                query_properties = query_properties.replace('+()', '')  #Remove any empty keyword
                query_properties = remove_comma_at_the_end(query_properties)

                # Pick the search field(s) by query language: Spanish, English,
                # or both when the language is anything else.
                if query_properties != '':  #if empty list, no properties, no query string, go to phrases
                    if lang:
                        if lang.language == "es":
                            query_properties = '{"query_string":{"query":"%s","fields":["%s"]}}' % (
                            query_properties, "text_no_url_es")
                        elif lang.language == "en":
                            query_properties = '{"query_string":{"query":"%s","fields":["%s"]}}' % (
                            query_properties, "text_no_url")
                        else:
                            query_properties = '{"query_string":{"query":"%s","fields":["%s","%s"]}}' % (
                            query_properties, "text_no_url", "text_no_url_es")
                    else:
                        query_properties = '{"query_string":{"query":"%s","fields":["%s"]}}' % (
                        query_properties, "text_no_url")

                # Create the phrase query
                for phrase_list in phrases.keys():
                    for phrase in phrases[phrase_list]:
                        if lang:
                            if lang.language == "es":
                                phrase_properties += '{"match_phrase":{"doc.text_no_url_es":"%s"}},' % phrase

                            elif lang.language == "en":
                                phrase_properties += '{"match_phrase":{"doc.text_no_url":"%s"}},' % phrase
                            else:
                                phrase_properties += '{"match_phrase":{"doc.text_no_url":"%s"}},{"match_phrase":{"doc.text_no_url_es":"%s"}},' % (
                                phrase, phrase)
                        else:
                            phrase_properties += '{"match_phrase":{"doc.text_no_url":"%s"}},' % phrase
                phrase_properties = remove_comma_at_the_end(phrase_properties)

                # Twitter handles are matched by screen name, normalized by
                # stripping spaces and the '@' prefix.
                for twitter_username in twitter_usernames:
                    twitter_properties += '{"match_phrase_prefix" : { "doc.user_screen_name":"twitter:%s" }},' % twitter_username.replace(
                        " ", "").replace("@", "")
                twitter_properties = remove_comma_at_the_end(twitter_properties)

                for facebook_page in facebook_pages:
                    facebook_properties += '{"match_phrase_prefix" : { "doc.user_screen_name":"facebook:%s" }},' % facebook_page.replace(
                        " ", "")
                facebook_properties = remove_comma_at_the_end(facebook_properties)

                ###
                #query constructor
                ###
                # Join whichever clause groups are non-empty into one 'should' list.
                query_all = ''
                if (query_properties != ''):
                    query_all += '%s,' % query_properties
                if (phrase_properties != ''):
                    query_all += '%s,' % phrase_properties
                if (twitter_properties != ''):
                    query_all += '%s,' % twitter_properties
                if (facebook_properties != ''):
                    query_all += '%s,' % facebook_properties
                query_all = remove_comma_at_the_end(query_all)

                # Final query: bool-should over the clauses, filtered to the
                # query's date range (epoch milliseconds), capped at 10000 hits.
                query_all = '{"query":{"filtered":{"query":{"bool":{"should":[%s],"minimum_should_match" : 1}},"filter":{"bool":{"must":[{"range":{"doc.created_at":{"from":"%s","to":"%s"}}}],"_cache":true}}}},"from":0,"size":10000, "sort":["_score"]}' % (
                    query_all,
                    int(time.mktime(query.from_date.timetuple()) * 1000),
                    int(time.mktime(query.to_date.timetuple()) * 1000))

                print query_all
                response = parse_query_for_sentiments(query_all)
                # Cache the fresh response so later visits take the fast path above.
                newResponse = Results(query=query, results=json.dumps(response), updated=datetime.now())
                newResponse.save()


            ## count the occurrences of keywords in in response
            # For each category, count whole-word, case-insensitive occurrences
            # of its comma-separated keywords across all message texts.
            for property in properties.keys():
                word_counter = []
                r = re.compile("|".join(r"\b%s\b" % w.lower() for w in properties[property].split(",")), re.I)
                # temporary solution to double counting...
                number = Counter(re.findall(r, ""))
                for message in response:
                    #dict_you_want = { "text": message["_source"]["doc"]["text"] }
                    #print dict_you_want
                    # '@'/'#' become spaces so mentions/hashtags match as plain words.
                    number = number + Counter(re.findall(r, (message["_source"]["doc"]["text"]).lower().replace("@", " ").replace("#", " ")))
                #                for lala in properties[property].split(","):
                #                   print number[lala]
                #                   print lala
                for phrase in properties[property].split(","):
                    #                   number = json.dumps(response).count(phrase)

                    # Per-keyword record; sentiment tallies start at 0 and are
                    # filled in by the per-message loop below.
                    text = '{"name":"%s","times":%i, "sentiment":%i, "positive":%i, "negative":%i, "neutral":%i}' % (
                    phrase.lower(), number[phrase.lower()], 0, 0, 0, 0)
                    #print text
                    word_counter.append(json.loads(text))
                text = {}
                text["category"] = property
                text["properties"] = word_counter
                categories_counter.append(text)

            # Per-message pass: overall sentiment totals for the pie chart,
            # plus per-keyword sentiment counts inside categories_counter.
            for message in response:
                doc_text = message["_source"]["doc"]["text"]
                if message["_source"]["doc"]["senti_tag"] == "positive":
                    #for pie diagram metrics
                    positive_counter += 1
                elif message["_source"]["doc"]["senti_tag"] == "negative":
                    # for pie diagram metrics
                    negative_counter += 1
                elif message["_source"]["doc"]["senti_tag"] == "neutral":
                    neutral_counter += 1
                    #if message["_score"] > 0.05:
                if True:
                    reponseToPresent.append(message["_source"])
                    ##print "Just Added: %s" %message["_source"]["doc"]
                    try:
                        for category in categories_counter:
                            r2 = re.compile("|".join(r"\b%s\b" % w["name"].lower() for w in category["properties"]),
                                            re.I)
                            number2 = Counter(re.findall(r2, (
                            json.dumps(message["_source"]["doc"]["text"])).lower().replace("@", " ").replace("#", " ")))
                            if True:
                                # A keyword found at least once in this message
                                # adds to that keyword's sentiment tallies.
                                for property in category["properties"]:
                                    if message["_source"]["doc"]["senti_tag"] == "positive":
                                        if (number2[property["name"].lower()]) > 0:
                                            property["sentiment"] = property["sentiment"] + 1
                                            property["positive"] = property["positive"] + 1
                                    elif message["_source"]["doc"]["senti_tag"] == "negative":
                                        if (number2[property["name"].lower()]) > 0:
                                            property["sentiment"] = int(property["sentiment"]) - 1
                                            property["negative"] = property["negative"] + 1
                                    elif message["_source"]["doc"]["senti_tag"] == "neutral":
                                        if (number2[property["name"].lower()]) > 0:
                                            property["neutral"] = property["neutral"] + 1
                    except:
                        continue


        except ValueError:
            #print ValueError.message
            raise Http404()


        return render(request, "results.html",
                      {"query_id": query.id, "query_name": query.name, "query": query_params,
                       "response": reponseToPresent, "positive": positive_counter,
                       "negative": negative_counter, "neutral": neutral_counter,
                       "categories": categories_counter})
    else:
        return HttpResponseRedirect("/")
Пример #4
0
def result_post_method(request):
    """
    Persist per-student quiz scores from POSTed mat data.

    The POST body carries an 'overall' summary plus one entry per question,
    each mapping mat/device ids to the button each mat pressed (a QueryDict
    of JSON strings produced by the quiz client). The payload is decoded
    against the quiz's stored answer key, device ids are cross referenced
    to the students linked to those devices, and one Results row is saved
    per student with their total score.
    :param request: wsgi request
    :return: HTTP response ("success" or "failure")
    """
    quiz_name = request.GET.get('quizname')
    all_devices = Device.objects.all()
    quiz = Quiz.objects.get(name=quiz_name)

    # Decode the stored quiz definition into the per-question answer key.
    answer_key = results_process_questions(json.loads(quiz.quizjson))

    # Grade the raw POST data against the answer key and device list.
    graded = results_process_data(request.POST, answer_key, all_devices)

    for device in all_devices:
        # Total up every graded entry that names this device's student.
        total = sum(
            int(entry['score'])
            for entry in graded
            if device.student.name in entry['student']
        )
        try:
            record = Results()
            record.quiz = quiz
            record.student = device.student
            record.score = total
            record.save()
        except Exception:
            return HttpResponse("failure")

    return HttpResponse("success")
Пример #5
0
def findTweets(request):
	"""
	Look up (or compute and cache) topic-word results for a search query.

	On POST, tries to fetch a cached Results row for the query. On a cache
	miss it reads ``data/<query>_results.txt``; if that file is missing it
	runs the full pipeline — fetch tweets, clean them with the Turkish NLP
	script, run the Spark topic job — then re-reads the results file and
	stores the first 7 words of each of the 3 topics in a new Results row.

	:param request: wsgi request
	:return: rendered "findTweets.html" (or "searchPage.html" on failure
	         or for non-POST requests)
	"""
	if request.method != 'POST':
		# BUG FIX: the original fell through and returned None for GET
		# requests; send the user back to the search form instead.
		return render(request, 'searchPage.html', {})

	searchQuery = request.POST.get('search')

	try:
		tmpModel = Results.objects.get(username=searchQuery)
	except Results.DoesNotExist:
		results = _read_topic_words('data/' + searchQuery + '_results.txt')

		if results is None:
			# No results file yet: run the pipeline stages that are missing.

			# RETRIEVING TWEETS (skipped when the CSV is already on disk)
			if not _file_exists('data/' + searchQuery + '_tweets.csv'):
				td.get_all_tweets(str(searchQuery))

			# CLEANING DATA WITH TURKISH NLP (skipped when already cleaned)
			if not _file_exists('data/' + searchQuery + '_cleaned.txt'):
				nlpPath = 'basic/codes/NLPTurkish.py'
				os.system("python3 " + nlpPath + " " + searchQuery)

			# RUNNING SPARK topic job over the cleaned tweets
			sparkPath = '/Users/k/Spark/bin/spark-submit'
			scriptPath = 'basic/codes/simpleApp.py'
			os.system(sparkPath + " --master local[4] " + scriptPath + " " + searchQuery)

			print("Last seconds...")
			time.sleep(2)  # give the Spark job's output file time to appear

			results = _read_topic_words('data/' + searchQuery + '_results.txt')
			if results is None:
				# Pipeline produced no readable results; show the search page.
				return render(request, 'searchPage.html', {})

		# SAVING FINDINGS TO DATABASE: fields t1w1..t3w7 hold the first
		# 7 words of each of the 3 topics.
		tmpModel = Results(username=searchQuery)
		for t in range(3):
			for w in range(7):
				setattr(tmpModel, 't%dw%d' % (t + 1, w + 1), results[t][w])
		tmpModel.save()

	# RETURNING REQUESTED PAGE
	context = {"searchQuery": searchQuery, "tmpModel": tmpModel}
	return render(request, 'findTweets.html', context)


def _file_exists(path):
	"""Return True when *path* can be opened for reading (mirrors the
	original try/open probes used to decide which pipeline stages to skip)."""
	try:
		with open(path, 'r'):
			return True
	except Exception:
		return False


def _read_topic_words(location):
	"""Parse a ``*_results.txt`` file into a 3x9 matrix of words.

	One word per line; every 10th line acts as a topic separator (consumed
	but not stored, matching the original parsing loop). Returns None when
	the file is missing or malformed.
	"""
	results = [[0, 0, 0, 0, 0, 0, 0, 0, 0] for _ in range(3)]
	topic = 0
	word = 0
	try:
		with open(location, 'r') as fp:
			for line in fp:
				if word < 9:
					results[topic][word] = line.strip("\n")
					word += 1
				else:
					word = 0
					topic += 1
	except Exception:
		# Missing file or unexpected layout (e.g. too many topics).
		return None
	return results