Exemplo n.º 1
0
 def update_term(self, term, stream_name):
     """Record that ``stream_name`` is associated with the search ``term``.

     Looks up the ``SearchTerm`` entity keyed by ``term``.  If none exists a
     new one is created holding just ``stream_name``; otherwise
     ``stream_name`` is merged into the entity's ``stream_names`` without
     introducing duplicates.  The entity is then written back.

     Args:
         term: The search term; used as the entity's key id.
         stream_name: Name of the stream to attach to the term.
     """
     key = ndb.Key('SearchTerm', term)
     search_term = key.get()
     if not search_term:
         # Create the entity under the same key we look it up with;
         # otherwise it gets an auto-generated id and key.get() above can
         # never find it on later calls.
         search_term = SearchTerm(key=key, term=term,
                                  stream_names=[stream_name])
     else:
         # list.append() returns None, so build the de-duplicated
         # collection explicitly instead of using append's return value.
         names = set(search_term.stream_names)
         names.add(stream_name)
         search_term.stream_names = list(names)
     search_term.put()
Exemplo n.º 2
0
 def update_term(self, term, stream_name):
     """Attach ``stream_name`` to the ``SearchTerm`` entity for ``term``.

     The entity is fetched by a key whose id is ``term``.  When it is
     missing, a fresh entity containing only ``stream_name`` is created;
     when it exists, ``stream_name`` is added to its ``stream_names`` and
     duplicates are removed.  Either way the entity is saved.

     Args:
         term: The search term; doubles as the entity's key id.
         stream_name: Name of the stream to associate with the term.
     """
     key = ndb.Key('SearchTerm', term)
     search_term = key.get()
     if search_term:
         # Do not rely on list.append()'s return value (it is None);
         # extend a copy and de-duplicate through a set.
         merged = search_term.stream_names + [stream_name]
         search_term.stream_names = list(set(merged))
     else:
         # Store the new entity under the lookup key so the next call's
         # key.get() finds it instead of creating another entity.
         search_term = SearchTerm(key=key, term=term,
                                  stream_names=[stream_name])
     search_term.put()
Exemplo n.º 3
0
def store(request, q):
    """Persist the search text ``q`` along with details of the requester.

    Nothing is stored unless ``q`` is at least three characters long.  The
    saved ``SearchTerm`` carries the query text, the value returned by
    ``stats.tracking_id(request)``, the ``REMOTE_ADDR`` entry of
    ``request.META`` and the requesting user (``None`` when the user is not
    authenticated).
    """
    from stats import stats
    if len(q) <= 2:
        return
    record = SearchTerm()
    record.q = q
    record.tracking_id = stats.tracking_id(request)
    record.ip_address = request.META.get('REMOTE_ADDR')
    record.user = request.user if request.user.is_authenticated() else None
    record.save()
Exemplo n.º 4
0
 def get(self):
     """Write a JSON list of stored terms containing the ``term`` parameter.

     Every ``SearchTerm`` entity is fetched, the ones whose ``term`` value
     contains the request's ``term`` parameter as a substring are kept,
     duplicates are dropped, and the result is written to the response as
     ``{"matches": [...]}``.
     """
     needle = self.request.get('term')
     entities = SearchTerm.query().order(SearchTerm.term).fetch()
     names = [entity.term for entity in entities]
     matches = list(set(name for name in names if needle in name))
     self.response.headers['Content-Type'] = 'application/json'
     self.response.out.write(json.dumps({"matches": matches}))
Exemplo n.º 5
0
 def get(self):
     """Respond with the distinct stored terms that contain ``term``.

     Reads the ``term`` request parameter, fetches all ``SearchTerm``
     entities, keeps the ``term`` values that contain the parameter as a
     substring, removes duplicates and emits them as JSON under the
     ``"matches"`` key.
     """
     wanted = self.request.get('term')
     query = SearchTerm.query().order(SearchTerm.term)
     found = set()
     for stored in [row.term for row in query.fetch()]:
         if wanted in stored:
             found.add(stored)
     matches = list(found)
     self.response.headers['Content-Type'] = 'application/json'
     payload = json.dumps({"matches": matches})
     self.response.out.write(payload)
Exemplo n.º 6
0
def store(request, q):
    """Save the search text ``q`` with the requester's details.

    Queries of two characters or fewer are ignored.  Otherwise a
    ``SearchTerm`` is saved with the query, the ``REMOTE_ADDR`` entry of
    ``request.META``, the value of ``stats.tracking_id(request)`` and the
    requesting user, or ``None`` if the user is not authenticated.
    """
    if len(q) <= 2:
        return
    entry = SearchTerm()
    entry.q = q
    entry.ip_address = request.META.get('REMOTE_ADDR')
    entry.tracking_id = stats.tracking_id(request)
    entry.user = request.user if request.user.is_authenticated() else None
    entry.save()
def _increment_count(model, name):
    """Add one to the ``count`` of the ``model`` row named ``name``.

    If no such row exists yet, one is created with ``count`` set to 1.
    """
    try:
        record = model.objects.get(name=name)
    except model.DoesNotExist:
        record = model(name=name)
        record.count = 1
    else:
        # NOTE(review): this is a read-modify-write; concurrent requests
        # could presumably lose an increment.
        record.count += 1
    record.save()


def update_search_stats(query, browser):
    """Update the usage counters for a search term and a browser.

    For both the ``SearchTerm`` named ``query`` and the ``SearchBrowser``
    named ``browser``: if the row already exists its ``count`` is increased
    by one, otherwise a new row is created with ``count`` set to 1.

    Args:
        query: Name of the search term to count.
        browser: Name of the browser to count.
    """
    # 1.) The SearchTerm
    _increment_count(SearchTerm, query)
    # 2.) The SearchBrowser
    _increment_count(SearchBrowser, browser)
Exemplo n.º 8
0
def store(request, q):
    """Store the search text ``q`` when it is at least three characters long.

    The saved ``SearchTerm`` holds the query, the requester's
    ``REMOTE_ADDR`` as found in ``request.META``, the result of
    ``stats.tracking_id(request)`` and the authenticated user (``None`` for
    unauthenticated requests).
    """
    long_enough = len(q) > 2
    if not long_enough:
        return
    search = SearchTerm()
    search.q = q
    search.ip_address = request.META.get('REMOTE_ADDR')
    search.tracking_id = stats.tracking_id(request)
    if request.user.is_authenticated():
        search.user = request.user
    else:
        search.user = None
    search.save()
Exemplo n.º 9
0
def store(request, q, matching):
    """Store the search text and the items it matched.

    Nothing is stored unless ``q`` is at least two characters long.  The
    saved ``SearchTerm`` holds the query, the ``REMOTE_ADDR`` entry of
    ``request.META``, the result of ``stats.__tracking_id(request)`` and the
    authenticated user (``None`` otherwise).  Every element of ``matching``
    is then added to the term's ``found_products`` relation.
    """
    if len(q) <= 1:
        return
    entry = SearchTerm()
    entry.q = q
    entry.ip_address = request.META.get('REMOTE_ADDR')
    entry.tracking_id = stats.__tracking_id(request)
    entry.user = request.user if request.user.is_authenticated() else None
    # Save before touching the relation to avoid the error:
    # <SearchTerm: inst> need to have a value for field "searchterm"
    # before this many-to-many relationship can be used.
    entry.save()
    for product in matching:
        entry.found_products.add(product)
Exemplo n.º 10
0
def store(request, q):
    """Save the search text ``q`` if it is at least three characters long.

    The saved ``SearchTerm`` records the query and the requesting user, or
    ``None`` when the user is not authenticated.
    """
    if len(q) <= 2:
        return
    record = SearchTerm()
    record.q = q
    record.user = request.user if request.user.is_authenticated() else None
    record.save()
Exemplo n.º 11
0
def process_search():
    """Handle a search request and render the results template.

    Reads ``search_query`` from the request's GET parameters, then:

    1. Handles ``--`` prefixed commands: ``--forum``, ``--cs373``,
       ``--python`` and ``--searchwithpeterdotinfo`` redirect to an external
       search page; ``--showmore`` raises the page/video limits;
       ``--daverank`` switches on the ``show_daverank`` template flag.
    2. Asks DuckDuckGo for an abstract of the query and keeps it when its
       source is the Python documentation.
    3. Intersects the URL sets of the ``SearchTerm`` entities of every
       non-stopword in the query, loads the matching ``Page`` and ``Video``
       entities and builds an excerpt (pages) or a start time (videos).

    Returns:
        A redirect for the redirecting commands, otherwise the rendered
        ``templates/results`` template.
    """
    search_query = request.GET.get('search_query', '').strip()
    query = search_query.lower()

    show_daverank = False
    results = False
    number_pages = 10
    number_videos = 5

    #Move this stuff to its own procedure tomorrow!
    # Each slice below drops the command plus the character that follows it.
    if query.startswith('--'):
        if query.startswith('--forum'):
            redirect_url = 'http://www.udacity-forums.com/cs101/search/?q=' + urllib.quote(
                query[8:])
            return redirect(redirect_url)
        if query.startswith('--cs373'):
            redirect_url = 'http://www.udacity-forums.com/cs373/search/?q=' + urllib.quote(
                query[8:])
            return redirect(redirect_url)
        if query.startswith('--python'):
            redirect_url = 'http://docs.python.org/search.html?q=' + urllib.quote(
                query[9:])
            return redirect(redirect_url)
        if query.startswith('--searchwithpeterdotinfo'):
            redirect_url = 'http://searchwithpeter.info/secretplans.html?q=' + urllib.quote(
                query[25:])
            return redirect(redirect_url)
        if query.startswith('--showmore'):
            query = query[11:]
            search_query = query
            number_pages = 20
            number_videos = 10
        # Checked after --showmore so the two commands can be chained.
        if query.startswith('--daverank'):
            query = query[11:]
            search_query = query
            show_daverank = True

    # The DuckDuckGo URL already contains "python+", so drop a leading
    # "python" (and the character after it) from the query.
    if query.startswith('python'):
        pyquery = query[7:]
    else:
        pyquery = query

    ddgurl_root = 'http://duckduckgo.com/?q=python+'
    ddgurl_suffix = urllib.quote(pyquery) + '&format=json'

    response = urllib.urlopen(ddgurl_root + ddgurl_suffix)
    response_json = response.read()

    pythonterm = json.loads(response_json)

    if pythonterm:
        pyterm_info = {}
        if pythonterm['AbstractSource'] == 'Python Documentation':
            pyterm = BeautifulSoup(pythonterm['AbstractText'])
            try:
                pyterm_code = pyterm.find('code').string
                pyterm.pre.decompose()
                pyterm_info['code'] = pyterm_code
            except AttributeError:
                # The abstract has no <code> or <pre> element.
                pyterm_info['code'] = None
            pyterm_desc = pyterm.get_text()
            pyterm_info['desc'] = pyterm_desc
            pyterm_info['url'] = pythonterm['AbstractURL']
            results = True
    else:
        pyterm_info = None

    # Build a new list rather than removing items while iterating, which
    # skips the element following each removed stopword.
    query_words = [word for word in query.split() if word not in stopwords]

    query_urls = []
    for term in query_words:
        # Get all SearchTerm objects that match the search_query.
        q = SearchTerm.all().filter('term =', term).get()
        if q:
            query_urls.append(set(q.urls))

    if query_urls:
        # Only URLs indexed under every found query word are kept.
        query_url_set = set.intersection(*query_urls)
        query_url_list = list(query_url_set)

        if len(query_url_list) > 0:
            results = True
        if len(query_url_list) > 30:
            query_url_list = query_url_list[0:30]

        page_results = Page.all().filter(
            'url IN', query_url_list).order('-dave_rank').fetch(number_pages)
        page_dicts = []
        for page in page_results:
            page_info = {}
            query_index = page.text.find(query)
            if query_index != -1:
                # Start the excerpt about 25 characters before the match, on
                # a space when one exists in that window.  A negative start
                # would make str.find() count from the end of the text, so
                # matches near the beginning start the excerpt at 0.
                window_start = query_index - 25
                if window_start <= 0:
                    i = 0
                else:
                    i = page.text.find(' ', window_start, query_index)
                    if i == -1:
                        i = window_start
                excerpt_words = page.text[i:].split(' ')
                page_info['exact_match'] = True
            else:
                excerpt_words = page.text.split(' ')
                page_info['exact_match'] = False
            excerpt = ' '.join(excerpt_words[:50])

            page_info['text'] = excerpt
            page_info['title'] = page.title
            page_info['url'] = page.url
            page_info['daverank'] = page.dave_rank
            page_info['doc'] = page.doc
            page_dicts.append(page_info)
        # Stable sort: exact matches first, rank order otherwise preserved.
        page_dicts.sort(key=itemgetter('exact_match'), reverse=True)

        video_results = Video.all().filter(
            'url IN', query_url_list).order('-views').fetch(number_videos)
        video_dicts = []
        for video in video_results:
            video_info = {}
            subtitles = video.text.lower()
            query_index = subtitles.find(query)
            time_string = ''
            if query_index != -1:
                subtitle_list = subtitles.splitlines()
                for phrase in subtitle_list:
                    if phrase.find(query) != -1:
                        # The line before a matching phrase is treated as
                        # its timestamp (presumably HH:MM:SS... - confirm).
                        timestamp_index = subtitle_list.index(phrase) - 1
                        timestamp = subtitle_list[timestamp_index]
                        if len(timestamp) > 1:
                            minutes = timestamp[3:5]
                            seconds = timestamp[6:8]
                            time_string = '#t=' + minutes + 'm' + seconds + 's'
                            start = 60 * int(minutes) + int(seconds)
            if time_string:
                url = video.url + time_string
                video_info['exact_match'] = True
            else:
                url = video.url
                start = 0
                video_info['exact_match'] = False
            video_info['title'] = video.title
            video_info['url'] = url
            # NOTE(review): this slice uses a step of 20, so it samples
            # characters rather than taking a contiguous snippet - confirm
            # this is intended.
            video_info['subtitle'] = video.text[-20:query_index:20]
            video_info['id'] = video.id
            video_info['start'] = start
            video_dicts.append(video_info)
        video_dicts.sort(key=itemgetter('exact_match'), reverse=True)

    else:
        page_dicts = None
        video_dicts = None

    query_string_words = query.split()

    return template('templates/results',
                    search_query=search_query,
                    query_string_words=query_string_words,
                    page_dicts=page_dicts,
                    video_dicts=video_dicts,
                    pyterm_info=pyterm_info,
                    show_daverank=show_daverank,
                    results=results)
Exemplo n.º 12
0
def process_search():
    """Handle a search request and render the results template.

    Reads ``search_query`` from the request's GET parameters, then:

    1. Handles ``--`` prefixed commands: ``--forum``, ``--cs373``,
       ``--python`` and ``--searchwithpeterdotinfo`` redirect to an external
       search page; ``--showmore`` raises the page/video limits;
       ``--daverank`` switches on the ``show_daverank`` template flag.
    2. Asks DuckDuckGo for an abstract of the query and keeps it when its
       source is the Python documentation.
    3. Intersects the URL sets of the ``SearchTerm`` entities of every
       non-stopword in the query, loads the matching ``Page`` and ``Video``
       entities and builds an excerpt (pages) or a start time (videos).

    Returns:
        A redirect for the redirecting commands, otherwise the rendered
        ``templates/results`` template.
    """
    search_query = request.GET.get("search_query", "").strip()
    query = search_query.lower()

    show_daverank = False
    results = False
    number_pages = 10
    number_videos = 5

    # Move this stuff to its own procedure tomorrow!
    # Each slice below drops the command plus the character that follows it.
    if query.startswith("--"):
        if query.startswith("--forum"):
            redirect_url = "http://www.udacity-forums.com/cs101/search/?q=" + urllib.quote(query[8:])
            return redirect(redirect_url)
        if query.startswith("--cs373"):
            redirect_url = "http://www.udacity-forums.com/cs373/search/?q=" + urllib.quote(query[8:])
            return redirect(redirect_url)
        if query.startswith("--python"):
            redirect_url = "http://docs.python.org/search.html?q=" + urllib.quote(query[9:])
            return redirect(redirect_url)
        if query.startswith("--searchwithpeterdotinfo"):
            redirect_url = "http://searchwithpeter.info/secretplans.html?q=" + urllib.quote(query[25:])
            return redirect(redirect_url)
        if query.startswith("--showmore"):
            query = query[11:]
            search_query = query
            number_pages = 20
            number_videos = 10
        # Checked after --showmore so the two commands can be chained.
        if query.startswith("--daverank"):
            query = query[11:]
            search_query = query
            show_daverank = True

    # The DuckDuckGo URL already contains "python+", so drop a leading
    # "python" (and the character after it) from the query.
    if query.startswith("python"):
        pyquery = query[7:]
    else:
        pyquery = query

    ddgurl_root = "http://duckduckgo.com/?q=python+"
    ddgurl_suffix = urllib.quote(pyquery) + "&format=json"

    response = urllib.urlopen(ddgurl_root + ddgurl_suffix)
    response_json = response.read()

    pythonterm = json.loads(response_json)

    if pythonterm:
        pyterm_info = {}
        if pythonterm["AbstractSource"] == "Python Documentation":
            pyterm = BeautifulSoup(pythonterm["AbstractText"])
            try:
                pyterm_code = pyterm.find("code").string
                pyterm.pre.decompose()
                pyterm_info["code"] = pyterm_code
            except AttributeError:
                # The abstract has no <code> or <pre> element.
                pyterm_info["code"] = None
            pyterm_desc = pyterm.get_text()
            pyterm_info["desc"] = pyterm_desc
            pyterm_info["url"] = pythonterm["AbstractURL"]
            results = True
    else:
        pyterm_info = None

    # Build a new list rather than removing items while iterating, which
    # skips the element following each removed stopword.
    query_words = [word for word in query.split() if word not in stopwords]

    query_urls = []
    for term in query_words:
        # Get all SearchTerm objects that match the search_query.
        q = SearchTerm.all().filter("term =", term).get()
        if q:
            query_urls.append(set(q.urls))

    if query_urls:
        # Only URLs indexed under every found query word are kept.
        query_url_set = set.intersection(*query_urls)
        query_url_list = list(query_url_set)

        if len(query_url_list) > 0:
            results = True
        if len(query_url_list) > 30:
            query_url_list = query_url_list[0:30]

        page_results = Page.all().filter("url IN", query_url_list).order("-dave_rank").fetch(number_pages)
        page_dicts = []
        for page in page_results:
            page_info = {}
            query_index = page.text.find(query)
            if query_index != -1:
                # Start the excerpt about 25 characters before the match, on
                # a space when one exists in that window.  A negative start
                # would make str.find() count from the end of the text, so
                # matches near the beginning start the excerpt at 0.
                window_start = query_index - 25
                if window_start <= 0:
                    i = 0
                else:
                    i = page.text.find(" ", window_start, query_index)
                    if i == -1:
                        i = window_start
                excerpt_words = page.text[i:].split(" ")
                page_info["exact_match"] = True
            else:
                excerpt_words = page.text.split(" ")
                page_info["exact_match"] = False
            excerpt = " ".join(excerpt_words[:50])

            page_info["text"] = excerpt
            page_info["title"] = page.title
            page_info["url"] = page.url
            page_info["daverank"] = page.dave_rank
            page_info["doc"] = page.doc
            page_dicts.append(page_info)
        # Stable sort: exact matches first, rank order otherwise preserved.
        page_dicts.sort(key=itemgetter("exact_match"), reverse=True)

        video_results = Video.all().filter("url IN", query_url_list).order("-views").fetch(number_videos)
        video_dicts = []
        for video in video_results:
            video_info = {}
            subtitles = video.text.lower()
            query_index = subtitles.find(query)
            time_string = ""
            if query_index != -1:
                subtitle_list = subtitles.splitlines()
                for phrase in subtitle_list:
                    if phrase.find(query) != -1:
                        # The line before a matching phrase is treated as
                        # its timestamp (presumably HH:MM:SS... - confirm).
                        timestamp_index = subtitle_list.index(phrase) - 1
                        timestamp = subtitle_list[timestamp_index]
                        if len(timestamp) > 1:
                            minutes = timestamp[3:5]
                            seconds = timestamp[6:8]
                            time_string = "#t=" + minutes + "m" + seconds + "s"
                            start = 60 * int(minutes) + int(seconds)
            if time_string:
                url = video.url + time_string
                video_info["exact_match"] = True
            else:
                url = video.url
                start = 0
                video_info["exact_match"] = False
            video_info["title"] = video.title
            video_info["url"] = url
            # NOTE(review): this slice uses a step of 20, so it samples
            # characters rather than taking a contiguous snippet - confirm
            # this is intended.
            video_info["subtitle"] = video.text[-20:query_index:20]
            video_info["id"] = video.id
            video_info["start"] = start
            video_dicts.append(video_info)
        video_dicts.sort(key=itemgetter("exact_match"), reverse=True)

    else:
        page_dicts = None
        video_dicts = None

    query_string_words = query.split()

    return template(
        "templates/results",
        search_query=search_query,
        query_string_words=query_string_words,
        page_dicts=page_dicts,
        video_dicts=video_dicts,
        pyterm_info=pyterm_info,
        show_daverank=show_daverank,
        results=results,
    )