def _parse_json_response(query, results):
    """
    Parses GOV.uk's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms, query)
    content = json.loads(results.text)

    # The base url - results do not provide a full link.
    base_url = "https://www.gov.uk"

    rank = 0

    for result in content[u'results']:
        text = result.get(u'description', '')
        title = result[u'title']
        url = base_url + result[u'link']
        rank = rank + 1

        # Add the result to the ifind response.
        response.add_result(title=title, url=url, summary=text, rank=rank)

        if len(response) == query.top:
            break

    return response
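# A minimal usage sketch for the parser above, runnable offline. The _FakeResults stub
# below is hypothetical: it stands in for a requests response object by exposing only the
# .text attribute the parser reads. The ifind Query constructor call is an assumption and
# is left commented out.
import json


class _FakeResults(object):
    """Mimics the .text attribute of a requests response object."""
    def __init__(self, payload):
        self.text = json.dumps(payload)


# A payload with the fields the parser reads: u'results', u'title', u'link', u'description'.
payload = {u'results': [{u'title': u'Example',
                         u'link': u'/example',
                         u'description': u'An example result.'}]}

# query = Query('example', top=10)  # assuming ifind's Query constructor
# response = _parse_json_response(query, _FakeResults(payload))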
def _parse_xml_response(query, results):
    """
    Parses Wikipedia's XML response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)

    xml_doc = xml.dom.minidom.parseString(results.content)
    items = xml_doc.getElementsByTagName('Item')

    for result in items:
        title = result.getElementsByTagName('Text')[0].firstChild.data
        url = result.getElementsByTagName('Url')[0].firstChild.data
        summary = result.getElementsByTagName('Description')[0].firstChild.data
        response.add_result(title=title, url=url, summary=summary)

    return response
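# The XML shape consumed by the parser above, inferred from the tag names it reads (an
# assumption; the real response may carry more elements). Running the sample through
# minidom exercises the same traversal:
import xml.dom.minidom

sample = ("<Results><Item>"
          "<Text>Python (programming language)</Text>"
          "<Url>https://en.wikipedia.org/wiki/Python_(programming_language)</Url>"
          "<Description>A general-purpose programming language.</Description>"
          "</Item></Results>")

doc = xml.dom.minidom.parseString(sample)
for item in doc.getElementsByTagName('Item'):
    print item.getElementsByTagName('Text')[0].firstChild.data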
def _parse_json_response(query, results):
    """
    Parses Pipl's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    for record in content[u'records']:
        name = record[u'names'][0][u'display']
        url = record[u'source'][u'url']

        imageurl = None
        try:
            imageurl = record[u'images'][0][u'url']
        except (KeyError, IndexError):
            pass

        summary = Pipl._build_summary(record)

        response.add_result(title=name, url=url, summary=summary, imageurl=imageurl)

    return response
def _parse_xml_response(query, results):
    """
    Parses Wikipedia's XML response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)

    xml_doc = xml.dom.minidom.parseString(results.content)
    items = xml_doc.getElementsByTagName("Item")

    for result in items:
        title = result.getElementsByTagName("Text")[0].firstChild.data
        url = result.getElementsByTagName("Url")[0].firstChild.data
        summary = result.getElementsByTagName("Description")[0].firstChild.data
        response.add_result(title=title, url=url, summary=summary)

    return response
def _parse_json_response(query, results):
    """
    Parses GOV.uk's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    for result in content[u"results"]:
        text = result[u"details"][u"description"]
        title = result[u"title"]
        url = result[u"web_url"]

        response.add_result(title=title, url=url, summary=text)

        if len(response) == query.top:
            break

    return response
def _parse_json_response(query, results):
    """
    Parses Facebook's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    # Check to see if the response contains any API errors.
    Facebook._check_errors(content)

    if query.result_type == 'user' or not query.result_type:
        # Sample response:
        # {
        #     "data": [
        #         {
        #             "name": "John Doe",
        #             "id": "999999999999999"
        #         },
        #         {
        #             "name": "John Doe",
        #             "id": "88888888888888"
        #         }
        #     ],
        #     "paging": {
        #         "next": "long_url"
        #     }
        # }

        # The base URL is used to create the link to the profile; it will redirect to a permanent user URL.
        base_url = "https://www.facebook.com/app_scoped_user_id/"

        for user in content[u'data']:
            name = user[u'name']
            tempid = user[u'id']
            url = base_url + tempid + '/'
            text = ''
            img = "https://graph.facebook.com/{}/picture?type=normal".format(tempid)

            # Minimal information; probably need a second round of querying the API for each user
            # to get something for the snippet. Better way?
            response.add_result(title=name, url=url, summary=text, imageurl=img)

    # TODO: Implement the other search types.

    return response
def _parse_json_response(self, query, results):
    """
    Parses Neutrinogeoaddress's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms, query)
    content = json.loads(results.text)

    # Results aren't paginated; there are no more to get.
    response.no_more_results = True

    url_base = 'https://www.google.co.uk/maps/place/'
    trans_table = maketrans(u' ', u'+')  # Switch spaces with '+' for the Google Maps URL.

    locations = content.get(u'locations')

    if locations:
        # There are results present; iterate over them.
        for loc in locations:
            # Kwargs below
            address = loc.get(u'address', '')
            latitude = loc.get(u'latitude', '')
            longitude = loc.get(u'longitude', '')
            country = loc.get(u'country', '')
            country_code = loc.get(u'country-code', '')
            city = loc.get(u'city', '')
            postcode = loc.get(u'postal-code', '')

            # The iframe_url must be placed in an iframe in order to render the map.
            if self.google_api_key:
                iframe_url = self._build_iframe_url(address, trans_table)
            else:
                iframe_url = None

            url = url_base + encode_symbols(address.encode('utf-8').translate(trans_table))
            text = Neutrinogeoaddress._build_summary(address, city, country, postcode, latitude, longitude)

            response.add_result(title=address, url=url, summary=text, imageurl=None,
                                address=address, latitude=latitude, longitude=longitude,
                                country=country, country_code=country_code, city=city,
                                postcode=postcode, iframe_url=iframe_url)

    return response
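# The space-to-'+' translation above hand-rolls part of what the Python 2 standard
# library already provides. A sketch of the equivalent with urllib.quote_plus, which
# also percent-encodes the remaining unsafe characters (the division of labour with
# encode_symbols above is an assumption, since its implementation isn't shown here):
from urllib import quote_plus

address = u'10 Downing Street, London'
maps_url = 'https://www.google.co.uk/maps/place/' + quote_plus(address.encode('utf-8'))
# -> https://www.google.co.uk/maps/place/10+Downing+Street%2C+London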
def _parse_json_response(self, query, results):
    """
    Parses Googleplus's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    result_type = DEFAULT_RESULT_TYPE
    if query.result_type:
        result_type = query.result_type

    if result_type == 'people' or result_type == 'people+':
        for user in content[u'items']:
            name = user[u'displayName']
            url = user[u'url']
            imageurl = Googleplus._resize_image(user[u'image'][u'url'])

            # Check to see if the search result needs recursively acquired person details.
            if result_type == 'people+':
                summary = self._build_person_summary(user[u'id'])
            else:
                summary = ''

            # Add the result to the response.
            response.add_result(title=name, url=url, summary=summary, imageurl=imageurl)

    elif result_type == 'activities':
        for activity in content[u'items']:
            title = activity[u'verb'] + ' ' + activity[u'title']
            url = activity[u'url']
            summary = Googleplus._build_activity_summary(activity)

            imageurl = ''
            try:
                imageurl = Googleplus._resize_image(activity[u'image'][u'url'])
            except KeyError:
                pass

            # Add the result to the response.
            response.add_result(title=title, url=url, summary=summary, imageurl=imageurl)

    return response
def _parse_json_response(query, results):
    """
    Parses Bing's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    rank_counter = 1

    if query.result_type == 'web' or not query.result_type:
        for result in content[u'd'][u'results'][0][u'Web']:
            response.add_result(title=result[u'Title'], url=result[u'Url'],
                                summary=result[u'Description'], rank=rank_counter)
            rank_counter += 1

    if query.result_type == 'image':
        for result in content[u'd'][u'results'][0][u'Image']:
            file_size = str(int(result[u'FileSize']) / 1024)  # in kilobytes
            width = result[u'Width']
            height = result[u'Height']
            media_url = result[u'MediaUrl']
            thumb_url = result[u'Thumbnail'][u'MediaUrl']

            response.add_result(file_size=file_size, width=width, height=height,
                                media_url=media_url, thumb_url=thumb_url)

    if query.result_type == 'video':
        for result in content[u'd'][u'results'][0][u'Video']:
            run_time = Bing._get_video_length(int(result[u'RunTime']))
            title = result[u'Title']
            media_url = result[u'MediaUrl']
            thumb_url = result.get(u'Thumbnail', {}).get(u'MediaUrl', None)

            if thumb_url is None:
                continue

            response.add_result(title=title, media_url=media_url,
                                run_time=run_time, thumb_url=thumb_url)

    return response
def _parse_json_response(query, results):
    """
    Parses Pipl's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms, query)
    content = json.loads(results.text)

    # Pipl only returns 20 results, so there are no more.
    response.no_more_results = True

    for record in content[u'records']:
        try:
            name = record[u'names'][0][u'display']
        except KeyError:
            name = record.get('source').get('url', "_no title_")

        url = record[u'source'][u'url']

        imageurl = None
        try:
            imageurl = record[u'images'][0][u'url']
        except (KeyError, IndexError):
            pass

        summary = Pipl._build_summary(record)

        # Kwargs below
        # Each keyword contains a list of (potentially empty) dictionary objects.
        usernames = record.get(u'usernames')
        addresses = record.get(u'addresses')
        relationships = record.get(u'relationships')
        jobs = record.get(u'jobs')
        educations = record.get(u'educations')
        tags = record.get(u'tags')

        response.add_result(title=name, url=url, summary=summary, imageurl=imageurl,
                            usernames=usernames, addresses=addresses,
                            relationships=relationships, jobs=jobs,
                            educations=educations, tags=tags)

    return response
def _create_response(self, query):
    response = Response(query.terms)

    matches = ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten']
    result_list = ['rand', 'rand', 'rand', 'rand', 'rand', 'rand', 'rand', 'rand', 'rand', 'rand']

    matched = False
    if query.terms in matches:
        matched = True

    if matched:
        result_list = matches

    for x in result_list:
        response.add_result(x, 'www.' + x + '.com', x + ' ' + ' ' + x)

    return response
def _parse_json_response(self, query, results):
    """
    Parses Google Custom Search's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms, query)
    content = json.loads(results.text)

    # The query object wasn't mutated earlier and the result type isn't passed to this function.
    # Check for a result_type or set it to the default.
    result_type = query.result_type
    if not result_type:
        result_type = self.default_result_type

    # Check for a next page token.
    next_page_token = content.get(u'nextPageToken')
    if next_page_token:
        # A page token exists; create the URL which will fetch the next page.
        response.next_page = "{}&pageToken={}".format(self._create_query_string(query), next_page_token)

    rank_counter = 1

    if result_type == 'web' or not query.result_type:
        for result in content[u'items']:
            title = result[u'title']
            url = result[u'link']
            summary = result[u'snippet']

            response.add_result(title=title, url=url, summary=summary, rank=rank_counter)
            rank_counter += 1

    return response
def _parse_json_response(query, results):
    """
    Parses Twitter's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    for result in content[u'statuses']:
        text = result[u'text']
        result_id = str(result[u'id'])
        user_id = result[u'user'][u'id_str']

        # TODO: clean this up.
        created_at = result[u'created_at'].split()
        created_at.pop(4)
        created_at = ' '.join(created_at)

        url = 'https://www.twitter.com/{0}/status/{1}'.format(user_id, result_id)

        response.add_result(title=created_at, url=url, summary=text)

        if len(response) == query.top:
            break

    return response
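# The split/pop/join above relies on Twitter's fixed created_at layout, where index 4 is
# the UTC offset. A worked example (the timestamp value is illustrative, not taken from a
# real response):
stamp = u'Mon Sep 01 20:23:05 +0000 2014'.split()
# stamp == [u'Mon', u'Sep', u'01', u'20:23:05', u'+0000', u'2014']
stamp.pop(4)                   # drop the u'+0000' offset field
print ' '.join(stamp)          # -> Mon Sep 01 20:23:05 2014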
def _create_response(self, query):
    response = Response(query.terms)

    matches = ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten']
    result_list = ['rand', 'rand', 'rand', 'rand', 'rand', 'rand', 'rand', 'rand', 'rand', 'rand']

    matched = False
    if query.terms in matches:
        matched = True

    if matched:
        result_list = matches

    for x in result_list:
        response.add_result(x, 'www.' + x + '.com', x + ' ' + ' ' + x)

    return response
def _parse_json_response(self, query, results):
    """
    Parses Googleplus's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    result_type = DEFAULT_RESULT_TYPE
    if query.result_type:
        result_type = query.result_type

    if result_type == 'people' or result_type == 'people+':
        for user in content[u'items']:
            name = user[u'displayName']
            url = user[u'url']
            imageurl = Googleplus._resize_image(user[u'image'][u'url'])

            # Check to see if the search result needs recursively acquired person details.
            if result_type == 'people+':
                summary = self._build_person_summary(user[u'id'])
            else:
                summary = ''

            # Add the result to the response.
            response.add_result(title=name, url=url, summary=summary, imageurl=imageurl)

    elif result_type == 'activities':
        for activity in content[u'items']:
            title = activity[u'verb'] + ' ' + activity[u'title']
            url = activity[u'url']
            summary = Googleplus._build_activity_summary(activity)

            imageurl = ''
            try:
                imageurl = Googleplus._resize_image(activity[u'image'][u'url'])
            except KeyError:
                pass

            # Add the result to the response.
            response.add_result(title=title, url=url, summary=summary, imageurl=imageurl)

    return response
def _parse_json_response(query, results):
    """
    Parses GOV.uk's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    # The base url - results do not provide a full link.
    base_url = "https://www.gov.uk"

    for result in content[u'results']:
        try:
            # Catch results with no description (they exist!).
            text = result[u'description']
        except KeyError:
            text = ''

        title = result[u'title']
        url = base_url + result[u'link']

        response.add_result(title=title, url=url, summary=text)

        if len(response) == query.top:
            break

    return response
def _parse_json_response(query, results):
    """
    Parses Bing's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    rank_counter = 1

    if query.result_type == 'web' or not query.result_type:
        for result in content[u'd'][u'results'][0][u'Web']:
            response.add_result(title=result[u'Title'], url=result[u'Url'],
                                summary=result[u'Description'], rank=rank_counter)
            rank_counter += 1

    if query.result_type == 'image':
        for result in content[u'd'][u'results'][0][u'Image']:
            file_size = str(int(result[u'FileSize']) / 1024)  # in kilobytes
            width = result[u'Width']
            height = result[u'Height']
            media_url = result[u'MediaUrl']
            thumb_url = result[u'Thumbnail'][u'MediaUrl']

            response.add_result(file_size=file_size, width=width, height=height,
                                media_url=media_url, thumb_url=thumb_url)

    if query.result_type == 'video':
        for result in content[u'd'][u'results'][0][u'Video']:
            run_time = Bing._get_video_length(int(result[u'RunTime']))
            title = result[u'Title']
            media_url = result[u'MediaUrl']
            thumb_url = result.get(u'Thumbnail', {}).get(u'MediaUrl', None)

            if thumb_url is None:
                continue

            response.add_result(title=title, media_url=media_url,
                                run_time=run_time, thumb_url=thumb_url)

    return response
def _parse_json_response(self, query, results):
    """
    Parses Twitter's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms, query)
    content = json.loads(results.text)

    # Check to see if there are more results.
    next_results = content[u'search_metadata'].get(u'next_results', '')

    if next_results:
        # Create a dictionary from the string found in u'next_results'.
        params = next_results[1:]
        params = params.split('&')

        for index in range(len(params)):
            params[index] = params[index].split('=')

        param_dic = {}

        # At this point params looks like: [['someparam', 'somevalue'], ['someparam', 'somevalue'], ...]
        for lis in params:
            param_dic[lis[0]] = lis[1]

        # Set the next page URL in the response.
        response.next_page = self._create_query_string(query, search_params=param_dic)
    else:
        # No more results; set the flag in the response.
        response.no_more_results = True

    for result in content[u'statuses']:
        text = result[u'text']
        result_id = str(result[u'id'])

        # User dictionary
        user = {'user_id': result[u'user'][u'id_str'],
                'profile_image': result.get(u'user').get(u'profile_image_url'),
                'geo_enabled': result.get(u'user').get(u'geo_enabled'),
                'description': result.get(u'user').get(u'description'),
                'follower_count': result.get(u'user').get(u'followers_count'),
                'protected': result.get(u'user').get(u'protected'),
                'location': result.get(u'user').get(u'location'),
                'utc_offset': result.get(u'user').get(u'utc_offset'),
                'time_zone': result.get(u'user').get(u'time_zone'),
                'name': result.get(u'user').get(u'name'),
                'screen_name': result.get(u'user').get(u'screen_name'),
                'member_since': result.get(u'user').get(u'created_at')
                }

        # TODO: clean this up.
        stamp = result[u'created_at'].split()

        # Created at in the format: '01 Jan, 2014 @ 20:23'
        created_at = "{} {}, {} @ {}".format(stamp[2], stamp[1], stamp[5], stamp[3][:-3])

        url = 'https://www.twitter.com/{0}/status/{1}'.format(user['user_id'], result_id)
        imageurl = user.get('profile_image')
        title = u"{} ({}) - {}".format(user['name'], user['screen_name'], created_at)

        # Kwargs below
        source = result.get(u'source')
        coordinates = result.get(u'coordinates')
        place = result.get(u'place')
        hashtags = result.get(u'entities').get(u'hashtags')
        user_info = user
        reply_to_screen_name = result.get(u'in_reply_to_screen_name')
        reply_to_userid = result.get(u'in_reply_to_user_id_str')
        reply_to_status = result.get(u'in_reply_to_status_id_str')
        tweet_id = result_id

        # List of links in the tweet. Each item in the list is a dictionary with keys:
        # u'url', u'indices', u'expanded_url', u'display_url'
        links = result.get(u'entities').get(u'urls')

        # List of media items in the tweet. Each item in the list is a dictionary with keys:
        # u'expanded_url', u'sizes', u'url', u'media_url_https',
        # u'id_str', u'indices', u'media_url', u'type', u'id', u'display_url'
        media = result.get(u'entities').get(u'media')

        # List of users mentioned in the tweet. Each item in the list is a dictionary with keys:
        # u'indices', u'screen_name', u'id', u'name', u'id_str'
        user_mentions = result.get(u'entities').get(u'user_mentions')

        response.add_result(title=title, url=url, summary=text, imageurl=imageurl,
                            stamp=stamp, user_info=user_info, media=media, links=links,
                            user_mentions=user_mentions, source=source,
                            coordinates=coordinates, place=place, hashtags=hashtags,
                            reply_to_screen_name=reply_to_screen_name,
                            reply_to_status=reply_to_status,
                            reply_to_userid=reply_to_userid, tweet_id=tweet_id)

        if len(response) == query.top:
            break

    return response
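# The manual split('&')/split('=') parsing of u'next_results' above re-implements
# query-string parsing that the Python 2 standard library already offers. A sketch of the
# equivalent, assuming next_results looks like '?max_id=123&q=term' as in Twitter's search
# metadata:
from urlparse import parse_qsl

next_results = '?max_id=123&q=term&include_entities=1'
param_dic = dict(parse_qsl(next_results[1:]))
# param_dic == {'max_id': '123', 'q': 'term', 'include_entities': '1'}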
def _parse_whoosh_response(query, results):
    """
    Parses Whoosh's response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)

    # Dmax thinks this line is incorrect.
    # I've substituted it with a line just before returning the response...
    #response.result_total = results.pagecount

    r = 0

    for result in results:
        r = r + 1

        title = result["title"]
        if title:
            title = title.strip()
        else:
            title = "Untitled"

        rank = ((int(results.pagenum) - 1) * results.pagelen) + r
        url = "/treconomics/" + str(result.docnum)
        summary = result.highlights("content")

        trecid = result["docid"]
        trecid = trecid.strip()

        source = result["source"]

        response.add_result(title=title,
                            url=url,
                            summary=summary,
                            docid=trecid,
                            source=source,
                            rank=rank,
                            whooshid=result.docnum,
                            score=result.score)

        #if len(response) == query.top:
        #    break

    # Dmax has added this line as a replacement for the one commented out above.
    response.result_total = len(results)

    # Add the total number of pages from the results object as an attribute of our response object.
    # We also add the total number of results shown on the page.
    setattr(response, 'total_pages', results.pagecount)
    setattr(response, 'results_on_page', results.pagelen)
    setattr(response, 'actual_page', results.actual_page)

    return response
def _request(self, query):
    """
    Issues a single request to Whoosh Index and returns the result as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Raises:
        EngineException

    Usage:
        Private method.

    """
    #try:
    query_terms = self.parser.parse(unicode(query.terms))
    page = query.skip
    pagelen = query.top

    with self.docIndex.searcher(weighting=self.scoring_model) as searcher:
        #invalid_page_no = True
        cache_key = self.get_cache_key(page, query.terms)

        if self.use_cache and self.cache.exists(cache_key):
            return_response = self.cache.get(cache_key)
            return_response = pickle.loads(return_response)
            print "WhooshTRECNewsEngine found CACHED results"
        else:
            results = searcher.search_page(query_terms, page, pagelen=(FORWARD_LOOK_PAGES * pagelen))

            results.fragmenter = highlight.ContextFragmenter(maxchars=3000, surround=3000)
            results.formatter = highlight.HtmlFormatter()
            results.fragmenter.charlimit = 100000

            setattr(results, 'actual_page', page)

            ifind_response = self._parse_whoosh_response(query, results)
            interleaved_results = self.__interleave_results(ifind_response.results, pagelen)
            split_results = self.__split_results(query, interleaved_results)

            page_counter = page
            return_response = Response(query.terms)

            for page_list in split_results:
                response = Response(query.terms)

                for hit in page_list:
                    response.add_result_object(hit)

                response.pagenum = results.pagenum
                response.total_pages = results.pagecount
                response.results_on_page = len(page_list)
                response.actual_page = page_counter

                loop_cache_key = self.get_cache_key(page_counter, query.terms)

                if self.use_cache and not self.cache.exists(loop_cache_key):
                    response_str = pickle.dumps(response)
                    self.cache.set(loop_cache_key, response_str)

                if page_counter == page:
                    return_response = response

                #print "WhooshTRECNewsEngine found: " + str(len(results)) + " results for query: " + query.terms
                #print "Page %d of %d - PageLength of %d" % (results.pagenum, results.pagecount, results.pagelen)

                page_counter = page_counter + 1

        """
        # If the user specifies a page number that's higher than the number of pages available,
        # this loop keeps looking until a page number is found that contains results, and uses that instead.
        # Prevents a horrible AttributeError exception later on!
        while invalid_page_no:
            try:
                results = searcher.search_page(query_terms, page, pagelen)
                invalid_page_no = False
                setattr(results, 'actual_page', page)
            except ValueError:
                page -= page

        results.fragmenter = highlight.ContextFragmenter(maxchars=300, surround=300)
        results.formatter = highlight.HtmlFormatter()
        results.fragmenter.charlimit = 100000

        print "WhooshTRECNewsEngine found: " + str(len(results)) + " results for query: " + query.terms
        print "Page %d of %d - PageLength of %d" % (results.pagenum, results.pagecount, results.pagelen)

        response = self._parse_whoosh_response(query, results)
        """

    #except:
    #    print "Error in Search Service: Whoosh TREC News search failed"

    return return_response
def parse_response(reader, fieldname, analyzer, fragmenter, formatter, query, results, results_are_page=False):
    """
    Returns an ifind Response, given a query and set of results from Whoosh/Redis.
    Takes an ifind Query object and a list of SORTED results for the given query.
    If the page requested (query.skip) is < 0, page 1 is returned.
    If the page requested is greater than the number of available pages, the last page is returned.
    """
    def get_term_list():
        if isinstance(query.parsed_terms, unicode):
            return [query.parsed_terms]

        return [text for term_fieldname, text in query.parsed_terms.all_terms() if term_fieldname == fieldname]

    response = Response(query.terms)
    response.results_total = len(results)

    if results_are_page:
        page = results[0]
        response.total_pages = results[1]
        results = results[2]
    else:
        page, response.total_pages, results = get_page(query, results)

    page_len = query.top
    i = 0

    for result in results:
        i = i + 1
        rank = (page - 1) * page_len + i

        whoosh_docnum = result[0]
        score = result[1]

        stored_data = reader.stored_fields(whoosh_docnum)

        title = stored_data["title"]
        if title:
            title = title.strip()
        else:
            title = "Untitled Document"

        url = "/treconomics/{0}/".format(whoosh_docnum)

        trecid = stored_data["docid"].strip()
        source = stored_data["source"].strip()

        summary = highlight(stored_data["content"], get_term_list(), analyzer, fragmenter, formatter)
        summary = "{0}...".format(summary)

        response.add_result(title=title,
                            url=url,
                            summary=summary,
                            docid=trecid,
                            source=source,
                            rank=rank,
                            whooshid=whoosh_docnum,
                            score=score)

    # The following two lines are for compatibility purposes with the existing codebase.
    # Would really like to take these out.
    setattr(response, "results_on_page", len(results))
    setattr(response, "actual_page", page)

    return response
def _parse_whoosh_response(query, search_page, field, fragmenter, snippet_size):
    """
    Parses Whoosh's response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)

    search_page.results.fragmenter = fragmenter

    for result in search_page:
        title = result["title"]
        if title:
            title = title.strip()
        else:
            title = "Untitled"

        if title == '':
            title = "Untitled"

        rank = result.rank + 1
        url = "/treconomics/" + str(result.docnum)
        summary = result.highlights(field, top=snippet_size)
        content = result[field]

        trecid = result["docid"]
        trecid = trecid.strip()

        source = result["source"]

        response.add_result(title=title,
                            url=url,
                            summary=summary,
                            docid=trecid,
                            source=source,
                            rank=rank,
                            whooshid=result.docnum,
                            score=result.score,
                            content=content)

    response.result_total = len(search_page)

    # Add the total number of pages from the results object as an attribute of our response object.
    # We also add the total number of results shown on the page.
    setattr(response, 'total_pages', search_page.pagecount)
    setattr(response, 'results_on_page', search_page.pagelen)
    setattr(response, 'actual_page', search_page.actual_page)

    return response
def parse_response(reader, fieldname, analyzer, fragmenter, formatter, query, results, results_are_page=False):
    """
    Returns an ifind Response, given a query and set of results from Whoosh/Redis.
    Takes an ifind Query object and a list of SORTED results for the given query.
    If the page requested (query.skip) is < 0, page 1 is returned.
    If the page requested is greater than the number of available pages, the last page is returned.
    """
    def get_term_list():
        if isinstance(query.parsed_terms, unicode):
            return [query.parsed_terms]

        return [text for term_fieldname, text in query.parsed_terms.all_terms() if term_fieldname == fieldname]

    response = Response(query.terms)
    response.results_total = len(results)

    if results_are_page:
        page = results[0]
        response.total_pages = results[1]
        results = results[2]
    else:
        page, response.total_pages, results = get_page(query, results)

    page_len = query.top
    i = 0

    for result in results:
        i = i + 1
        rank = (page - 1) * page_len + i

        whoosh_docnum = result[0]
        score = result[1]

        stored_data = reader.stored_fields(whoosh_docnum)

        title = stored_data['title']
        if title:
            title = title.strip()
        else:
            title = "Untitled Document"

        url = "/treconomics/{0}/".format(whoosh_docnum)

        trecid = stored_data['docid'].strip()
        source = stored_data['source'].strip()

        summary = highlight(stored_data['content'], get_term_list(), analyzer, fragmenter, formatter)
        summary = "{0}...".format(summary)

        response.add_result(title=title,
                            url=url,
                            summary=summary,
                            docid=trecid,
                            source=source,
                            rank=rank,
                            whooshid=whoosh_docnum,
                            score=score)

    # The following two lines are for compatibility purposes with the existing codebase.
    # Would really like to take these out.
    setattr(response, 'results_on_page', len(results))
    setattr(response, 'actual_page', page)

    return response
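# The rank computed in the two functions above is a page offset plus a one-based position
# within the page. A quick worked example with query.top == 10: the third result on page 4
# gets rank (4 - 1) * 10 + 3 == 33.
page, page_len, i = 4, 10, 3
assert (page - 1) * page_len + i == 33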
def _parse_json_response(self, query, results):
    """
    Parses Googleplus's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms, query)
    content = json.loads(results.text)

    # The query object wasn't mutated earlier and the result type isn't passed to this function.
    # Check for a result_type or set it to the default.
    result_type = query.result_type
    if not result_type:
        result_type = self.default_result_type

    # Check for a next page token.
    next_page_token = content.get(u'nextPageToken')
    if next_page_token:
        # A page token exists; create the URL which will fetch the next page.
        response.next_page = "{}&pageToken={}".format(self._create_query_string(query), next_page_token)

    if result_type == 'people':
        # Build the ifind response for a people search.
        for user in content[u'items']:
            name = user[u'displayName']
            url = user[u'url']
            imageurl = Googleplus._resize_image(user[u'image'][u'url'])
            summary = ''

            # Kwargs
            id = user[u'id']

            # Add the result to the response.
            response.add_result(title=name, url=url, summary=summary, imageurl=imageurl, id=id)

    elif result_type == 'activities':
        # Build the ifind response for an activity search.
        for activity in content[u'items']:
            # The three dictionaries below are passed as keyword arguments to the result object.
            activity_dict = {'url': activity.get(u'url'),
                             'verb': activity.get(u'verb'),
                             'title': activity.get(u'title'),
                             'published': activity.get(u'published'),
                             'updated': activity.get(u'updated'),
                             'kind': activity.get(u'kind'),
                             'id': activity.get(u'id')
                             }

            actor_dict = {'display_name': activity.get(u'actor').get(u'displayName'),
                          'url': activity.get(u'actor').get(u'url'),
                          'image': activity.get(u'actor').get(u'image').get(u'url'),
                          'id': activity.get(u'actor').get(u'id')
                          }

            object_dict = {'type': activity.get(u'object').get(u'objectType'),
                           'content': activity.get(u'object').get(u'content').encode('utf-8'),
                           'url': activity.get(u'object').get(u'url'),
                           }

            title = u"{} ({})".format(activity_dict.get('title'), activity_dict.get('verb'))
            url = activity_dict.get('url')
            summary = Googleplus._build_activity_summary(activity)
            imageurl = Googleplus._resize_image(actor_dict.get('image'))

            # Attachments is a list of dictionaries with keys:
            # u'objectType', u'displayName', u'content', u'url' and potentially nested dictionaries,
            # such as u'embed', u'image', u'thumbnails' (a list of dicts).
            attachments = activity.get(u'object').get(u'attachments')

            # Add the result to the response.
            response.add_result(title=title, url=url, summary=summary, imageurl=imageurl,
                                actor=actor_dict, object=object_dict, activity=activity_dict,
                                attachments=attachments)

    elif result_type == 'person_lookup':
        # Build the ifind response for a person lookup. No loop, as the content is for a single person.
        title = content[u'displayName']
        url = content[u'url']
        imageurl = content[u'image'][u'url']
        summary = Googleplus._build_person_summary(content)

        about_me = content.get(u'aboutMe')
        occupation = content.get(u'occupation')
        verified = content.get(u'verified')
        circled_count = content.get(u'circledByCount')
        is_plus_user = content.get(u'isPlusUser')
        birthday = content.get(u'birthday')
        bragging_rights = content.get(u'braggingRights')
        emails = content.get(u'emails')
        skills = content.get(u'skills')
        relationship_status = content.get(u'relationshipStatus')
        places_lived = content.get(u'placesLived')
        organizations = content.get(u'organizations')
        tagline = content.get(u'tagline')

        # Kwargs below
        person = {'about_me': about_me,
                  'occupation': occupation,
                  'verified': verified,
                  'emails': emails,
                  'circled_count': circled_count,
                  'is_plus_user': is_plus_user,
                  'birthday': birthday,
                  'bragging_rights': bragging_rights,
                  'skills': skills,
                  'relationship_status': relationship_status,
                  'places_lived': places_lived,
                  'organizations': organizations,
                  'tagline': tagline
                  }

        # Add the result to the response.
        response.add_result(title=title, url=url, summary=summary, imageurl=imageurl, person=person)

    return response
def _parse_json_response(self, query, results):
    """
    Parses Companycheck's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms, query)
    content = json.loads(results.text)

    # The base URL to add the director or company number to, which provides the complete link.
    url_base = 'http://companycheck.co.uk/'

    # Since the object isn't mutated, set the default again if there is nothing present.
    result_type = query.result_type
    if not result_type:
        result_type = self.default_result_type

    # CompanyCheck returns all the results it has; it has no further results.
    response.no_more_results = True

    if result_type == 'company' or not result_type:
        # Create the ifind response for company searches.
        for company in content:
            name = company[u'name']
            url = url_base + 'company/' + str(company[u'number'])
            imageurl = None
            summary = Companycheck._build_company_summary(company)

            # Keyword args below
            number = company[u'number']
            country = company[u'country']
            address = company[u'address']
            sic = company[u'sic']
            status = company[u'status']

            # Add result object to the response.
            response.add_result(title=name, url=url, summary=summary, imageurl=imageurl,
                                number=number, country=country, address=address,
                                sic=sic, status=status)

    elif result_type == 'director':
        # Create the ifind response for director searches.
        for director in content:
            name = director[u'name']
            url = url_base + 'director/' + str(director[u'number'])
            imageurl = None

            sum_dic = Companycheck._build_director_summary(director)
            summary = sum_dic.get('summary')

            # Keyword args below
            postcodes = sum_dic.get('postcode_list')
            number = director[u'number']

            # Add result object to the response.
            response.add_result(title=name, url=url, summary=summary, imageurl=imageurl,
                                postcodes=postcodes, number=number)

    return response
def _parse_json_response(query, results):
    """
    Parses Twitter's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    for result in content[u'statuses']:
        text = result[u'text']
        result_id = str(result[u'id'])

        # User dictionary
        user = {'user_id': result[u'user'][u'id_str'],
                'profile_image': result.get(u'user').get(u'profile_image_url'),
                'geo_enabled': result.get(u'user').get(u'geo_enabled'),
                'description': result.get(u'user').get(u'description'),
                'follower_count': result.get(u'user').get(u'followers_count'),
                'protected': result.get(u'user').get(u'protected'),
                'location': result.get(u'user').get(u'location'),
                'utc_offset': result.get(u'user').get(u'utc_offset'),
                'time_zone': result.get(u'user').get(u'time_zone'),
                'name': result.get(u'user').get(u'name'),
                'screen_name': result.get(u'user').get(u'screen_name'),
                'member_since': result.get(u'user').get(u'created_at')
                }

        # TODO: clean this up.
        stamp = result[u'created_at'].split()

        # Created at in the format: '01 Jan, 2014 @ 20:23'
        created_at = "{} {}, {} @ {}".format(stamp[2], stamp[1], stamp[5], stamp[3][:-3])

        url = 'https://www.twitter.com/{0}/status/{1}'.format(user['user_id'], result_id)
        imageurl = user.get('profile_image')
        title = u"{} ({}) - {}".format(user['name'], user['screen_name'], created_at)

        # Kwargs below
        source = result.get(u'source')
        coordinates = result.get(u'coordinates')
        place = result.get(u'place')
        hashtags = result.get(u'entities').get(u'hashtags')
        user_info = user
        reply_to_screen_name = result.get(u'in_reply_to_screen_name')
        reply_to_userid = result.get(u'in_reply_to_user_id_str')
        reply_to_status = result.get(u'in_reply_to_status_id_str')

        # List of links in the tweet. Each item in the list is a dictionary with keys:
        # u'url', u'indices', u'expanded_url', u'display_url'
        links = result.get(u'entities').get(u'urls')

        # List of media items in the tweet. Each item in the list is a dictionary with keys:
        # u'expanded_url', u'sizes', u'url', u'media_url_https',
        # u'id_str', u'indices', u'media_url', u'type', u'id', u'display_url'
        media = result.get(u'entities').get(u'media')

        # List of users mentioned in the tweet. Each item in the list is a dictionary with keys:
        # u'indices', u'screen_name', u'id', u'name', u'id_str'
        user_mentions = result.get(u'entities').get(u'user_mentions')

        response.add_result(title=title, url=url, summary=text, imageurl=imageurl,
                            stamp=stamp, user_info=user_info, media=media, links=links,
                            user_mentions=user_mentions, source=source,
                            coordinates=coordinates, place=place, hashtags=hashtags,
                            reply_to_screen_name=reply_to_screen_name,
                            reply_to_status=reply_to_status,
                            reply_to_userid=reply_to_userid)

        if len(response) == query.top:
            break

    return response
def _parse_json_response(query, results):
    """
    Parses Companycheck's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    url_base = 'http://companycheck.co.uk/'

    if query.result_type:
        result_type = query.result_type
    else:
        result_type = DEFAULT_RESULT_TYPE

    if result_type == 'company' or not result_type:
        for company in content:
            name = company[u'name']
            url = url_base + 'company/' + str(company[u'number'])
            imageurl = None
            summary = Companycheck._build_company_summary(company)

            # Keyword args below
            number = company[u'number']
            country = company[u'country']
            address = company[u'address']
            sic = company[u'sic']
            status = company[u'status']

            # Add result object to the response.
            response.add_result(title=name, url=url, summary=summary, imageurl=imageurl,
                                number=number, country=country, address=address,
                                sic=sic, status=status)

    elif result_type == 'director':
        for director in content:
            name = director[u'name']
            url = url_base + 'director/' + str(director[u'number'])
            imageurl = None

            sum_dic = Companycheck._build_director_summary(director)
            summary = sum_dic.get('summary')

            postcodes = sum_dic.get('postcode_list')
            number = director[u'number']

            response.add_result(title=name, url=url, summary=summary, imageurl=imageurl,
                                postcodes=postcodes, number=number)

    return response