Пример #1
0
 def parse_whoosh_trec(site, query, results):
     """Create an OpenSearch Response from a page of Whoosh TREC results.

     Parameters:

     * site (str): search engine name
     * query (str): query search terms
     * results: page of hits; must support len() and iteration and expose
       pagecount, pagelen and pagenum (presumably a whoosh ResultsPage -
       TODO confirm against caller)

     Returns:

     * Response: one entry per hit with title, link, summary, docid and
       source; entries may be incomplete if conversion fails part-way

     """
     response = Response()
     response.version = 'trec'
     response.feed.setdefault('title', "{0}: {1}".format(site, query))
     response.feed.setdefault('link', '')
     response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
     response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
     response.feed.setdefault("opensearch_totalresults", results.pagecount)
     # Read the page length from the results object, exactly as the rank
     # calculation below does, instead of a bare name not defined here.
     response.feed.setdefault("opensearch_itemsperpage", results.pagelen)
     response.feed.setdefault("opensearch_startindex", results.pagenum)
     response.feed.setdefault('query', query)
     try:
         if len(results) > 0:  # a single hit is still a hit
             # Number of ranks consumed by the pages before this one
             rank_base = (int(results.pagenum) - 1) * results.pagelen
             for position, hit in enumerate(results, 1):
                 # Fall back to the query text when the stored title is blank
                 title = hit["title"].strip() or query
                 rank = rank_base + position
                 link = "/treconomics/" + str(hit.docnum) + "?rank=" + str(rank)
                 desc = hit.highlights("content")
                 docid = hit["docid"].strip()
                 source = hit["source"]
                 response.entries.append({'title': title, 'link': link, 'summary': desc, 'docid': docid, 'source': source})
         else:
             print("No hits found for query: " + query)
     except Exception:
         # Best effort: keep whatever was converted so far and report the failure
         print("Converting results to OpenSearch Failed")
     return response
Пример #2
0
 def parse_yahoo_json(self, site, query, results):
     """Create an OpenSearch Response from a dictionary of JSON search results.

     Parameters:

     * site (str): search engine name
     * query (str): query search terms (n.b. not a OpenSearch Query object)
     * results (dict): results from service; the 'link', 'numFound', 'start'
       and 'docs' keys are read

     Returns:

     * Response: one entry per document, built from the first value of its
       'title', 'attr_stream_name' and 'attr_content' fields

     """
     response = Response()
     response.version = 'json'

     # Feed-level metadata, applied in a fixed order
     feed_fields = (
         ('title', "{0}: {1}".format(site, query)),
         ('link', results['link']),
         ('description', "Search results for '{0}' at {1}".format(query, site)),
         ('total_results', results['numFound']),
         ('start', results['start']),
     )
     for key, value in feed_fields:
         response.feed.setdefault(key, value)

     for doc in results['docs']:
         entry = {
             "title": doc['title'][0],
             "link": doc['attr_stream_name'][0],
             "summary": doc['attr_content'][0],
         }
         response.entries.append(entry)

     return response
Пример #3
0
 def parse_google_json(site, url, query, num_results, results):
     """Create a OpenSearch Response from Google AJAX Search results.

     Google's search API returns results in JSON format. This function reads
     the already-loaded JSON and creates an equivalent representation that is
     OpenSearch compliant.

     Parameters:

     * site (str): search engine name
     * url (str): search url used; becomes the feed link when the results
       carry no 'moreResultsUrl'
     * query (str): query search terms (n.b. not a OpenSearch Query object)
     * num_results (int): number of desired results (not used here)
     * results (dict): results from service

     Returns:

     * results (puppy.model.Response)

     """
     response = Response()
     response.version = 'json'
     response.feed.setdefault('title', "{0}: {1}".format(site, query))

     # The cursor block (or individual keys in it) can be absent; the totals
     # below already tolerate that, so the link lookup must not raise first.
     cursor = results.get('cursor') or {}
     response.feed.setdefault('link', cursor.get('moreResultsUrl', url))
     response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
     try:
         response.feed.setdefault('opensearch_totalresults', cursor['estimatedResultCount'])
         response.feed.setdefault('opensearch_startindex', cursor['currentPageIndex'])
     except KeyError:
         response.feed.setdefault('opensearch_totalresults', 0)
         response.feed.setdefault('opensearch_startindex', 0)

     for result in results['results']:
         response.entries.append({'title': result['title'], 'link': result['url'], 'summary': result['content']})

     response.feed.setdefault('opensearch_itemsperpage', len(response.entries))
     return response
Пример #4
0
 def parse_whoosh_trec(site, query, results):
     """Create an OpenSearch Response from Whoosh hits, capped at one page.

     Note: ``self`` is not a parameter; ``self.resultsPerPage`` is resolved
     from the enclosing scope.

     Parameters:

     * site (str): search engine name
     * query (str): query search terms
     * results: Whoosh hits; must support len() and iteration

     Returns:

     * Response: at most ``self.resultsPerPage`` entries with title, link
       and summary; entries may be incomplete if conversion fails part-way

     """
     response = Response()
     response.version = 'trec'
     response.feed.setdefault('title', "{0}: {1}".format(site, query))
     response.feed.setdefault('link', '')
     response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
     response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
     response.feed.setdefault("opensearch_totalresults", len(results))
     response.feed.setdefault("opensearch_itemsperpage", len(results))
     response.feed.setdefault("opensearch_startindex", 1)
     response.feed.setdefault('query', query)
     try:
         if len(results) > 0:  # a single hit is still a hit
             for resultNum, hit in enumerate(results, 1):
                 if resultNum > self.resultsPerPage:
                     break
                 title = hit["title"]
                 # The link re-issues the hit's title as a new query
                 link = "?query=" + title.replace(' ', '+')
                 desc = hit.highlights("content")
                 response.entries.append({'title': title, 'link': link, 'summary': desc})
         else:
             # Paired with the emptiness check, not with the loop, so the
             # message is only printed when there really are no hits.
             print("No hits found for query: " + query)
     except Exception:
         # Best effort: keep whatever was converted so far and report the failure
         print("Converting results to OpenSearch Failed")
     return response
Пример #5
0
        def parse_whoosh_trec(site, query, results):
            """Create an OpenSearch Response of distinct suggestions from Whoosh hits.

            Each hit contributes the part of its "content" highlight before the
            first tab. A candidate is dropped when it was already emitted, when
            it equals the query (ignoring case), or when it is the previous
            candidate followed by "?".

            Note: ``self`` is not a parameter; ``self.resultsPerPage`` is
            resolved from the enclosing scope.

            Parameters:

            * site (str): search engine name
            * query (str): query search terms
            * results: Whoosh hits; must support len() and iteration

            Returns:

            * Response: at most ``self.resultsPerPage`` entries; entries may be
              incomplete if conversion fails part-way

            """
            response = Response()
            response.version = 'trec'
            response.feed.setdefault('title', "{0}: {1}".format(site, query))
            response.feed.setdefault('link', '')
            response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
            response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
            response.feed.setdefault("opensearch_totalresults", len(results))
            response.feed.setdefault("opensearch_itemsperpage", len(results))
            response.feed.setdefault("opensearch_startindex", 1)
            response.feed.setdefault('query', query)

            try:
                duplicates = set()
                buff = ""  # candidate seen on the previous iteration
                if len(results) > 0:  # a single hit is still a hit
                    resultNum = 1
                    for hit in results:
                        if resultNum > self.resultsPerPage:
                            break

                        desc = hit.highlights("content").split("\t")[0]

                        if desc not in duplicates and query.lower() != desc.lower() and desc != (buff + "?"):
                            response.entries.append({'title': desc, 'link': '', 'summary': desc})
                            resultNum += 1
                            duplicates.add(desc)
                        # Remember every candidate, accepted or not
                        buff = desc
                else:
                    print("No hits found for query: " + query)
            except Exception:
                # Best effort: keep whatever was converted so far and report the failure
                print("Converting results to OpenSearch Failed")
            return response
Пример #6
0
 def create_echo_response(query, offset):
     """Build a debugging Response that echoes the terms of the query.

     Every whitespace-separated term of the query becomes one entry whose
     title and summary are the term itself.

     Parameters:

     * query: object exposing the search terms as a ``search_terms`` string
     * offset (int): result offset (not used here)

     Returns:

     * results (puppy.model.Response)

     """
     search_terms = query.search_terms

     response = Response()
     response.version = 'test'
     response.feed.setdefault('title', "EchoSearch")
     response.feed.setdefault('link', "www.puppyIR.eu")
     response.feed.setdefault('description', "Search engine for testing purposes")
     response.feed.setdefault('query', search_terms)

     for word in search_terms.split():
         echoed = {'title': word, 'link': "http://www." + word + ".com", 'summary': word}
         response.entries.append(echoed)
     return response
Пример #7
0
        def parse_bing_xml_response(site, query, results, numResults=10, offset=0):
            """Create an OpenSearch Response from an XML document of 'entry' elements.

            Parameters:

            * site (str): search engine name
            * query (str): query search terms
            * results: XML text handed to BeautifulSoup
            * numResults (int): maximum number of entries to keep
            * offset (int): index of the first 'entry' element to keep

            Returns:

            * Response: the selected window of entries; the feed records the
              number of 'entry' elements seen, the offset and the number kept

            """
            response = Response()
            response.version = 'xml'
            response.feed.setdefault('title', "{0}: {1}".format(site, query))
            response.feed.setdefault('description', "Search results for {0} at {1}".format(query, site))
            response.feed.setdefault('link', '')
            response.namespaces.setdefault('opensearch', 'http://a9.com/-/spec/opensearch/1.1/')

            soup = BeautifulSoup(results)
            window_end = numResults + offset

            seen = 0
            kept = 0
            for entry in soup.findAll('entry'):
                position = seen
                seen += 1
                # Only entries inside [offset, offset + numResults) are converted
                if position < offset or position >= window_end:
                    continue
                response.entries.append({
                    'title': entry.find('d:title').string,
                    'link': entry.find('d:url').string,
                    'summary': entry.find('d:description').string,
                })
                kept += 1

            response.feed.setdefault('opensearch_totalresults', seen)
            response.feed.setdefault('opensearch_startindex', offset)
            response.feed.setdefault('opensearch_itemsperpage', kept)

            return response
Пример #8
0
    def parse_geocode_json(site, url, query, results):
        """Create a OpenSearch Response from Google Geocode results.

        Google's Geocode search API returns results in JSON format. This function
        reads the already-loaded JSON and creates an equivalent representation
        that is OpenSearch compliant.

        Note: ``self`` is not a parameter; ``self.calcDistance`` is resolved
        from the enclosing scope.

        Parameters:

        * site (str): search engine name
        * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
        * query (str): query search terms (n.b. not a OpenSearch Query object)
        * results: iterable of result dicts from the service

        Returns:

        * puppy.model.OpenSearch.Response (results that fail to parse are
          reported on stdout and skipped)

        """
        response = Response()
        response.version = 'json'
        response.feed.setdefault('title', "{0}: {1}".format(site, query))
        response.feed.setdefault('link', url)
        response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
        response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
        response.feed.setdefault("opensearch_startindex", 0)

        for result in results:
            try:
                resultDict = {}
                resultDict['title'] = result['formatted_address']
                resultDict['link'] = ''
                # Long title: every address component's long name, comma separated
                resultDict['longTitle'] = ', '.join(
                    component['long_name'] for component in result['address_components'])

                geometry = result['geometry']
                resultDict['lat'] = geometry['location']['lat']
                resultDict['lon'] = geometry['location']['lng']

                if 'bounds' in geometry:
                    bounds = geometry['bounds']
                    resultDict['neBorderLat'] = bounds['northeast']['lat']
                    resultDict['neBorderLon'] = bounds['northeast']['lng']
                    resultDict['swBorderLat'] = bounds['southwest']['lat']
                    resultDict['swBorderLon'] = bounds['southwest']['lng']
                    resultDict['distanceAcross'] = self.calcDistance(resultDict['neBorderLat'], resultDict['swBorderLat'], resultDict['neBorderLon'], resultDict['swBorderLon'])
                    resultDict['summary'] = "{0} is found at: Latitude: {1}, Longitude: {2}. The area it covers is {3}km across (between the NE and SW corners).".format(resultDict['title'], resultDict['lat'], resultDict['lon'], resultDict['distanceAcross'])
                else:
                    resultDict['summary'] = "{0} is found at: Latitude: {1}, Longitude: {2}.".format(resultDict['title'], resultDict['lat'], resultDict['lon'])
                response.entries.append(resultDict)

            # If there is an arithmetic error, skip the result but note it for the user along with the result in question
            except ArithmeticError:
                note = "Arithmetic Error occured when calculating the distance across for a result."
                print("An {0}\nResult: {1}\n\n".format(note, result))
                continue
            except Exception as e:
                print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url))
                continue

        # Hand the populated response back to the caller, as documented
        return response
Пример #9
0
        def parse_guardian_json(site, url, query, results):
            """Create a OpenSearch Response from Guardian results.

            The Guardian's search API returns results in JSON format. This
            function reads the already-loaded JSON and creates an equivalent
            representation that is OpenSearch compliant.

            Parameters:

            * site (str): search engine name
            * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
            * query (str): query search terms (n.b. not a OpenSearch Query object)
            * results (dict): results from service; 'total', 'pageSize',
              'startIndex' and 'results' are read

            Returns:

            * puppy.model.OpenSearch.Response (results that fail to parse are
              reported on stdout and skipped)

            """
            response = Response()
            response.version = 'json'
            response.feed.setdefault('title', "{0}: {1}".format(site, query))
            response.feed.setdefault('link', url)
            response.feed.setdefault(
                'description',
                "Search results for '{0}' at {1}".format(query, site))
            response.namespaces.setdefault(
                "opensearch", "http://a9.com/-/spec/opensearch/1.1/")
            try:
                response.feed.setdefault("opensearch_totalresults",
                                         int(results['total']))
                response.feed.setdefault("opensearch_itemsperpage",
                                         int(results['pageSize']))
                response.feed.setdefault("opensearch_startindex",
                                         int(results['startIndex']))
            except KeyError:
                # setdefault keeps any value stored before the missing key was hit
                response.feed.setdefault("opensearch_totalresults", 0)
                response.feed.setdefault("opensearch_itemsperpage", 0)
                response.feed.setdefault("opensearch_startindex", 0)

            for result in results['results']:
                try:
                    response.entries.append({
                        'title': result['webTitle'],
                        'link': result['webUrl'],
                        'summary': result['fields']['standfirst'],
                    })
                except Exception as e:  # If there is a parsing problem, print out an error and just skip this individual result
                    print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(
                        e, url))
                    continue

            # Hand the populated response back to the caller, as documented
            return response
Пример #10
0
    def parse_soundcloud_json(site, query, results, url, offset):
        """Create a OpenSearch Response from one page of SoundCloud results.

        SoundCloud's search API returns results in JSON format. This function
        reads the already-loaded JSON and creates an equivalent representation
        that is OpenSearch compliant.

        Note: ``self`` is not a parameter; ``self.resultsPerPage`` is resolved
        from the enclosing scope.

        Parameters:

        * site (str): search engine name
        * query (str): query search terms (n.b. not a OpenSearch Query object)
        * results: indexable sequence of result dicts from the service
        * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
        * offset (int): which page of results we are retrieving

        Returns:

        * puppy.model.OpenSearch.Response (results that fail to parse are
          reported on stdout and skipped)

        """
        response = Response()
        response.version = 'json'
        response.feed.setdefault('title', "{0}: {1}".format(site, query))
        response.feed.setdefault('link', url)
        response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
        response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")

        # Nothing here indexes a mapping, so no KeyError fallback is needed
        page_size = self.resultsPerPage
        response.feed.setdefault("opensearch_itemsperpage", page_size)
        response.feed.setdefault("opensearch_totalresults", len(results))
        response.feed.setdefault("opensearch_startindex", 0)

        # There is no pagination as a parameter, all results are simply returned in one go, so this mimics pagination
        startIndex = offset * page_size
        endIndex = min(startIndex + page_size, len(results))

        # Go through a subset of the results and grab them - corresponding to the page in question
        for i in range(startIndex, endIndex):
            try:
                # NOTE: the entry is the service's own dict, extended in place
                result_dict = results[i]
                result_dict['summary'] = result_dict['description']
                result_dict['link'] = result_dict['permalink_url']
                result_dict['artist'] = result_dict['user']['username']
                response.entries.append(result_dict)
            except Exception as e:
                print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url))
                continue

        # Hand the populated response back to the caller, as documented
        return response
Пример #11
0
    def parse_google_books_json(site, url, pos, query, results):
        """Create a OpenSearch Response from Google Books results.

        Google Books's search API returns results in JSON format. This function
        reads the already-loaded JSON and creates an equivalent representation
        that is OpenSearch compliant.

        Note: ``self`` is not a parameter; ``self.resultsPerPage`` is resolved
        from the enclosing scope.

        Parameters:

        * site (str): search engine name
        * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
        * pos (int): what page number we are on
        * query (str): query search terms (n.b. not a OpenSearch Query object)
        * results (dict): results from service

        Returns:

        * puppy.model.OpenSearch.Response (results that fail to parse are
          reported on stdout and skipped)

        """
        response = Response()
        response.version = 'json'
        response.feed.setdefault('title', "{0}: {1}".format(site, query))
        response.feed.setdefault('link', url)
        response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
        response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
        try:
            response.feed.setdefault("opensearch_totalresults", int(results['totalItems']))
            response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
            response.feed.setdefault("opensearch_startindex", pos)
        except KeyError:
            response.feed.setdefault("opensearch_totalresults", 0)
            response.feed.setdefault("opensearch_itemsperpage", 0)
            response.feed.setdefault("opensearch_startindex", 0)

        # A reply with no matches may carry no 'items' key at all
        for result in results.get('items', []):
            try:
                volume_info = result['volumeInfo']
                # NOTE: the entry is the service's own dict, extended in place
                book_dict = result
                book_dict['title'] = volume_info['title']

                if 'subtitle' in volume_info:
                    book_dict['title'] += " {0}".format(volume_info['subtitle'])

                book_dict['link'] = result['selfLink']

                # The description lives under volumeInfo; a top-level value,
                # if one is ever present, still takes precedence.
                if 'description' in result:
                    book_dict['summary'] = result['description']
                else:
                    # If there is no description it's up to the app developer to make use of the other data
                    book_dict['summary'] = volume_info.get('description', '')
                response.entries.append(book_dict)

            except Exception as e:    # If there is a parsing problem, print out an error and just skip this individual result
                print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url))
                continue

        # Hand the populated response back to the caller, as documented
        return response
Пример #12
0
 def parse_bing_json(site, url, query, results, sources, pos):
     """Create a OpenSearch Response from Bing V2 results.

     The feed metadata is filled in here; the per-source entries are added by
     the parse*Json helpers, each of which receives and returns the response.

     Note: ``self`` is not a parameter; ``self.source`` and
     ``self.resultsPerPage`` are resolved from the enclosing scope.

     Parameters:

     * site (str): search engine name
     * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
     * query (str): query search terms (n.b. not a OpenSearch Query object)
     * results (dict): results from service
     * sources (array): all the sources we are currently using i.e. Web and News or just Web
     * pos (int): what page we are starting on

     Returns:

     * puppy.model.OpenSearch.Response

     """
     description = "Search results for '{0}' at {1}".format(query, site)

     response = Response()
     response.version = 'json'
     response.feed.setdefault('title', "{0}: {1}".format(site, query))
     response.feed.setdefault('link', url)
     response.feed.setdefault('description', description)
     response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")

     try:
         total = int(results[self.source]['Total'])
         response.feed.setdefault("opensearch_totalresults", total)
         response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
         response.feed.setdefault("opensearch_startindex", pos)
     except KeyError:
         for key in ("opensearch_totalresults", "opensearch_itemsperpage", "opensearch_startindex"):
             response.feed.setdefault(key, 0)

     # Each selected source type is parsed only when its data is present
     for selected in sources:
         if selected == 'Web':
             if 'Results' in results['Web']:
                 response = parseWebJson(response, results, url)
         elif selected == 'News':
             if 'News' in results:
                 response = parseNewsJson(response, results, url)
         elif selected == 'Image':
             if 'Results' in results['Image']:
                 response = parseImageJson(response, results, url)
         elif selected == 'Video':
             if 'Results' in results['Video']:
                 response = parseVideoJson(response, results, url)
         elif selected == 'Spell':
             if 'Spell' in results:
                 response = parseSpellJson(response, results, query, url)
         elif selected == 'RelatedSearch':
             if 'RelatedSearch' in results:
                 response = parseRelatedSearchJson(response, results, query, url)

     return response
Пример #13
0
        def parse_web_spell_checker_xml(site, url, query, results):
            """Create a OpenSearch Response from Web Spell Checker results.

            Web Spell Checker's search API returns results in XML format. This
            function parses the XML and creates an equivalent representation
            that is OpenSearch compliant: one entry per child of the
            'suggestions' element found under the first 'misspelling' element.

            Parameters:

            * site (str): search engine name
            * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
            * query (str): query search terms (n.b. not a OpenSearch Query object)
            * results: XML text from the service

            Returns:

            * puppy.model.OpenSearch.Response (without entries when the XML
              contains no misspelling or no suggestions)

            """
            response = Response()
            response.version = 'xml'
            response.feed.setdefault('title', "{0}: {1}".format(site, query))
            response.feed.setdefault('link', url)
            response.feed.setdefault(
                'description',
                "Search results for '{0}' at {1}".format(query, site))
            response.namespaces.setdefault(
                "opensearch", "http://a9.com/-/spec/opensearch/1.1/")
            response.feed.setdefault("opensearch_itemsperpage", '')
            response.feed.setdefault("opensearch_startindex", 0)

            root = etree.XML(results)
            # find() returns None for a missing element, e.g. when nothing
            # in the query was misspelt; that is an empty result, not an error.
            section = root.find("misspelling")
            suggestions = section.find("suggestions") if section is not None else None
            if suggestions is None:
                return response

            for item in suggestions:
                try:
                    suggestion = item.text
                    spell_dict = {
                        "title": "Spelling Suggestion for: '{0}'".format(query),
                        "link": '',
                        "summary": "Original query: '{0}'. Suggested correction of query: '{1}'.".format(
                            query, suggestion),
                        "suggestion": suggestion,
                    }
                    response.entries.append(spell_dict)
                except Exception as e:
                    print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(
                        e, url))
                    continue

            # Hand the populated response back to the caller, as documented
            return response
Пример #14
0
        def parse_digg_json(site, url, pos, query, results):
            """Create a OpenSearch Response from Digg results.

            Digg's search API returns results in JSON format. This function
            reads the already-loaded JSON and creates an equivalent
            representation that is OpenSearch compliant.

            Note: ``self`` is not a parameter; ``self.resultsPerPage`` is
            resolved from the enclosing scope.

            Parameters:

            * site (str): search engine name
            * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
            * pos (int): which page number we're on
            * query (str): query search terms (n.b. not a OpenSearch Query object)
            * results (dict): results from service; 'total' and 'stories' are read

            Returns:

            * puppy.model.OpenSearch.Response (results that fail to parse are
              reported on stdout and skipped)

            """
            response = Response()
            response.version = 'json'
            response.feed.setdefault('title', "{0}: {1}".format(site, query))
            response.feed.setdefault('link', url)
            response.feed.setdefault(
                'description',
                "Search results for '{0}' at {1}".format(query, site))
            response.namespaces.setdefault(
                "opensearch", "http://a9.com/-/spec/opensearch/1.1/")
            try:
                response.feed.setdefault("opensearch_totalresults",
                                         results['total'])
                response.feed.setdefault("opensearch_itemsperpage",
                                         self.resultsPerPage)
                response.feed.setdefault("opensearch_startindex", pos)
            except KeyError:
                response.feed.setdefault("opensearch_totalresults", 0)
                response.feed.setdefault("opensearch_itemsperpage", 0)
                response.feed.setdefault("opensearch_startindex", 0)

            for result in results['stories']:
                try:
                    # NOTE: the entry is the service's own dict, extended in
                    # place. See Digg for the result format for all the other data.
                    item_dict = result
                    item_dict['summary'] = result['description']
                    item_dict['link'] = result['href']
                    response.entries.append(item_dict)
                except Exception as e:  # If there is a parsing problem, print out an error and just skip this individual result
                    print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(
                        e, url))
                    continue

            # Hand the populated response back to the caller, as documented
            return response
Пример #15
0
  def search(self, query, offset):
      """
      Search function for Twitter.

      Parameters:

      * query (puppy.model.OpenSearch.Query)

      * offset (int): result offset for the search

      Returns:

      * puppy.model.Response

      Raises:

      * SearchEngineError: wraps any urllib2.URLError raised by the request
      """
      try:
          pos = self._origin() + offset
          url = 'http://search.twitter.com/search.atom?q={0}&lang={1}&page={2}&result_type={3}&rpp={4}&include_entities={5}'.format(urllib2.quote(query.search_terms), self.language, pos, self.type, self.resultsPerPage, self.includeEntities)

          if self.geocode:
              # Query-string parameters are name=value pairs; with ':' the
              # geocode restriction was never sent as a parameter.
              url += '&geocode={0}'.format(self.geocode)

          data = urllib2.urlopen(url)
          return Response.parse_feed(data.read())

      # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
      except urllib2.URLError as e:
          raise SearchEngineError("Twitter", e, errorType = 'urllib2', url = url)
Пример #16
0
    def search(self, query, offset):
        """
        Query the YouTube GData video feed and parse the reply into a Response.

        Parameters:

        * query (puppy.model.OpenSearch.Query): its search_terms are sent, URL-quoted

        * offset (int): result offset, added to self._origin() to get the start index

        Returns:

        * puppy.model.OpenSearch.Response

        Raises:

        * SearchEngineError: wraps any urllib2.URLError raised by the request

        """
        try:
            start_index = self._origin() + offset
            quoted_terms = urllib2.quote(query.search_terms)
            url = 'http://gdata.youtube.com/feeds/api/videos?vq={0}&racy=exclude&orderby=viewCount&start-index={1}'.format(quoted_terms, start_index)

            feed = urllib2.urlopen(url)
            body = feed.read()
            return Response.parse_feed(body)

        # Service down, missing proxy and similar transport problems end up here
        except urllib2.URLError as e:
            raise SearchEngineError("YouTube", e, errorType='urllib2', url=url)
Пример #17
0
  def search(self, query, offset):
      """
      Fetch results from the PuppyIR Pathfinder service, which searches the
      information centre at the Emma Children's Hospital.

      The RSS reply is parsed into a Response, which is then passed through
      self.addEmmaAge and self.addEmmaItemId before being returned.

      Parameters:

      * query (puppy.model.Query): its search_terms are sent, URL-quoted

      * offset (int): result offset, added to self._origin() to get the page

      Returns:

      * results puppy.model.Response

      Raises:

      * SearchEngineError: wraps any urllib2.URLError raised by the request

      """
      try:
          page = self._origin() + offset
          feed_format = 'rss'
          url = "http://pathfinder.cs.utwente.nl/cgi-bin/opensearch/ekz.cgi?query={0}&page={1}&format={2}&leeftijd={3}&size={4}".format(
              urllib2.quote(query.search_terms), page, feed_format, self.age, self.resultsPerPage)

          raw_feed = urllib2.urlopen(url).read()
          parsed = Response.parse_feed(raw_feed)
          return self.addEmmaItemId(self.addEmmaAge(parsed))

      # Service down, missing proxy and similar transport problems end up here
      except urllib2.URLError as e:
          raise SearchEngineError("PuppyIR Pathfinder Search", e, errorType = 'urllib2', url = url)
Пример #18
0
    def search(self, query, offset):
        """
        Run a YouTube video search and return the parsed feed.

        Parameters:

        * query (puppy.model.OpenSearch.Query): its search_terms are sent, URL-quoted

        * offset (int): result offset, added to self._origin() to get the start index

        Returns:

        * puppy.model.OpenSearch.Response

        Raises:

        * SearchEngineError: wraps any urllib2.URLError raised by the request

        """
        template = "http://gdata.youtube.com/feeds/api/videos?vq={0}&racy=exclude&orderby=viewCount&start-index={1}"

        try:
            first_result = self._origin() + offset
            url = template.format(urllib2.quote(query.search_terms), first_result)

            reply = urllib2.urlopen(url)
            return Response.parse_feed(reply.read())

        # Service down, missing proxy and similar transport problems end up here
        except urllib2.URLError as e:
            raise SearchEngineError("YouTube", e, errorType="urllib2", url=url)
Пример #19
0
        def parse_google_json(site, url, query, num_results, results):
            """Create a OpenSearch Response from Google AJAX Search results.

            Google's search API returns results in JSON format. This function
            reads the already-loaded JSON and creates an equivalent
            representation that is OpenSearch compliant.

            Parameters:

            * site (str): search engine name
            * url (str): search url used; becomes the feed link when the
              results carry no 'moreResultsUrl'
            * query (str): query search terms (n.b. not a OpenSearch Query object)
            * num_results (int): number of desired results (not used here)
            * results (dict): results from service

            Returns:

            * results (puppy.model.Response)

            """
            response = Response()
            response.version = 'json'
            response.feed.setdefault('title', "{0}: {1}".format(site, query))

            # The cursor block (or individual keys in it) can be absent; the
            # totals below already tolerate that, so the link lookup must not
            # raise first.
            cursor = results.get('cursor') or {}
            response.feed.setdefault('link', cursor.get('moreResultsUrl', url))
            response.feed.setdefault(
                'description',
                "Search results for '{0}' at {1}".format(query, site))
            try:
                response.feed.setdefault('opensearch_totalresults',
                                         cursor['estimatedResultCount'])
                response.feed.setdefault('opensearch_startindex',
                                         cursor['currentPageIndex'])
            except KeyError:
                response.feed.setdefault('opensearch_totalresults', 0)
                response.feed.setdefault('opensearch_startindex', 0)

            for result in results['results']:
                response.entries.append({
                    'title': result['title'],
                    'link': result['url'],
                    'summary': result['content']
                })

            response.feed.setdefault('opensearch_itemsperpage',
                                     len(response.entries))
            return response
Пример #20
0
    def parse_wiki_xml(url, query, results):
      """
      Create an OpenSearch Response from Simple Wikipedia search suggestions.

      Simple Wikipedias's search API returns results in XML format. This function simply loads the XML into memory and creates an equivalent representation that is OpenSearch compliant.

      Parameters:

      * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
      * query (str): query search terms (n.b. not a OpenSearch Query object)
      * results (str): XML text returned by the service (it is handed to etree.XML)

      Returns:

      * puppy.model.OpenSearch.Response
      """
      response = Response()
      response.feed.setdefault("title", "Simple Wikipedia Search")
      response.feed.setdefault('link', url)
      response.feed.setdefault("description", "Simple Wikipedia Search Suggestions for: {0}".format(query))
      response.namespaces.setdefault("searchsuggest", "{http://opensearch.org/searchsuggest2}")
      response.version = 'xml'

      root = etree.XML(results)
      ns = response.namespaces["searchsuggest"]
      section = root.find("{0}Section".format(ns))
      # A document without a Section element simply yields no entries
      items = section.findall("{0}Item".format(ns)) if section is not None else []

      for item in items:
        try:
          title = item.find("{0}Text".format(ns)).text
          summary = item.find("{0}Description".format(ns)).text
          link = item.find("{0}Url".format(ns)).text
          image = item.find("{0}Image".format(ns))
          # Default to "" both when there is no Image element and when it has no source attribute
          thumbnail = image.get("source", "") if image is not None else ""
          image_fullsize = ""

          # Compare by value (truthiness); an identity test against a string literal is unreliable
          if thumbnail:
            # Drop the "thumb/" path segment and the trailing size-specific file name
            image_fullsize = thumbnail.replace("thumb/", "").rpartition('/')[0]

          response.entries.append({'title': title, 'summary': summary, 'link': link, 'thumbnail': thumbnail, 'image': image_fullsize})
        except etree.XMLSyntaxError as e:
          print("Skipping a result due to an lxml syntax error.\nWhen parsing a result from: {0}\n".format(url))
          continue
        except etree.LxmlError as e:
          print("Skipping a result due to an lxml error: {0} \nWhen parsing a result from: {1}\n".format(e, url))
          continue

      return response
Пример #21
0
        def parse_spotify_json(site, url, query, results):
            """
            Build an OpenSearch Response from Spotify's JSON search results.

            The feed-level fields are filled in here; the entries themselves are added by the tracks/albums/artists parser that matches self.source, provided the results contain that section.

            Parameters:

            * site (str): search engine name
            * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
            * query (str): query search terms (n.b. not a OpenSearch Query object)
            * results (dict): results from service

            Returns:

            * puppy.model.OpenSearch.Response
            """
            response = Response()
            response.version = 'json'

            feed = response.feed
            feed.setdefault('title', "{0}: {1}".format(site, query))
            feed.setdefault('link', url)
            feed.setdefault('description',
                            "Search results for '{0}' at {1}".format(query, site))
            response.namespaces.setdefault(
                "opensearch", "http://a9.com/-/spec/opensearch/1.1/")

            # OpenSearch paging field -> key inside results['info']
            paging = (
                ("opensearch_totalresults", 'num_results'),
                ("opensearch_itemsperpage", 'limit'),
                ("opensearch_startindex", 'page'),
            )
            try:
                info = results['info']
                for feed_key, info_key in paging:
                    feed.setdefault(feed_key, int(info[info_key]))
            except KeyError:
                # Anything not already set falls back to zero
                for feed_key, _ in paging:
                    feed.setdefault(feed_key, 0)

            # self comes from the enclosing scope; only the section matching the source is parsed
            source = self.source
            if source == 'tracks':
                if 'tracks' in results:
                    response = parse_tracks_json(response, results, url)
            elif source == 'albums':
                if 'albums' in results:
                    response = parse_albums_json(response, results, url)
            elif source == 'artists':
                if 'artists' in results:
                    response = parse_artists_json(response, results, url)

            return response
Пример #22
0
    def search(self, query, offset):
        """
        Search function for Microsoft Bing.

        Parameters:

        * query (puppy.model.Query)

        * offset (int): result offset for the search; when positive it is multiplied by self.resultsPerPage and added to self._origin()

        Returns:

        * results (puppy.model.Response)

        Raises:

        * SearchEngineError: wraps any urllib2.URLError raised while fetching or parsing the feed
        """
        def addDefaultThumbnails(bingResponse):
            """This goes through the results and adds a easy access default thumbnail."""
            for result in bingResponse.entries:
                # The first media thumbnail is promoted to top-level keys
                default = result['media_thumbnail'][0]
                result['thumbnail'] = default['url']
                result['thumbnailWidth'] = default['width']
                result['thumbnailHeight'] = default['height']
            return bingResponse

        try:
            formattedQuery = urllib2.quote(
                self._modify_query(query.search_terms))
            pos = self._origin()

            if (offset > 0):
                pos = pos + (offset * self.resultsPerPage)

            url = 'http://api.search.live.net/rss.aspx?&query={0}&source={1}&{1}.count={2}&{1}.offset={3}&Adult={4}&Market={5}'.format(
                formattedQuery, self.source, self.resultsPerPage, pos,
                self.adult, self.market)

            # If the source type is web or phonebook we can add lon/lat/radius for local search
            if (self.source == 'web') or (self.source == 'phonebook'):
                if (self.lat) and (self.lon) and (self.radius):
                    url += "&Latitude={0}&Longitude={1}&Radius={2}".format(
                        self.lat, self.lon, self.radius)

            data = urllib2.urlopen(url)
            try:
                # Read the whole feed, then release the connection even if read() fails
                feed = data.read()
            finally:
                data.close()
            bingResponse = Response.parse_feed(feed)

            if self.source == 'image':
                bingResponse = addDefaultThumbnails(bingResponse)

            return bingResponse

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("Bing", e, errorType='urllib2', url=url)
Пример #23
0
        def parse_guardian_json(site, url, query, results):
            """Create a OpenSearch Response from Guardian results.

            Guardians's search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant.

            Parameters:

            * site (str): search engine name
            * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
            * query (str): query search terms (n.b. not a OpenSearch Query object)
            * results (dict): results from service

            Returns:

            * puppy.model.OpenSearch.Response

            """
            response = Response()
            response.version = "json"
            response.feed.setdefault("title", "{0}: {1}".format(site, query))
            response.feed.setdefault("link", url)
            response.feed.setdefault("description", "Search results for '{0}' at {1}".format(query, site))
            response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
            try:
                response.feed.setdefault("opensearch_totalresults", int(results["total"]))
                response.feed.setdefault("opensearch_itemsperpage", int(results["pageSize"]))
                response.feed.setdefault("opensearch_startindex", int(results["startIndex"]))
            except KeyError:
                # setdefault leaves any field that was already filled in untouched
                response.feed.setdefault("opensearch_totalresults", 0)
                response.feed.setdefault("opensearch_itemsperpage", 0)
                response.feed.setdefault("opensearch_startindex", 0)

            for result in results["results"]:
                try:
                    response.entries.append(
                        {
                            "title": result["webTitle"],
                            "link": result["webUrl"],
                            "summary": result["fields"]["standfirst"],
                        }
                    )
                except Exception as e:  # If there is a parsing problem, print out an error and just skip this individual result
                    print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url))
                    continue

            # Hand the populated response back, as the docstring promises
            return response
Пример #24
0
  def search(self, query, offset):
    """
    Search function for YouTube.

    Parameters:

    * query (puppy.model.OpenSearch.Query)

    * offset (int): result offset for the search; it is multiplied by self.resultsPerPage and added to self._origin()

    Returns:

    * puppy.model.OpenSearch.Response

    Raises:

    * SearchEngineError: wraps any urllib2.URLError raised while fetching or parsing the feed

    """
    def addExtraFields(youtubeResponse):
      """This goes through the results and adds: the summary field, the embed url and adds a thumbnail shortcut."""
      for result in youtubeResponse.entries:
        author = result['author']
        fullDescription = result['media_group'] # This is author+description+'youtube'
        result['summary'] = fullDescription[len(author):len(fullDescription)-7] #Remove author from start and 'youtube' from end - Perhaps find more elegant method
        result['embedUrl'] = 'http://www.youtube.com/embed/' + result['id'].split(':video:')[1]

        thumbnails = result['media_thumbnail']
        if len(thumbnails) >= 2: # If we have 2 or more thumbnails use the second (hq thumbnail)
          result['thumbnail'] = thumbnails[1]['url']
        elif len(thumbnails) == 1: # Otherwise use the first (it's pretty low res compared to above)
          result['thumbnail'] = thumbnails[0]['url']
        else:
          result['thumbnail'] = '' # If that fails just leave it blank

      return youtubeResponse

    try:
      pos = self._origin() + (offset * self.resultsPerPage)
      url = 'http://gdata.youtube.com/feeds/api/videos?q={0}&max-results={1}&safeSearch={2}&start-index={3}&orderby={4}&v=2'.format(urllib2.quote(query.search_terms), self.resultsPerPage, self.safeSearch, pos, self.orderBy)

      if self.format:
        url += "&format={0}".format(self.format)

      if self.location and self.locationRadius:
        url+= "&location-radius={0}&location={1}".format(self.locationRadius, self.location)
        if self.onlyLocation == True:
          url += '!' # This forces YouTube to only return results with a location

      data = urllib2.urlopen(url)
      try:
        # Read the whole feed, then release the connection even if read() fails
        feed = data.read()
      finally:
        data.close()
      youtubeResponse = Response.parse_feed(feed)
      youtubeResponse = addExtraFields(youtubeResponse) # Does some processing to get embed url, summary and thumbnail shortcut
      return youtubeResponse

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError as e:
      raise SearchEngineError("YouTube V2", e, errorType = 'urllib2', url = url)
Пример #25
0
 def parse_flickr_json(site, query, results):
   """Create a OpenSearch Response from Flickr results.

   Flickr's search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant.

   Parameters:

   * site (str): search engine name
   * query (str): query search terms (n.b. not a OpenSearch Query object)
   * results (dict): results from service; its 'link' value becomes the feed link

   Returns:

   * puppy.model.OpenSearch.Response

   """
   response = Response()
   response.version = 'json'
   response.feed.setdefault('title', "{0}: {1}".format(site, query))
   response.feed.setdefault('link', results['link'])
   response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
   response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
   try:
     response.feed.setdefault("opensearch_totalresults", int(results['total']))
     response.feed.setdefault("opensearch_itemsperpage", int(results['perpage']))
     response.feed.setdefault("opensearch_startindex", int(results['page']))
   except KeyError:
     # setdefault leaves any field that was already filled in untouched
     response.feed.setdefault("opensearch_totalresults", 0)
     response.feed.setdefault("opensearch_itemsperpage", 0)
     response.feed.setdefault("opensearch_startindex", 0)

   if 'photo' in results:
     for result in results['photo']:
       # Links need to be created from several fields - see the Flickr API for a detailed explanation

       try:
         resultLink = "http://www.flickr.com/photos/{0}/{1}".format(result['owner'], result['id'])
         resultThumbnail = "http://farm{0}.static.flickr.com/{1}/{2}_{3}_t.jpg".format(result['farm'], result['server'], result['id'], result['secret'])
         resultSummary = "Photo result for '{0}' from {1}".format(query, site)
         response.entries.append({'title': result['title'], 'link': resultLink, 'summary': resultSummary, 'thumbnail': resultThumbnail})
       except Exception as e:
         print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, results['link']))
         continue

   # Hand the populated response back, as the docstring promises
   return response
Пример #26
0
 def parse_whoosh_trec(site, query, results):
     """Create a Response from a page of Whoosh TREC results.

     Parameters:

     * site (str): search engine name
     * query (str): query search terms; also used as the title of any hit whose own title is blank
     * results: page of hits; pagecount, pagenum and pagelen are read from it and it is iterated for the hits

     Returns:

     * Response: entries stay empty when there are no hits or when the conversion fails
     """
     response = Response()
     response.version = 'trec'
     response.feed.setdefault('title', "{0}: {1}".format(site, query))
     response.feed.setdefault('link', '')
     response.feed.setdefault(
         'description',
         "Search results for '{0}' at {1}".format(query, site))
     response.namespaces.setdefault(
         "opensearch", "http://a9.com/-/spec/opensearch/1.1/")
     response.feed.setdefault("opensearch_totalresults",
                              results.pagecount)
     # pagelen is neither a parameter nor a local; it is looked up in the enclosing scope
     response.feed.setdefault("opensearch_itemsperpage", pagelen)
     response.feed.setdefault("opensearch_startindex", results.pagenum)
     response.feed.setdefault('query', query)
     try:
         # A single hit is still a hit, so only an empty result set counts as "no hits"
         if len(results) > 0:
             for r, hit in enumerate(results, 1):
                 title = hit["title"].strip()
                 if len(title) < 1:
                     title = query
                 # Rank across pages: hits on earlier pages plus the 1-based position on this page
                 rank = (
                     (int(results.pagenum) - 1) * results.pagelen) + r
                 link = "/treconomics/" + str(
                     hit.docnum) + "?rank=" + str(rank)
                 desc = hit.highlights("content")
                 docid = hit["docid"].strip()
                 source = hit["source"]
                 response.entries.append({
                     'title': title,
                     'link': link,
                     'summary': desc,
                     'docid': docid,
                     'source': source
                 })
         else:
             print("No hits found for query: " + query)
     except Exception:
         # Best effort: keep whatever was converted so far and report the failure
         print("Converting results to OpenSearch Failed")

     return response
Пример #27
0
 def parse_digg_json(site, url, pos, query, results):
   """Create a OpenSearch Response from Digg results.

   Digg's search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant.

   Parameters:

   * site (str): search engine name
   * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
   * pos(int): which page number we're on
   * query (str): query search terms (n.b. not a OpenSearch Query object)
   * results (dict): results from service; each story dict gains 'summary' and 'link' keys in place

   Returns:

   * puppy.model.OpenSearch.Response

   """
   response = Response()
   response.version = 'json'
   response.feed.setdefault('title', "{0}: {1}".format(site, query))
   response.feed.setdefault('link', url)
   response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
   response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
   try:
     response.feed.setdefault("opensearch_totalresults", results['total'])
     # self is not a parameter here; it is looked up in the enclosing scope
     response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
     response.feed.setdefault("opensearch_startindex", pos)
   except KeyError:
     response.feed.setdefault("opensearch_totalresults", 0)
     response.feed.setdefault("opensearch_itemsperpage", 0)
     response.feed.setdefault("opensearch_startindex", 0)

   for result in results['stories']:
     try:
       item_dict = result    # See Digg for the result format for all the other data
       # item_dict is the story dict itself, so these keys are added to the caller's data too
       item_dict['summary'] = result['description']
       item_dict['link'] = result['href']
       response.entries.append(item_dict)
     except Exception as e:    # If there is a parsing problem, print out an error and just skip this individual result
       print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url))
       continue

   # Hand the populated response back, as the docstring promises
   return response
Пример #28
0
    def _parse_bing_response(self, query, results, offset):
        """Handles processing the JSON into a PuppyIR Response

        Parameters:

        * query: query object; its search_terms go into the feed title and it is passed on to some of the per-source parsers
        * results: decoded results handed to the parser matching self.source
        * offset (int): stored as the feed's opensearch_startindex

        Returns:

        * Response: feed fields filled in and one entry per parsed result; no entries when self.source is not one of the handled sources
        """
        response = Response()
        response.version = 'json'
        response.feed.setdefault(
            'title',
            "Results from %s for: %s" % (self.engineName, query.search_terms))
        response.feed.setdefault('link', "")
        response.feed.setdefault(
            'description',
            "%s results from %s" % (self.source, self.engineName))
        response.namespaces.setdefault("opensearch",
                                       "http://a9.com/-/spec/opensearch/1.1/")

        # Start from an empty list so an unhandled source yields an empty response
        # instead of failing on an unbound name further down
        entries = []

        if self.source == "Web":
            entries = self._parse_web_results(results)

        elif self.source == "Image":
            entries = self._parse_image_results(query, results)

        elif self.source == "News":
            entries = self._parse_news_results(results)

        elif self.source == "RelatedSearch":
            entries = self._parse_related_results(query, results)

        elif self.source == "Video":
            entries = self._parse_video_results(query, results)

        elif self.source == "SpellingSuggestions":
            entries = self._parse_spelling_results(query, results)

        for entry in entries:
            response.entries.append(entry)

        response.feed.setdefault('opensearch_totalresults', len(entries))
        response.feed.setdefault('opensearch_startindex', offset)
        response.feed.setdefault('opensearch_itemsperpage',
                                 self.resultsPerPage)

        return response
Пример #29
0
  def __init__(self, service_manager, name, query=None):
    """Set up the service and the state used while iterating over results.

    Parameters:

    * service_manager: forwarded to the parent constructor
    * name: forwarded to the parent constructor
    * query: query kept on the instance for the iterator (default None)
    """
    super(SearchServiceIterable, self).__init__(service_manager, name)

    # Iterator state
    self.query = query

    # The buffer starts as a placeholder Response: a total of sys.maxint marks it as
    # "not a real response yet", so a real one has to be requested in Next
    placeholder = Response()
    placeholder.feed['opensearch_totalresults'] = sys.maxint
    self.resultsBuffer = placeholder

    self.next_result = 0
Пример #30
0
        def parse_bing_xml_response(site,
                                    query,
                                    results,
                                    numResults=10,
                                    offset=0):
            """Build a Response from Bing XML results, keeping one window of entries.

            Parameters:

            * site (str): search engine name
            * query (str): query search terms
            * results: XML handed to BeautifulSoup
            * numResults (int): maximum number of entries to keep
            * offset (int): index of the first 'entry' element to keep

            Returns:

            * Response: opensearch_totalresults counts every 'entry' element seen, opensearch_itemsperpage counts the entries actually kept
            """
            soup = BeautifulSoup(results)

            response = Response()
            response.version = 'xml'
            feed = response.feed
            feed.setdefault('title', "{0}: {1}".format(site, query))
            feed.setdefault('description',
                            "Search results for {0} at {1}".format(query, site))
            feed.setdefault('link', '')
            response.namespaces.setdefault(
                'opensearch', 'http://a9.com/-/spec/opensearch/1.1/')

            seen = 0
            kept = 0
            for position, node in enumerate(soup.findAll('entry')):
                seen = position + 1
                # Skip everything outside the [offset, offset + numResults) window
                if not (offset <= position < (numResults + offset)):
                    continue
                title_text = node.find('d:title').string
                url_text = node.find('d:url').string
                description_text = node.find('d:description').string
                response.entries.append({
                    'title': title_text,
                    'link': url_text,
                    'summary': description_text
                })
                kept += 1

            feed.setdefault('opensearch_totalresults', seen)
            feed.setdefault('opensearch_startindex', offset)
            feed.setdefault('opensearch_itemsperpage', kept)

            return response
Пример #31
0
  def search(self, query, offset):
    """
    Search function for Microsoft Bing.

    Parameters:

    * query (puppy.model.Query)

    * offset (int): result offset for the search; when positive it is multiplied by self.resultsPerPage and added to self._origin()

    Returns:

    * results (puppy.model.Response)

    Raises:

    * SearchEngineError: wraps any urllib2.URLError raised while fetching or parsing the feed
    """

    def addDefaultThumbnails(bingResponse):
      """This goes through the results and adds a easy access default thumbnail."""
      for result in bingResponse.entries:
        # The first media thumbnail is promoted to top-level keys
        default = result['media_thumbnail'][0]
        result['thumbnail'] = default['url']
        result['thumbnailWidth'] = default['width']
        result['thumbnailHeight'] = default['height']
      return bingResponse

    try:
      formattedQuery = urllib2.quote(self._modify_query(query.search_terms))
      pos = self._origin()

      if (offset > 0):
        pos = pos + (offset * self.resultsPerPage)

      url = 'http://api.search.live.net/rss.aspx?&query={0}&source={1}&{1}.count={2}&{1}.offset={3}&Adult={4}&Market={5}'.format(formattedQuery, self.source, self.resultsPerPage, pos, self.adult, self.market)

      # If the source type is web or phonebook we can add lon/lat/radius for local search
      if(self.source == 'web') or (self.source == 'phonebook'):
        if (self.lat) and (self.lon) and (self.radius):
          url += "&Latitude={0}&Longitude={1}&Radius={2}".format(self.lat, self.lon, self.radius)

      data = urllib2.urlopen(url)
      try:
        # Read the whole feed, then release the connection even if read() fails
        feed = data.read()
      finally:
        data.close()
      bingResponse = Response.parse_feed(feed)

      if self.source == 'image':
        bingResponse = addDefaultThumbnails(bingResponse)

      return bingResponse

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError as e:
      raise SearchEngineError("Bing", e, errorType = 'urllib2', url = url)
Пример #32
0
    def search(self, query, pos=0):
        """Search function for OpenSearch compliant website.

        When self.template is set, the search is executed with it; without a template this method does nothing.

        Parameters:

        * query (puppy.model.Query)
        * pos (int): start position; substituted for '{start}' in the template only when it is not 0

        Returns:

        * results (puppy.model.Response), or None when there is no template, when the request fails, or when self.results is set without self.xml

        Note: a urllib2.URLError is not raised to the caller; it is reported with a printed message.

        """

        if self.template:
            # template exists, use it to search
            search_url = self.template.replace(
                '{searchTerms}', urllib2.quote(query.search_terms))
            if (pos != 0):
                # quote() only works on strings, so the numeric position is converted first
                search_url = search_url.replace('{start}',
                                                urllib2.quote(str(pos)))
            try:
                connection = urllib2.urlopen(search_url)
                try:
                    # Read everything, then release the connection even if read() fails
                    response = connection.read()
                finally:
                    connection.close()
                if self.results and self.xml:
                    return Response.parse_feed(response)
                elif not self.results and self.xml:
                    return Response.parse_xml_suggestions(response)
                elif not self.results and not self.xml:
                    return Response.parse_json_suggestions(response)
            except urllib2.URLError as e:
                print("Opensearch for {0} failed".format(self.url))
Пример #33
0
    def parse_web_spell_checker_xml(site, url, query, results):
      """
      Create an OpenSearch Response from Web Spell Checker results.

      Web Spell Checker's search API returns results in XML format. This function simply loads the XML into memory and creates an equivalent representation that is OpenSearch compliant.

      Parameters:

      * site (str): search engine name
      * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
      * query (str): query search terms (n.b. not a OpenSearch Query object)
      * results (str): XML text returned by the service (it is handed to etree.XML)

      Returns:

      * puppy.model.OpenSearch.Response: one entry per suggestion; no entries when the XML has no misspelling/suggestions element

      """
      response = Response()
      response.version = 'xml'
      response.feed.setdefault('title', "{0}: {1}".format(site, query))
      response.feed.setdefault('link', url)
      response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
      response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
      response.feed.setdefault("opensearch_itemsperpage", '')
      response.feed.setdefault("opensearch_startindex", 0)

      root = etree.XML(results)
      section = root.find("misspelling")
      # find() returns None for a missing element; treat that as "nothing to suggest"
      suggestions = section.find("suggestions") if section is not None else None
      if suggestions is None:
        suggestions = []

      for item in suggestions:
        try:
          suggestion = item.text
          spell_dict = {"title": "Spelling Suggestion for: '{0}'".format(query),  "link": ''}
          spell_dict['summary'] = "Original query: '{0}'. Suggested correction of query: '{1}'.".format(query, suggestion)
          spell_dict['suggestion'] = suggestion
          response.entries.append(spell_dict)
        except Exception as e:
          print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url))
          continue

      # Hand the populated response back, as the docstring promises
      return response
Пример #34
0
 def parse_rotten_tomatoes_json(site, pos, query, results):
   """
   Create an OpenSearch Response from Rotten Tomatoes results.

   Rotten Tomatoes's search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant.

   Parameters:

   * site (str): search engine name
   * pos (int): stored as the feed's opensearch_startindex
   * query (str): query search terms (n.b. not a OpenSearch Query object)
   * results (dict): results from service; each movie dict gains 'link' and 'summary' keys in place

   Returns:

   * puppy.model.OpenSearch.Response
   """
   response = Response()
   response.version = 'json'
   response.feed.setdefault('title', "{0}: {1}".format(site, query))
   feed_link = results['links']['self']
   response.feed.setdefault('link', feed_link)
   response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
   response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")

   try:
     response.feed.setdefault("opensearch_totalresults", int(results['total']))
     # self is not a parameter here; it is looked up in the enclosing scope
     response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
     response.feed.setdefault("opensearch_startindex", pos)
   except KeyError:
     response.feed.setdefault("opensearch_totalresults", 0)
     response.feed.setdefault("opensearch_itemsperpage", 0)
     response.feed.setdefault("opensearch_startindex", 0)

   for result in results['movies']:
     try:
       # movie_dict is the movie dict itself, so these keys are added to the caller's data too
       movie_dict = result
       movie_dict['link'] = result['links']['alternate']
       movie_dict['summary'] = result['synopsis']
       response.entries.append(movie_dict)
     except Exception as e:
       # Report the feed's own link; this function has no url parameter or local
       print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, feed_link))
       continue

   # Hand the populated response back, as the docstring promises
   return response
Пример #35
0
    def parse_spotify_json(site, url, query, results):
      """
      Build an OpenSearch Response from Spotify's JSON search results.

      The feed-level fields are filled in here; the entries themselves are added by the tracks/albums/artists parser that matches self.source, provided the results contain that section.

      Parameters:

      * site (str): search engine name
      * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
      * query (str): query search terms (n.b. not a OpenSearch Query object)
      * results (dict): results from service

      Returns:

      * puppy.model.OpenSearch.Response
      """
      response = Response()
      response.version = 'json'

      feed = response.feed
      feed.setdefault('title', "{0}: {1}".format(site, query))
      feed.setdefault('link', url)
      feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
      response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")

      # OpenSearch paging field -> key inside results['info']
      paging = (
        ("opensearch_totalresults", 'num_results'),
        ("opensearch_itemsperpage", 'limit'),
        ("opensearch_startindex", 'page'),
      )
      try:
        info = results['info']
        for feed_key, info_key in paging:
          feed.setdefault(feed_key, int(info[info_key]))
      except KeyError:
        # Anything not already set falls back to zero
        for feed_key, _ in paging:
          feed.setdefault(feed_key, 0)

      # self comes from the enclosing scope; only the section matching the source is parsed
      source = self.source
      if source == 'tracks':
        if 'tracks' in results:
          response = parse_tracks_json(response, results, url)
      elif source == 'albums':
        if 'albums' in results:
          response = parse_albums_json(response, results, url)
      elif source == 'artists':
        if 'artists' in results:
          response = parse_artists_json(response, results, url)

      return response
Пример #36
0
    def search(self, query, pos=0):
        """Search function for OpenSearch compliant website.

        When self.template is set, the search is executed with it; without a template this method does nothing.

        Parameters:

        * query (puppy.model.Query)
        * pos (int): start position; substituted for '{start}' in the template only when it is not 0

        Returns:

        * results (puppy.model.Response), or None when there is no template, when the request fails, or when self.results is set without self.xml

        Note: a urllib2.URLError is not raised to the caller; it is reported with a printed message.

        """

        if self.template:
            # template exists, use it to search
            search_url = self.template.replace("{searchTerms}", urllib2.quote(query.search_terms))
            if pos != 0:
                # quote() only works on strings, so the numeric position is converted first
                search_url = search_url.replace("{start}", urllib2.quote(str(pos)))
            try:
                connection = urllib2.urlopen(search_url)
                try:
                    # Read everything, then release the connection even if read() fails
                    response = connection.read()
                finally:
                    connection.close()
                if self.results and self.xml:
                    return Response.parse_feed(response)
                elif not self.results and self.xml:
                    return Response.parse_xml_suggestions(response)
                elif not self.results and not self.xml:
                    return Response.parse_json_suggestions(response)
            except urllib2.URLError as e:
                print("Opensearch for {0} failed".format(self.url))
Пример #37
0
 def parse_yahoo_json(site, query, results):
   """Create a OpenSearch Response from Yahoo! BOSS results.

   Yahoo!'s search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant.

   Parameters:

   * site (str): search engine name
   * query (str): query search terms (n.b. not a OpenSearch Query object)
   * results (dict): results from service; its 'link' value becomes the feed link

   Returns:

   * puppy.model.OpenSearch.Response

   """
   response = Response()
   response.version = 'json'
   response.feed.setdefault('title', "{0}: {1}".format(site, query))
   response.feed.setdefault('link', results['link'])
   response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
   response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")

   try:
     response.feed.setdefault("opensearch_totalresults", int(results['totalhits']))
     response.feed.setdefault("opensearch_itemsperpage", int(results['count']))
     response.feed.setdefault("opensearch_startindex", int(results['start']))
   except KeyError:
     # setdefault leaves any field that was already filled in untouched
     response.feed.setdefault("opensearch_totalresults", 0)
     response.feed.setdefault("opensearch_itemsperpage", 0)
     response.feed.setdefault("opensearch_startindex", 0)

   for result in results['resultset_web']:
     try:
       response.entries.append({'title': result['title'], 'link': result['url'], 'summary': result['abstract']})
     except Exception as e:    # If there is a parsing problem, print out an error and just skip this individual result
       # Report the feed's own link; this function has no url parameter or local
       print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, results['link']))
       continue

   # Hand the populated response back, as the docstring promises
   return response
Пример #38
0
    def _parse_bing_response(self, query, results, offset):
        """Handles processing the JSON into a PuppyIR Response.

        The parser used for the entries is chosen by ``self.source``; a source
        that matches none of the known names yields a response without entries.

        Parameters:

        * query: query object; its ``search_terms`` go into the feed title and
          it is forwarded to the parsers that take it
        * results: decoded results handed to the source specific parser
        * offset: value stored as the OpenSearch start index

        Returns:

        * Response with the parsed entries and the OpenSearch counters set
        """
        response = Response()
        response.version = 'json'
        response.feed.setdefault('title', "Results from %s for: %s" % (self.engineName, query.search_terms))
        response.feed.setdefault('link', "")
        response.feed.setdefault('description', "%s results from %s" % (self.source, self.engineName))
        response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")

        source = self.source
        if source == "Web":
            entries = self._parse_web_results(results)
        elif source == "Image":
            entries = self._parse_image_results(query, results)
        elif source == "News":
            entries = self._parse_news_results(results)
        elif source == "RelatedSearch":
            entries = self._parse_related_results(query, results)
        elif source == "Video":
            entries = self._parse_video_results(query, results)
        elif source == "SpellingSuggestions":
            entries = self._parse_spelling_results(query, results)
        else:
            # Unknown source: previously 'entries' stayed unbound and the loop
            # below failed with UnboundLocalError.
            entries = []

        for entry in entries:
            response.entries.append(entry)

        response.feed.setdefault('opensearch_totalresults', len(entries))
        response.feed.setdefault('opensearch_startindex', offset)
        response.feed.setdefault('opensearch_itemsperpage', self.resultsPerPage)

        return response
Пример #39
0
  def search(self, query, limit):
    """
    Search function for Hospital Data (SQL Server full-text search).

    Runs a FREETEXTTABLE query for the search terms, joins the image table,
    and turns the ``FOR xml`` output into a Response.

    Parameters:

    * query (puppy.model.Query): its ``search_terms`` are the free-text terms

    * limit (int): not used by this implementation; the number of rows is
      capped by ``self.resultsPerPage``

    Returns:

    * results puppy.model.Response

    Raises:

    * SearchEngineError: wraps any exception raised while querying or parsing
    """
    try:
      # Table names come from the service configuration and are spliced in.
      # The user supplied search terms are passed as a bound parameter (%s)
      # so quotes in them cannot break out of the SQL string literal.
      sql_query = ''.join([
        'SELECT  temp.Id, temp.Titel, temp.Auteurs, temp.Leeftijdscategorie, temp.Annotatie, temp.ISBN, temp.Editie, temp.Uitgever, temp.[Classificatie code], temp.[Jaar van uitgave], temp.[Plaats van uitgave], temp.[Ref Type], temp.[Prijs], temp.[Serie], img.location  FROM (SELECT TOP ',
        str(self.resultsPerPage),
        ' * FROM ',
        str(self._database_bookstable),
        ' INNER JOIN FREETEXTTABLE(',
        str(self._database_bookstable),
        ',*,%s) as ft ON ft.[KEY]=Id ORDER BY ft.RANK DESC) as temp LEFT OUTER JOIN ',
        str(self._database_images),
        ' as img ON temp.Id = img.item_id  FOR xml raw, ELEMENTS;',
      ])

      conn = _mssql.connect(server=self._server, user=self._user, password=self._password, database=self._database)
      try:
        conn.execute_query(sql_query, (query.search_terms,))
        # The XML document arrives split over several rows; glue it together.
        xml_parts = [row[0].strip() for row in conn]
      finally:
        # Always hand the connection back, also when the query fails.
        conn.close()

      response = self.cleanXmlEmma(''.join(xml_parts))
      # add rss tag for parsing
      response = "<rss>" + response + "</rss>"

      results = Response.parse_feed(response)
      results = self.parseEmmaAge(results)
      results = self.parseEmmaLinks(results)
      return results

    # Catch all exception, just in case
    except Exception as e:
      raise SearchEngineError("PuppyIR Emma Sql Server Search", e)
Пример #40
0
    def load(self, folp='', filn=''):
        """Load a pickled result from ``<folp>/<filn>.p``.

        Parameters:

        * folp (string): folder path, e.g. './folderpath' or
          'c:/folder1/folder2'; defaults to ``self.folderPath`` when empty
        * filn (string): file name without the '.p' extension; defaults to
          ``self.fileName`` when empty

        Returns:

        * the unpickled object, normally a Response. Any other object that was
          saved is returned as well, after printing a warning. None is
          returned when the folder or the file cannot be read.

        NOTE: unpickling can execute arbitrary code; only load files written
        by a trusted source.
        """
        # The two defaults are independent: omitting both arguments must fall
        # back to the configured folder as well as the configured file name.
        if filn == '':
            filn = self.fileName
        if folp == '':
            folp = self.folderPath

        if not os.path.isdir(folp + '/'):
            print("The path doesn't exist")
            return None

        try:
            with open(folp + '/' + filn + '.p', "rb") as pickled:
                resp = pickle.load(pickled)
        except IOError:
            print("The path doesn't exist")
            return None

        if not isinstance(resp, Response):
            print("The object in the Pickle file is not a Response Type. Be carefull")
        return resp
Пример #41
0
 def simplesearch(self, query, offset=0):
     """Search without query and result filter pipelines.

     Parameters:

     * query (puppy.model.Query): search query
     * offset (int): result offset

     Returns:

     * results (puppy.model.Response): whatever the search engine wrapper
       returned, or an empty Response when the engine failed and
       ``self.search_engine.handleException`` is set

     Raises:

     * SearchEngineError: when the engine failed and errors are not handled
     """
     try:  # Get the results from the defined search engine wrapper
         return self.search_engine.search(query, offset)
     except Exception as e:
         if self.search_engine.handleException:  # Should we ignore this?
             print("Handled Search Engine Error")
             return Response()
         # Or should we raise an exception?
         raise SearchEngineError(self.search_engine.__class__.__name__, e)
Пример #42
0
    def search(self, query, offset=0, highlight=False):
        """Search with the query pipelines active.

        The query is logged (before or after processing, depending on
        ``self.postLogging``), sent through the query modifiers and the query
        filters, and then passed to the search engine wrapper.

        Parameters:

        * query (puppy.model.Query): search query
        * offset (int): result offset
        * highlight (bool): not used by the code in this method

        Returns:

        * results (puppy.model.Response): search results as returned by the
          engine, or an empty Response when the engine failed and
          ``self.search_engine.handleException`` is set

        Raises:

        * SearchEngineError: when the engine failed and errors are not handled
        """
        logger = self.query_logger

        # Do pre-pipeline query logging
        if logger and (self.postLogging == False):
            logger.log(query)

        # Run through the query modifier pipeline and get the modified query back
        query = self._run_query_modifiers(query)

        # Run the query filter pipeline
        self._run_query_filters(query)

        # Do post-pipeline query logging
        if logger and (self.postLogging == True):
            # Processed i.e. the query after going through the query pipeline
            logger.log(query, processed=True)

        try:  # Get the results from the defined search engine wrapper
            results = self.search_engine.search(query, offset)
        except Exception as e:
            if self.search_engine.handleException:  # Should we ignore this?
                print("Handled Search Engine Error")
                return Response()
            # Or should we raise an exception?
            raise SearchEngineError(self.search_engine.__class__.__name__, e)

        return results
Пример #43
0
    def search(self, query, offset):
        """
        Search function for Picassa.

        Parameters:

        * query (puppy.model.OpenSearch.Query)

        * offset (int): result offset for the search, added to self._origin()

        Returns:

        * puppy.model.OpenSearch.Response whose entries carry an extra
          'thumbnail' key

        Raises:

        * SearchEngineError: wraps the urllib2.URLError of a failed request
        """
        def addThumbnailShortcut(picassaResponse):
            """This goes through the results and adds a thumbnail shortcut"""
            for result in picassaResponse.entries:
                try:
                    thumbnails = result['media_thumbnail']
                    # Prefer the third thumbnail; fall back to the last one
                    # available when the entry has fewer than three.
                    result['thumbnail'] = thumbnails[min(2, len(thumbnails) - 1)]['url']
                except (KeyError, IndexError):
                    result['thumbnail'] = ''  # No usable thumbnail, leave it blank
            return picassaResponse

        # Built outside the try block so the error handler can always report it
        pos = self._origin() + offset
        url = 'https://picasaweb.google.com/data/feed/api/all?q={0}&access={1}&kind={2}&start-index={3}&max-results={4}'.format(
            urllib2.quote(query.search_terms), self.access, self.kind, pos,
            self.resultsPerPage)

        try:
            data = urllib2.urlopen(url)
            try:
                picassaResponse = Response.parse_feed(data.read())
            finally:
                data.close()  # Do not leave the HTTP connection open
            return addThumbnailShortcut(picassaResponse)

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("Picassa", e, errorType='urllib2', url=url)
Пример #44
0
    def search(self, query, offset):
        """
        Search function for retrieving results from the PuppyIR Pathfinder
        service which searches the information centre at the Emma Children's
        Hospital.

        Parameters:

        * query (puppy.model.Query)

        * offset (int): result offset for the search, added to self._origin()

        Returns:

        * results puppy.model.Response, post-processed by addEmmaAge and
          addEmmaItemId

        Raises:

        * SearchEngineError: wraps the urllib2.URLError of a failed request
        """
        # Built outside the try block so the error handler can always report it
        pos = self._origin() + offset
        feed_format = 'rss'
        url = "http://pathfinder.cs.utwente.nl/cgi-bin/opensearch/ekz.cgi?query={0}&page={1}&format={2}&leeftijd={3}&size={4}".format(
            urllib2.quote(query.search_terms), pos, feed_format, self.age,
            self.resultsPerPage)

        try:
            data = urllib2.urlopen(url)
            try:
                emmaResponse = Response.parse_feed(data.read())
            finally:
                data.close()  # Do not leave the HTTP connection open
            emmaResponse = self.addEmmaAge(emmaResponse)
            emmaResponse = self.addEmmaItemId(emmaResponse)
            return emmaResponse

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("PuppyIR Pathfinder Search",
                                    e,
                                    errorType='urllib2',
                                    url=url)
Пример #45
0
  def search(self, query, offset):
    """
    Search function for Picassa.

    Parameters:

    * query (puppy.model.OpenSearch.Query)

    * offset (int): result offset for the search, added to self._origin()

    Returns:

    * puppy.model.OpenSearch.Response whose entries carry an extra
      'thumbnail' key

    Raises:

    * SearchEngineError: wraps the urllib2.URLError of a failed request
    """
    def addThumbnailShortcut(picassaResponse):
      """This goes through the results and adds a thumbnail shortcut"""
      for result in picassaResponse.entries:
        try:
          thumbnails = result['media_thumbnail']
          # Prefer the third thumbnail; fall back to the last one available
          # when the entry has fewer than three.
          result['thumbnail'] = thumbnails[min(2, len(thumbnails) - 1)]['url']
        except (KeyError, IndexError):
          result['thumbnail'] = ''  # No usable thumbnail, leave it blank
      return picassaResponse

    # Built outside the try block so the error handler can always report it
    pos = self._origin() + offset
    url = 'https://picasaweb.google.com/data/feed/api/all?q={0}&access={1}&kind={2}&start-index={3}&max-results={4}'.format(urllib2.quote(query.search_terms), self.access, self.kind, pos, self.resultsPerPage)

    try:
      data = urllib2.urlopen(url)
      try:
        picassaResponse = Response.parse_feed(data.read())
      finally:
        data.close()  # Do not leave the HTTP connection open
      return addThumbnailShortcut(picassaResponse)

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError as e:
      raise SearchEngineError("Picassa", e, errorType = 'urllib2', url = url)
Пример #46
0
    def parse_wordnik_json(site, query, results, url):
      """
      Wordnik's search API returns results in JSON format. This function takes
      the decoded JSON and creates an equivalent representation that is
      OpenSearch compliant.

      ``self`` is not a parameter: ``self.source`` and ``self.resultsPerPage``
      are read from the enclosing scope.

      Parameters:

      * site (str): search engine name
      * query (str): query search terms (n.b. not a OpenSearch Query object)
      * results (dict): results from service
      * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response

      Returns:

      * puppy.model.OpenSearch.Response

      """
      response = Response()
      response.version = 'json'
      response.feed.setdefault('title', "{0}: {1}".format(site, query))
      response.feed.setdefault('link', url)
      response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
      response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")

      # setdefault cannot raise KeyError, so no fallback branch is needed here
      response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
      response.feed.setdefault("opensearch_startindex", 0)

      source = self.source
      if (source == 'Suggestions') and ('suggestions' in results):
        response.entries = parseSuggestionsJson(results, query, url)
        total = len(results['suggestions'])
      elif (source == 'Examples') and ('examples' in results):
        response.entries = parseExamplesJson(results, url)
        total = len(results['examples'])
      elif source == 'Definitions':
        response.entries = parseDefinitionsJson(results, url)
        total = len(results)
      else:
        # Unknown source or the expected key is missing: report no results
        total = 0
      response.feed.setdefault("opensearch_totalresults", total)

      return response
Пример #47
0
class SearchServiceIterable(SearchService):
    """A SearchService that can be used as an iterator.

    Iterating yields the result entries of ``self.query`` one by one; a new
    page is requested through ``self.search`` whenever the buffered page has
    been used up.
    """
    def __init__(self, service_manager, name, query=None):
        """Constructor for Service.

        Parameters:

        * service_manager: passed on to SearchService
        * name: passed on to SearchService
        * query: query to iterate over; may also be set later with set_query
        """

        super(SearchServiceIterable, self).__init__(service_manager, name)

        # for iterator
        self.query = query
        self.resultsBuffer = Response()
        # The initial buffer is a placeholder, not a real response: the huge
        # total forces next() to ask the engine for a real one.
        self.resultsBuffer.feed['opensearch_totalresults'] = sys.maxint
        self.next_result = 0

    def set_query(self, query):
        """
        Sets the query

        It is mandatory to set a query, here or in the constructor.
        """
        self.query = query

    def __iter__(self):
        """
        Iterator member function

        Parameters:

        * None

        Returns:

        * An iterator for the object, in this case, the object itself
        """
        return self

    def next(self):
        """
        Iterator member function

        Parameters:

        * None

        Returns:

        * The next item of the iterator, the next entry in the results

        Raises:

        * StopIteration: when no query is set or no further entry is available
        """
        if self.query is None:
            print("None to search!!")
            # None to search!! The loop has finished
            raise StopIteration

        if not self.resultsBuffer.entries:
            # The buffered page is used up; stop once every result was handed out
            if int(self.resultsBuffer.get_totalresults()) <= self.next_result:
                raise StopIteration
            self.resultsBuffer = self.search(self.query, self.next_result)
            self.next_result += int(self.resultsBuffer.get_itemsperpage())

        if not self.resultsBuffer.entries:
            raise StopIteration
        return self.resultsBuffer.entries.pop(0)

    # Same method under the name the Python 3 iterator protocol looks for
    __next__ = next

    def pull(self, numberItems):
        """
        Iterator member function. It gives a list of results.

        Note: Usually, "pull" returns an iterator. Here it returns a list
        (that, in turn, is iterable) because a list is easy to store in a
        variable, which is the simplest way to go over the same results again.

        Parameters:

        * NumberItems: Number of demanded items

        Returns:

        * A list with the next numberItems items (fewer if the results run out).
        """
        listItems = []
        # Ask for exactly as many items as requested; taking one more just to
        # notice the end of the batch would drop that item from the iteration.
        while len(listItems) < numberItems:
            try:
                listItems.append(self.next())
            except StopIteration:
                break
        return listItems
Пример #48
0
        def parse_bing_xml_response(site, query, results, offset=0):
            """Build an OpenSearch Response from a Bing XML result document.

            Parameters:

            * site (str): search engine name, used in the feed title and description
            * query (str): query search terms
            * results: XML markup, handed to BeautifulSoup
            * offset (int): stored as the start index and added to the number
              of parsed entries to give the total

            Returns:

            * Response with one dict per <entry> element of the document
            """

            def extractElementString(node, element):
                """Return the string of the first `element` below `node`, '' when there is none."""
                found = node.find(element)
                return found.string if found else ''

            # (element name, entry key) pairs that are only stored when they carry a value
            entryExtras = (
                ('d:mediaurl', 'link'),            # Images: overrides the web link
                ('d:sourceurl', 'sourceLink'),
                ('d:displayurl', 'displayLink'),
                ('d:width', 'width'),
                ('d:height', 'height'),
            )
            thumbnailExtras = (
                ('d:mediaurl', 'thumbnail'),
                ('d:width', 'thumbnailWidth'),
                ('d:height', 'thumbnailHeight'),
            )

            soup = BeautifulSoup(results)

            response = Response()
            response.version = 'xml'
            response.feed.setdefault('title', "{0}: {1}".format(site, query))
            response.feed.setdefault('description', "Search results for {0} at {1}".format(query, site))
            response.feed.setdefault('link', '')
            response.namespaces.setdefault('opensearch', 'http://a9.com/-/spec/opensearch/1.1/')

            parsed = 0
            for entryNode in soup.findAll('entry'):
                # title/link/summary come from Web results, source/date from News
                item = {
                    'title': extractElementString(entryNode, 'd:title'),
                    'link': extractElementString(entryNode, 'd:url'),
                    'summary': extractElementString(entryNode, 'd:description'),
                    'source': extractElementString(entryNode, 'd:source'),
                    'date': extractElementString(entryNode, 'd:date'),
                }

                for elementName, key in entryExtras:
                    value = extractElementString(entryNode, elementName)
                    if value:
                        item[key] = value

                thumbnailNode = entryNode.find('d:thumbnail')
                if thumbnailNode:
                    for elementName, key in thumbnailExtras:
                        value = extractElementString(thumbnailNode, elementName)
                        if value:
                            item[key] = value

                response.entries.append(item)
                parsed += 1

            response.feed.setdefault('opensearch_totalresults', parsed + offset)
            response.feed.setdefault('opensearch_startindex', offset)
            response.feed.setdefault('opensearch_itemsperpage', parsed)

            return response
Пример #49
0
    def search(self, query, offset):
        """
        Search function for YouTube.

        Parameters:

        * query (puppy.model.OpenSearch.Query)

        * offset (int): result offset for the search, counted in pages of
          self.resultsPerPage

        Returns:

        * puppy.model.OpenSearch.Response whose entries carry the extra keys
          'summary', 'embedUrl' and 'thumbnail'

        Raises:

        * SearchEngineError: wraps the urllib2.URLError of a failed request
        """
        def addExtraFields(youtubeResponse):
            """This goes through the results and adds: the summary field, the embed url and adds a thumbnail shortcut."""
            for result in youtubeResponse.entries:
                author = result['author']
                fullDescription = result['media_group']  # This is author+description+'youtube'
                # Remove author from start and 'youtube' (7 characters) from end
                result['summary'] = fullDescription[len(author):len(fullDescription) - 7]
                result['embedUrl'] = 'http://www.youtube.com/embed/' + result['id'].split(':video:')[1]

                thumbnails = result['media_thumbnail']
                if len(thumbnails) >= 2:  # If we have 2 or more thumbnails use the second (hq thumbnail)
                    result['thumbnail'] = thumbnails[1]['url']
                elif len(thumbnails) == 1:  # Otherwise use the first (it's pretty low res compared to above)
                    result['thumbnail'] = thumbnails[0]['url']
                else:
                    result['thumbnail'] = ''  # If that fails just leave it blank

            return youtubeResponse

        # Built outside the try block so the error handler can always report it
        pos = self._origin() + (offset * self.resultsPerPage)
        url = 'http://gdata.youtube.com/feeds/api/videos?q={0}&max-results={1}&safeSearch={2}&start-index={3}&orderby={4}&v=2'.format(
            urllib2.quote(query.search_terms), self.resultsPerPage,
            self.safeSearch, pos, self.orderBy)

        if self.format:
            url += "&format={0}".format(self.format)

        if self.location and self.locationRadius:
            url += "&location-radius={0}&location={1}".format(
                self.locationRadius, self.location)
            if self.onlyLocation == True:
                url += '!'  # This forces YouTube to only return results with a location

        try:
            data = urllib2.urlopen(url)
            try:
                youtubeResponse = Response.parse_feed(data.read())
            finally:
                data.close()  # Do not leave the HTTP connection open
            # Does some processing to get embed url, summary and thumbnail shortcut
            return addExtraFields(youtubeResponse)

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("YouTube V2",
                                    e,
                                    errorType='urllib2',
                                    url=url)
Пример #50
0
        def parse_itunes_json(site, url, query, results):
            """Create a OpenSearch Response from iTunes results.
      
      iTunes's search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant.
      
      Parameters:
      
      * site (str): search engine name
      * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response
      * query (str): query search terms (n.b. not a OpenSearch Query object)
      * results (dict): results from service
      
      Returns:
      
      * puppy.model.OpenSearch.Response
      
      """
            response = Response()
            response.version = 'json'
            response.feed.setdefault('title', "{0}: {1}".format(site, query))
            response.feed.setdefault('link', url)
            response.feed.setdefault(
                'description',
                "Search results for '{0}' at {1}".format(query, site))
            response.namespaces.setdefault(
                "opensearch", "http://a9.com/-/spec/opensearch/1.1/")

            try:
                response.feed.setdefault("opensearch_totalresults",
                                         int(results['resultCount']))
                response.feed.setdefault("opensearch_itemsperpage",
                                         self.resultsPerPage)
                response.feed.setdefault("opensearch_startindex", 0)
            except KeyError:
                response.feed.setdefault("opensearch_totalresults", 0)
                response.feed.setdefault("opensearch_itemsperpage", 0)
                response.feed.setdefault("opensearch_startindex", 0)

            for result in results['results']:
                try:
                    result_dict = result
                    result_dict['title'] = ''
                    result_dict['link'] = ''

                    # For ease of access if we have a thumbnail store it under that name as well
                    if 'artworkUrl60' in result:
                        result_dict['thumbnail'] = result['artworkUrl60']

                    # If we have a trackname use it - this is iTunes's default for title
                    if 'trackName' in result:
                        result_dict['title'] = result['trackName']

                        # Use censored track name instead if explicit content should be avoided
                        if (self.explicit == False) and ('trackCensoredName'
                                                         in result):
                            result_dict['title'] = result['trackCensoredName']

                        result_dict['summary'] = "{0} by {1}".format(
                            result_dict['title'], result['artistName'])

                    # Otherwise see if there's a collection name - if we have a collection of videos or songs it will use this not the above
                    elif 'collectionName' in result:
                        result_dict['title'] = result['collectionName']

                        # Use censored collection name instead if explicit content should be avoided
                        if (self.explicit
                                == False) and ('collectionCensoredName'
                                               in result):
                            result_dict['title'] = result[
                                'collectionCensoredName']

                        result_dict[
                            'summary'] = "An item by {0} from the collection {1}".format(
                                result['artistName'], result_dict['title'])

                    # If we have a description then use this instead of the above for the summary
                    if 'longDescription' in result:
                        result_dict['summary'] = result['longDescription']
                    elif 'shortDescription' in result:
                        result_dict['summary'] = result['shortDescription']

                    # Track is the default - same as above this is the iTunes default link for an item
                    if 'trackViewUrl' in result:
                        result_dict['link'] = result['trackViewUrl']
                        # Next check if there's a collection - if its a collection it will use this
                    elif 'collectionViewUrl' in result:
                        result_dict['link'] = result['collectionViewUrl']
                    # Finally artist - this is the final fallback for a link to this item, a link to the artist page
                    elif 'artistViewUrl' in result:
                        result_dict['link'] = result['artistViewUrl']

                    response.entries.append(result_dict)

                except Exception, e:  # If there is a parsing problem, print out an error and just skip this individual result
                    print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(
                        e, url)
                    continue