示例#1
0
class YouTubeV2(SearchEngine):
    """
    YouTube search engine wrapper for version 2 of the YouTube feeds API.

    The orderBy parameter allows results to be ranked by their language relevance - see below for more.

    N.B. in the text below replace <languageCode> with a language code, e.g. 'en' for English or
    'nl' for Dutch, depending upon your application's needs.

    Parameters:

    * service: handed unchanged to SearchEngine.__init__

    * resultsPerPage (int): results per page

    * safeSearch (str): default is 'strict'; it is not recommended to change this

    * orderBy (str): rating, viewCount, relevance, relevance_lang_<languageCode>

    * format (int): restricts videos to a given format, for example 5 means only videos that can be embedded

    * location (str): the location the videos should be from, in the format 'lat,lon'

    * locationRadius (str): the radius around the location within which results should be returned, in the
                            format '<radius><unit>'; the valid units are: m, km, ft and mi

    * onlyLocation (boolean): only return results with a location (i.e. a geotag)
    """
    def __init__(self,
                 service,
                 resultsPerPage=8,
                 safeSearch='strict',
                 orderBy='relevance',
                 format=None,
                 location=None,
                 locationRadius=None,
                 onlyLocation=False,
                 **args):
        super(YouTubeV2, self).__init__(service, **args)
        self.resultsPerPage = resultsPerPage
        self.safeSearch = safeSearch
        self.orderBy = orderBy
        self.format = format
        self.location = location
        self.locationRadius = locationRadius
        self.onlyLocation = onlyLocation

    def _origin(self):
        """ This overrides SearchEngine's default origin (for results from a search engine) for Youtube V2 """
        return 1

    def search(self, query, offset):
        """
        Search function for YouTube.

        Parameters:

        * query (puppy.model.OpenSearch.Query)

        * offset (int): page offset for the search; it is multiplied by resultsPerPage to obtain the start index

        Returns:

        * puppy.model.OpenSearch.Response

        Raises:

        * SearchEngineError: wraps any urllib2.URLError raised while fetching the feed, and any TypeError
          (typically caused by a non-integer 'offset' or 'resultsPerPage')
        """
        def addExtraFields(youtubeResponse):
            """Add the summary field, the embed url and a thumbnail shortcut to every result."""
            for result in youtubeResponse.entries:
                author = result['author']
                fullDescription = result['media_group']  # This is author+description+'youtube'
                # Remove author from start and 'youtube' (7 characters) from end
                result['summary'] = fullDescription[len(author):len(fullDescription) - 7]
                result['embedUrl'] = 'http://www.youtube.com/embed/' + result['id'].split(':video:')[1]

                # A result without any thumbnail information must end up with a blank
                # thumbnail rather than aborting the whole search with a KeyError.
                try:
                    thumbnails = result['media_thumbnail']
                except KeyError:
                    thumbnails = []

                if len(thumbnails) >= 2:
                    # With 2 or more thumbnails use the second (hq thumbnail)
                    result['thumbnail'] = thumbnails[1]['url']
                elif len(thumbnails) == 1:
                    # Otherwise use the first (it's pretty low res compared to above)
                    result['thumbnail'] = thumbnails[0]['url']
                else:
                    result['thumbnail'] = ''  # Nothing available, leave it blank

            return youtubeResponse

        # Bound up front so the error handlers below can always report it, even when the
        # failure happens before the url has been assembled.
        url = None

        try:
            pos = self._origin() + (offset * self.resultsPerPage)
            url = 'http://gdata.youtube.com/feeds/api/videos?q={0}&max-results={1}&safeSearch={2}&start-index={3}&orderby={4}&v=2'.format(
                urllib2.quote(query.search_terms), self.resultsPerPage,
                self.safeSearch, pos, self.orderBy)

            if self.format:
                url += "&format={0}".format(self.format)

            if self.location and self.locationRadius:
                # location must stay the last parameter: the optional '!' is a suffix of its value
                url += "&location-radius={0}&location={1}".format(
                    self.locationRadius, self.location)
                if self.onlyLocation == True:
                    url += '!'  # This forces YouTube to only return results with a location

            data = urllib2.urlopen(url)
            youtubeResponse = Response.parse_feed(data.read())
            # Does some processing to get embed url, summary and thumbnail shortcut
            return addExtraFields(youtubeResponse)

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("YouTube V2",
                                    e,
                                    errorType='urllib2',
                                    url=url)

        # Check for a type error for offset or resultsPerPage
        except TypeError as e:
            note = "Please ensure that 'offset' and 'resultsPerPage' are integers if used"
            if not isinstance(offset, int):
                raise SearchEngineError("YouTube V2",
                                        e,
                                        note=note,
                                        offsetType=type(offset))

            if not isinstance(self.resultsPerPage, int):
                raise SearchEngineError("YouTube V2",
                                        e,
                                        note=note,
                                        resultsPerPageType=type(
                                            self.resultsPerPage))

            raise SearchEngineError("YouTube V2", e, url=url)
示例#2
0
                urllib2.quote(query.search_terms), pos)

            data = urllib2.urlopen(url)
            return Response.parse_feed(data.read())

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError, e:
            raise SearchEngineError("YouTube", e, errorType='urllib2', url=url)

        # Check for a type error for offset or resultsPerPage
        except TypeError, e:
            note = "Please ensure that 'offset' and 'resultsPerPage' are integers if used"
            if isinstance(offset, int) == False:
                raise SearchEngineError("YouTube",
                                        e,
                                        note=note,
                                        offsetType=type(offset))

            if isinstance(self.resultsPerPage, int) == False:
                raise SearchEngineError("YouTube",
                                        e,
                                        note=note,
                                        resultsPerPageType=type(
                                            self.resultsPerPage))

                raise SearchEngineError("YouTube", e, url=url)

        # Catch Attribute error which deals with unexpected none type for the objects the wrapper uses and other associated issues
        except AttributeError, e:
            raise SearchEngineError("YouTube", e, url=url)
示例#3
0
class Twitter(SearchEngine):
    """
    Twitter search engine.

    Geocode format is: latitude,longitude,radius - for example: '37.781157,-122.398720,1mi'

    Parameters:

    * service: handed unchanged to SearchEngine.__init__

    * language (str): en = English, de = German etc

    * type (str): what sort of results to get, can be - mixed, recent, popular

    * geocode (str): to get results around a specific location

    * resultsPerPage (int): results per page

    * includeEntities (boolean): if this is true then a lot of meta-data is included (mentions, associated images, associated urls)
    """

    def __init__(self, service, language='en', type='mixed', geocode=None, resultsPerPage=9, includeEntities=False, **args):
        super(Twitter, self).__init__(service, **args)
        self.language = language
        self.type = type
        self.geocode = geocode
        self.resultsPerPage = resultsPerPage
        self.includeEntities = includeEntities

    def _origin(self):
        """ This overrides SearchEngine's default origin (for results from a search engine) for Twitter """
        return 1

    def search(self, query, offset):
        """
        Search function for Twitter.

        Parameters:

        * query (puppy.model.OpenSearch.Query)

        * offset (int): result offset for the search; added to the origin to give the page number

        Returns:

        * puppy.model.Response

        Raises:

        * SearchEngineError: wraps any urllib2.URLError raised while fetching the feed, and any TypeError
          (typically caused by a non-integer 'offset' or 'resultsPerPage')
        """
        # Bound up front so the error handlers below can always report it, even when the
        # failure happens before the url has been assembled.
        url = None

        try:
            pos = self._origin() + offset
            url = 'http://search.twitter.com/search.atom?q={0}&lang={1}&page={2}&result_type={3}&rpp={4}&include_entities={5}'.format(
                urllib2.quote(query.search_terms), self.language, pos,
                self.type, self.resultsPerPage, self.includeEntities)

            if self.geocode:
                # geocode is an ordinary query parameter, so it needs 'name=value' form;
                # the previous 'geocode:<value>' form was never sent as a parameter.
                url += '&geocode={0}'.format(self.geocode)

            data = urllib2.urlopen(url)
            return Response.parse_feed(data.read())

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("Twitter", e, errorType='urllib2', url=url)

        # Check for a type error for offset or resultsPerPage
        except TypeError as e:
            note = "Please ensure that 'offset' and 'resultsPerPage' are integers if used"
            if not isinstance(offset, int):
                raise SearchEngineError("Twitter", e, note=note, offsetType=__builtins__type(offset)) if False else SearchEngineError("Twitter", e, note=note, offsetType=offset.__class__)

            if not isinstance(self.resultsPerPage, int):
                raise SearchEngineError("Twitter", e, note=note, resultsPerPageType=self.resultsPerPage.__class__)

            raise SearchEngineError("Twitter", e, url=url)
示例#4
0
class SoundCloud(SearchEngine):
    """
    SoundCloud search engine wrapper for a music sharing application, allowing the searching for tracks.

    You must include your API key for SoundCloud in your service manager config to use this service.
    It should be under the identifier "soundcloud_api_key"

    Parameters:

    * service: handed unchanged to SearchEngine.__init__

    * resultsPerPage (int): the number of results to return for a search query

    * order (str): the order to return results in, valid values are 'created_at' and 'hotness' (this latter one being popularity of tracks)

    * tags (str): a comma separated string of tags to look for along with the query

    * filter (str): filter via the access category, valid values are: 'all', 'public', 'private', 'streamable', 'downloadable'

    * genres (str): a comma separated string of genres to look for along with the query (see the SoundCloud site for a list of genres)

    * types (str): a comma separated string of types of track to look for along with the query (see the SoundCloud site for a list of types - examples are 'live' or 'demo')

    * bpmFilter (dict): filters via beats per minute, with the fields being 'from' and 'to', their values both being ints

    * durationFilter (dict): filters via duration of the track, with the fields being 'from' and 'to', their values both being ints with the units being milliseconds

    * createdFilter (dict): filters via when the track was created, with the fields being 'from' and 'to', their values both being a string of format: 'yyyy-mm-dd hh:mm:ss'
    """
    def __init__(self,
                 service,
                 resultsPerPage=8,
                 order=None,
                 tags=None,
                 filter=None,
                 genres=None,
                 types=None,
                 bpmFilter=None,
                 durationFilter=None,
                 createdFilter=None,
                 **args):
        SearchEngine.__init__(self, service, **args)

        self.resultsPerPage = resultsPerPage
        self.order = order
        self.tags = tags
        self.filter = filter
        self.genres = genres
        self.types = types
        self.bpmFilter = bpmFilter
        self.durationFilter = durationFilter
        self.createdFilter = createdFilter

    def search(self, query, offset):
        """
        Search function for SoundCloud Search.

        Parameters:

        * query (puppy.model.Query)

        * offset (int): which page of results to return (pagination is done locally, see below)

        Returns:

        * puppy.model.Response

        Raises:

        * ApiKeyError: if "soundcloud_api_key" is missing from the service config

        * SearchEngineError: wraps any urllib2.URLError raised while fetching the results
        """
        def parse_soundcloud_json(site, query, results, url, offset):
            """
            SoundCloud's search API returns results in JSON format. This function takes the decoded
            JSON and creates an equivalent representation that is OpenSearch compliant.

            Parameters:

            * site (str): search engine name
            * query (str): query search terms (n.b. not a OpenSearch Query object)
            * results (list): decoded results from the service
            * url (str): the url the results were retrieved from, used as the OpenSearch link for the response
            * offset (int): which page of results we are retrieving

            Returns:

            * puppy.model.OpenSearch.Response
            """
            response = Response()
            response.version = 'json'
            response.feed.setdefault('title', "{0}: {1}".format(site, query))
            response.feed.setdefault('link', url)
            response.feed.setdefault(
                'description',
                "Search results for '{0}' at {1}".format(query, site))
            response.namespaces.setdefault(
                "opensearch", "http://a9.com/-/spec/opensearch/1.1/")

            response.feed.setdefault("opensearch_itemsperpage",
                                     self.resultsPerPage)
            response.feed.setdefault("opensearch_totalresults", len(results))
            response.feed.setdefault("opensearch_startindex", 0)

            # Pagination is not requested from the service: everything comes back in one
            # go, so the requested page is cut out of the full result set here.
            startIndex = offset * self.resultsPerPage
            endIndex = min(startIndex + self.resultsPerPage, len(results))

            # Go through the subset of the results corresponding to the page in question.
            # Indexing (rather than slicing) keeps a malformed payload confined to the
            # per-result handler below.
            for i in range(startIndex, endIndex):
                try:
                    result_dict = results[i]
                    result_dict['summary'] = result_dict['description']
                    result_dict['link'] = result_dict['permalink_url']
                    result_dict['artist'] = result_dict['user']['username']
                    response.entries.append(result_dict)
                except Exception as e:
                    print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(
                        e, url))
                    continue

            return response

        # Try and get the API key from config, if it's not there raise an API Key error - the application will have to deal with this
        try:
            apiKey = self.service.config["soundcloud_api_key"]
        except KeyError:
            raise ApiKeyError("SoundCloud Search API", "soundcloud_api_key")

        try:
            url = "http://api.soundcloud.com/tracks.json?client_id={0}&q={1}".format(
                apiKey, urllib2.quote(query.search_terms))

            # Optional single-valued parameters, only sent when they have been set
            simpleParams = (
                ('order', self.order),    # non default ordering of results
                ('tags', self.tags),      # tags to search for along with the query
                ('filter', self.filter),  # public, private, streamable etc
                ('genres', self.genres),
                ('types', self.types),    # i.e. demo etc
            )
            for name, value in simpleParams:
                if value:
                    url += "&{0}={1}".format(name, value)

            # Optional range parameters; each may carry a 'from' and/or a 'to' bound.
            # The last flag marks values that need url-escaping.
            rangeParams = (
                ('bpm', self.bpmFilter, False),
                ('duration', self.durationFilter, False),
                ('created_at', self.createdFilter, True),
            )
            for name, bounds, needsEscaping in rangeParams:
                if not bounds:
                    continue
                for edge in ('from', 'to'):
                    if edge not in bounds:
                        continue
                    value = bounds[edge]
                    if needsEscaping:
                        # 'yyyy-mm-dd hh:mm:ss' contains a space, which is not allowed in a
                        # url. '%' is kept safe so already-escaped values pass through unchanged.
                        value = urllib2.quote("{0}".format(value), safe='/%')
                    url += "&{0}[{1}]={2}".format(name, edge, value)

            data = urllib2.urlopen(url).read()
            results = json.loads(data)
            return parse_soundcloud_json('SoundCloud Search API',
                                         query.search_terms, results, url,
                                         offset)

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("SoundCloud Search API",
                                    e,
                                    errorType='urllib2',
                                    url=url)
示例#5
0
class Flickr(SearchEngine):
    """
    Wrapper around the Flickr photo search API.

    The application's Flickr API key must be present in the service manager config
    under the identifier "flickr_api_key".

    Parameters:

    * sortBy (str): result ordering, 'relevance' by default - see the Flickr API for other values

    * safeSearch (int): defaults to 3, i.e. strict; changing it is not recommended

    * mediaType (str): one of all, photos, videos

    * resultsPerPage (int): number of results per page

    * bbox (str): bounding box given as 'swLongitude,swLatitude,neLongitude,neLatitude'
    """

    def __init__(self, service, sortBy='relevance', safeSearch=3, mediaType='photos', resultsPerPage=8, bbox=None, **args):
        super(Flickr, self).__init__(service, **args)
        self.sortBy = sortBy
        self.safeSearch = safeSearch
        self.mediaType = mediaType
        self.resultsPerPage = resultsPerPage
        self.bbox = bbox

    def _origin(self):
        """ Flickr numbers its result pages from 1, overriding SearchEngine's default origin """
        return 1

    def search(self, query, offset):
        """
        Run a Flickr photo search and return the results as an OpenSearch response.

        Parameters:

        * query (puppy.model.Query)

        * offset (int): result offset for the search; added to the origin to give the page number

        Returns:

        * puppy.model.Response

        Raises:

        * ApiKeyError: if "flickr_api_key" is missing from the service config

        * SearchEngineError: wraps any urllib2.URLError raised while fetching the results
        """

        def parse_flickr_json(site, terms, payload):
            """
            Convert the 'photos' part of a decoded Flickr JSON reply into an OpenSearch Response.

            Parameters:

            * site (str): search engine name
            * terms (str): query search terms (n.b. not a OpenSearch Query object)
            * payload (dict): results from service, with the request url stored under 'link'

            Returns:

            * puppy.model.OpenSearch.Response
            """
            reply = Response()
            reply.version = 'json'

            feed = reply.feed
            feed.setdefault('title', "{0}: {1}".format(site, terms))
            feed.setdefault('link', payload['link'])
            feed.setdefault('description', "Search results for '{0}' at {1}".format(terms, site))
            reply.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")

            # OpenSearch field -> Flickr field holding its value
            paging = (
                ("opensearch_totalresults", 'total'),
                ("opensearch_itemsperpage", 'perpage'),
                ("opensearch_startindex", 'page'),
            )
            try:
                for feedKey, flickrKey in paging:
                    feed.setdefault(feedKey, int(payload[flickrKey]))
            except KeyError:
                # Whatever could not be filled in above falls back to zero
                for feedKey, _ in paging:
                    feed.setdefault(feedKey, 0)

            if 'photo' not in payload:
                return reply

            for photo in payload['photo']:
                # Links need to be created from several fields - see the Flickr API for a detailed explanation
                try:
                    entry = {
                        'title': photo['title'],
                        'link': "http://www.flickr.com/photos/{0}/{1}".format(photo['owner'], photo['id']),
                        'summary': "Photo result for '{0}' from {1}".format(terms, site),
                        'thumbnail': "http://farm{0}.static.flickr.com/{1}/{2}_{3}_t.jpg".format(
                            photo['farm'], photo['server'], photo['id'], photo['secret']),
                    }
                    reply.entries.append(entry)
                except Exception as e:
                    print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, payload['link']))

            return reply

        # Without an API key nothing can be requested - the application will have to deal with this
        try:
            appId = self.service.config["flickr_api_key"]
        except KeyError:
            raise ApiKeyError("Flickr", "flickr_api_key")

        # Now that an API key has been supplied, assemble the request
        pos = self._origin() + offset
        url = (
            "http://api.flickr.com/services/rest/?method=flickr.photos.search"
            "&api_key={0}&text={1}&sort={2}&safe_search={3}&media={4}"
            "&per_page={5}&page={6}&format=json&nojsoncallback=1"
        ).format(appId, urllib2.quote(query.search_terms), self.sortBy,
                 self.safeSearch, self.mediaType, self.resultsPerPage, pos)

        if self.bbox:
            url += "&bbox={0}".format(self.bbox)

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        try:
            data = urllib2.urlopen(url).read()
        except urllib2.URLError as e:
            raise SearchEngineError("Flickr", e, errorType='urllib2', url=url)

        photos = json.loads(data)['photos']
        photos.setdefault(u'link', url)
        return parse_flickr_json('Flickr', query.search_terms, photos)
示例#6
0
    def search(self, query, offset):
        """Search function for Microsoft Bing.

        Builds an Azure Datamarket Bing web-search URL for the query terms and opens it
        using HTTP basic authentication, with the API key as the password.

        Parameters:

        * query (puppy.model.OpenSearch.Query)

        * offset (int): not used by the code visible here

        Returns:

        * puppy.model.OpenSearch.Response (according to the original documentation; the code
          visible here ends before anything is returned - see the note at the end)

        Raises:

        * ApiKeyError: if "bing_api_key" is missing from the service config

        * SearchEngineError: wraps any urllib2.URLError raised while opening the search URL
        """
        def parse_bing_xml_response(site,
                                    query,
                                    results,
                                    numResults=10,
                                    offset=0):
            """Create an OpenSearch Response from the ATOM XML returned by Bing.

            Parameters:

            * site (str): search engine name, used in the feed title and description
            * query (str): query search terms, used in the feed title and description
            * results: raw XML handed to BeautifulSoup
            * numResults (int): maximum number of entries to copy into the response
            * offset (int): index of the first 'entry' element to copy

            Returns:

            * a Response whose entries hold the title, link and summary of each selected entry
            """

            xmlSoup = BeautifulSoup(results)

            response = Response()
            response.version = 'xml'
            response.feed.setdefault('title', "{0}: {1}".format(site, query))
            response.feed.setdefault(
                'description',
                "Search results for {0} at {1}".format(query, site))
            response.feed.setdefault('link', '')
            response.namespaces.setdefault(
                'opensearch', 'http://a9.com/-/spec/opensearch/1.1/')

            resultCount = 0  # number of 'entry' elements seen so far
            resultsRetrieved = 0  # number of entries actually copied into the response
            for r in xmlSoup.findAll('entry'):
                # Only keep entries whose position lies in [offset, offset + numResults)
                if (resultCount >= offset) and (resultCount <
                                                (numResults + offset)):
                    xmlTitleData = r.find('d:title').string
                    xmlURLData = r.find('d:url').string
                    xmlDescriptionData = r.find('d:description').string
                    response.entries.append({
                        'title': xmlTitleData,
                        'link': xmlURLData,
                        'summary': xmlDescriptionData
                    })
                    resultsRetrieved += 1
                resultCount += 1

            # Total is the number of entries in the XML, not only the ones kept
            response.feed.setdefault('opensearch_totalresults', resultCount)
            response.feed.setdefault('opensearch_startindex', offset)
            response.feed.setdefault('opensearch_itemsperpage',
                                     resultsRetrieved)

            return response

        # The user name is left empty; the API key is supplied as the password below.
        username = ""
        # Try and get the API key from config, if it's not there raise an API Key error - the application will have to deal with this
        try:
            appId = self.service.config["bing_api_key"]
        except KeyError:
            raise ApiKeyError("Bing V3", "bing_api_key")
        queryBingFor = "'" + query.search_terms + "'"  # REMEMBER: use apostrophes within the string, this is what Bing expects
        quoted_query = urllib.quote(queryBingFor)

        # Create the API URL
        rootURL = "https://api.datamarket.azure.com/Bing/SearchWeb/"
        searchURL = rootURL + "Web?$format=ATOM&Query=" + quoted_query

        # Add the API key to the password manager
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, searchURL, username, appId)

        # Prepare an authentication handler and open the URL
        try:
            handler = urllib2.HTTPBasicAuthHandler(password_mgr)
            opener = urllib2.build_opener(handler)
            # install_opener replaces the process-wide default opener used by urllib2.urlopen
            urllib2.install_opener(opener)
            xmlresponse = urllib2.urlopen(searchURL)
        except urllib2.URLError, e:
            raise SearchEngineError("Bing V3",
                                    e,
                                    errorType='urllib2',
                                    url=searchURL)
        # NOTE(review): in the code visible here xmlresponse is never handed to
        # parse_bing_xml_response and nothing is returned - the remainder of this
        # method appears to be missing; confirm against the full source.
示例#7
0
class WebSpellChecker(SearchEngine):
    """
    Web Spell Checker's search engine api.

    You must include your application's Web Spell Checker API key in your service manager config to use this service.
    It should be under the identifier "web_spell_api_key"

    Parameters:

    * service: handed unchanged to SearchEngine.__init__

    * language (str): the language/dictionary to check against i.e. 'en_US' for American English, 'nl_NL' for Dutch etc (this is case sensitive)
    """
    def __init__(self, service, language='en_GB', **args):
        super(WebSpellChecker, self).__init__(service, **args)
        self.language = language

    def search(self, query, offset):
        """
        Search function for Web Spell Search.

        Parameters:

        * query (puppy.model.Query)

        * offset (int): not used by this search engine

        Returns:

        * puppy.model.Response: one entry per spelling suggestion; no entries when the service
          reports no misspelling

        Raises:

        * ApiKeyError: if "web_spell_api_key" is missing from the service config

        * SearchEngineError: wraps any urllib2.URLError raised while fetching the results
        """
        def parse_web_spell_checker_xml(site, url, query, results):
            """
            Web Spell Checker's search API returns results in XML format. This function parses
            the XML and creates an equivalent representation that is OpenSearch compliant.

            Parameters:

            * site (str): search engine name
            * url (str): the url the results were retrieved from, used as the OpenSearch link for the response
            * query (str): query search terms (n.b. not a OpenSearch Query object)
            * results (str): raw XML returned by the service

            Returns:

            * puppy.model.OpenSearch.Response
            """
            response = Response()
            response.version = 'xml'
            response.feed.setdefault('title', "{0}: {1}".format(site, query))
            response.feed.setdefault('link', url)
            response.feed.setdefault(
                'description',
                "Search results for '{0}' at {1}".format(query, site))
            response.namespaces.setdefault(
                "opensearch", "http://a9.com/-/spec/opensearch/1.1/")
            response.feed.setdefault("opensearch_itemsperpage", '')
            response.feed.setdefault("opensearch_startindex", 0)

            root = etree.XML(results)

            # find() returns None when the element is absent, e.g. when the query is spelled
            # correctly; that must give an empty response rather than an AttributeError.
            # Compare against None explicitly: an element without children is falsy.
            section = root.find("misspelling")
            suggestions = None
            if section is not None:
                suggestions = section.find("suggestions")
            if suggestions is None:
                suggestions = []

            for item in suggestions:
                try:
                    suggestion = item.text
                    spell_dict = {
                        "title":
                        "Spelling Suggestion for: '{0}'".format(query),
                        "link": ''
                    }
                    spell_dict[
                        'summary'] = "Original query: '{0}'. Suggested correction of query: '{1}'.".format(
                            query, suggestion)
                    spell_dict['suggestion'] = suggestion
                    response.entries.append(spell_dict)
                except Exception as e:
                    print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(
                        e, url))
                    continue

            response.feed.setdefault("opensearch_totalresults",
                                     len(response.entries))
            return response

        # Try and get the API key from config, if it's not there raise an API Key error - the application will have to deal with this
        try:
            appId = self.service.config["web_spell_api_key"]
        except KeyError:
            raise ApiKeyError("Web Spell Checker", "web_spell_api_key")

        try:
            url = "http://svc.webservius.com/v1/spellcheck/spellcheck/?wsvKey={0}&cmd=check_spelling&version=1.0&out_type=words&slang={1}&text={2}".format(
                appId, self.language, urllib2.quote(query.search_terms))
            data = urllib2.urlopen(url)
            return parse_web_spell_checker_xml('Web Spell Checker', url,
                                               query.search_terms, data.read())

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("Web Spell Checker",
                                    e,
                                    errorType='urllib2',
                                    url=url)
示例#8
0
class RottenTomatoes(SearchEngine):
  """
  Wrapper around the Rotten Tomatoes movie search API.

  The application's Rotten Tomatoes API key must be present in the service
  config under the identifier "rotten_tomatoes_api_key".

  Parameters:

  * service: handed on to SearchEngine.__init__; its config supplies the API key

  * resultsPerPage (int): how many results to ask for per page

  * args: any extra keyword arguments, handed on to SearchEngine.__init__
  """

  def __init__(self, service, resultsPerPage = 8, **args):
    super(RottenTomatoes, self).__init__(service, **args)
    self.resultsPerPage = resultsPerPage

  def _origin(self):
    """ Replaces SearchEngine's default origin: search() adds this value (1) to the offset to get the page number """
    return 1

  def search(self, query, offset):
    """
    Run a Rotten Tomatoes movie search and wrap the answer in a Response.

    Parameters:

    * query (puppy.model.Query): its search_terms are URL-quoted and sent as 'q'

    * offset (int): added to _origin() to give the 'page' that is requested

    Returns:

    * puppy.model.Response

    Raises:

    * ApiKeyError: "rotten_tomatoes_api_key" is not in the service config

    * SearchEngineError: urllib2 raised a URLError while fetching the results
    """

    def parse_rotten_tomatoes_json(site, pos, query, results):
      """
      Turn the decoded Rotten Tomatoes JSON into an OpenSearch style Response.

      Parameters:

      * site (str): search engine name, used in the feed title and description
      * pos (int): page number that was requested, stored as the start index
      * query (str): query search terms (n.b. not a OpenSearch Query object)
      * results (dict): decoded JSON returned by the service

      Returns:

      * puppy.model.OpenSearch.Response
      """
      response = Response()
      response.version = 'json'

      feed = response.feed
      feed.setdefault('title', "{0}: {1}".format(site, query))
      feed.setdefault('link', results['links']['self'])
      feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
      response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")

      # Paging information; everything falls back to 0 when the service gave no total
      try:
        feed.setdefault("opensearch_totalresults", int(results['total']))
        feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
        feed.setdefault("opensearch_startindex", pos)
      except KeyError:
        for paging_key in ("opensearch_totalresults", "opensearch_itemsperpage", "opensearch_startindex"):
          feed.setdefault(paging_key, 0)

      # Each movie dict is reused as the entry, with 'link' and 'summary' added to it
      for movie in results['movies']:
        try:
          movie['link'] = movie['links']['alternate']
          movie['summary'] = movie['synopsis']
          response.entries.append(movie)
        except Exception as e:
          # A single bad result is reported and skipped ('url' comes from the enclosing search call)
          print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url))
          continue

      return response

    # Without an API key nothing can be done - the application has to deal with the ApiKeyError
    try:
      api_key = self.service.config["rotten_tomatoes_api_key"]
    except KeyError:
      raise ApiKeyError("Rotten Tomatoes", "rotten_tomatoes_api_key")

    # With the key in hand, build the request and fetch the results from the service
    try:
      pos = self._origin() + offset
      quoted_terms = urllib2.quote(query.search_terms)
      page_limit = int(self.resultsPerPage)
      url = "http://api.rottentomatoes.com/api/public/v1.0/movies.json?apikey={0}&q={1}&page_limit={2}&page={3}".format(api_key, quoted_terms, page_limit, pos)
      results = json.loads(urllib2.urlopen(url).read())
      return parse_rotten_tomatoes_json('Rotten Tomatoes', pos, query.search_terms, results)

    # urllib2 - http errors because the service is down, a proxy is missing etc
    except urllib2.URLError as e:
      raise SearchEngineError("Rotten Tomatoes", e, errorType = 'urllib2', url = url)
示例#9
0
                    continue
            return response

        try:
            pos = self._origin() + offset
            serviceName = self.source[:len(self.source) - 1]
            url = "http://ws.spotify.com/search/1/{0}.json?q={1}&page={2}".format(
                serviceName, urllib2.quote(query.search_terms), pos)
            data = urllib2.urlopen(url).read()
            results = json.loads(data)
            return parse_spotify_json('Spotify', url, query.search_terms,
                                      results)

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError, e:
            raise SearchEngineError("Spotify", e, errorType='urllib2', url=url)

        # Check for a type error for offset
        except TypeError, e:
            if isinstance(offset, int) == False:
                note = "Please ensure that 'offset' is an integer."
                raise SearchEngineError("Spotify",
                                        e,
                                        note=note,
                                        offsetType=type(offset))

            raise SearchEngineError("Spotify", e, url=url)

        # Catch Attribute error which deals with unexpected none type for the objects the wrapper uses and other associated issues
        except AttributeError, e:
            raise SearchEngineError("Spotify", e, url=url)
示例#10
0
class Digg(SearchEngine):
    """
    Wrapper around the Digg search API.

    Parameters:

    * service: handed on to SearchEngine.__init__

    * resultsPerPage (int): how many results per page

    * sort (str): how to sort results (see the Digg site for the options), e.g. 'submit_date-desc' sorts by the item's submit date

    * topic (str): restrict the search to a specific topic (see the Digg site for a list of them)

    * media (str): options are: 'all', 'news', 'videos', 'images'

    * max_date (unix timestamp - converted to str): latest date results returned were posted

    * min_date (unix timestamp - converted to str): earliest date results returned were posted

    * args: any extra keyword arguments, handed on to SearchEngine.__init__
    """
    def __init__(self,
                 service,
                 resultsPerPage=8,
                 sort=None,
                 topic=None,
                 media='all',
                 max_date=None,
                 min_date=None,
                 **args):
        super(Digg, self).__init__(service, **args)
        self.resultsPerPage = resultsPerPage
        self.sort = sort
        self.topic = topic
        self.media = media
        self.max_date = max_date
        self.min_date = min_date

    def search(self, query, offset):
        """
        Run a Digg search and wrap the answer in a Response.

        Parameters:

        * query (puppy.model.Query): its search_terms are URL-quoted and sent as 'query'

        * offset (int): result offset for the search; added to _origin() and sent as 'offset'

        Returns:

        * puppy.model.Response

        Raises:

        * SearchEngineError: urllib2 raised a URLError while fetching the results
        """
        def parse_digg_json(site, url, pos, query, results):
            """
            Turn the decoded Digg JSON into an OpenSearch style Response.

            Parameters:

            * site (str): search engine name, used in the feed title and description
            * url (str): the url the results were retrieved from, used as the feed link
            * pos (int): position that was requested, stored as the start index
            * query (str): query search terms (n.b. not a OpenSearch Query object)
            * results (dict): decoded JSON returned by the service

            Returns:

            * puppy.model.OpenSearch.Response
            """
            response = Response()
            response.version = 'json'

            feed = response.feed
            feed.setdefault('title', "{0}: {1}".format(site, query))
            feed.setdefault('link', url)
            feed.setdefault(
                'description',
                "Search results for '{0}' at {1}".format(query, site))
            response.namespaces.setdefault(
                "opensearch", "http://a9.com/-/spec/opensearch/1.1/")

            # Paging information; everything falls back to 0 when the service gave no total
            try:
                feed.setdefault("opensearch_totalresults", results['total'])
                feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
                feed.setdefault("opensearch_startindex", pos)
            except KeyError:
                for paging_key in ("opensearch_totalresults",
                                   "opensearch_itemsperpage",
                                   "opensearch_startindex"):
                    feed.setdefault(paging_key, 0)

            # Each story dict is reused as the entry, with 'summary' and 'link' added to it
            for story in results['stories']:
                try:
                    story['summary'] = story['description']
                    story['link'] = story['href']
                    response.entries.append(story)
                except Exception as e:
                    # A single bad result is reported and skipped
                    print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(
                        e, url))
                    continue

            return response

        try:
            pos = self._origin() + offset
            url = "http://services.digg.com/2.0/search.search?query={0}&count={1}&offset={2}&media={3}".format(
                urllib2.quote(query.search_terms), self.resultsPerPage, pos,
                self.media)

            # Optional filters are only added to the request when they have been set
            optional_params = (
                ("topic", self.topic),
                ("sort", self.sort),
                ("max_date", self.max_date),
                ("min_date", self.min_date),
            )
            for param_name, param_value in optional_params:
                if param_value:
                    url += "&{0}={1}".format(param_name, param_value)

            results = json.loads(urllib2.urlopen(url).read())
            return parse_digg_json('Digg', url, pos, query.search_terms,
                                   results)

        # urllib2 - http errors because the service is down, a proxy is missing etc
        except urllib2.URLError as e:
            raise SearchEngineError("Digg", e, errorType='urllib2', url=url)
示例#11
0
    # Now that an API key has been supplied try to get results from the search engine itself
    try:    
      pos = self._origin() + offset
      url = "http://api.rottentomatoes.com/api/public/v1.0/movies.json?apikey={0}&q={1}&page_limit={2}&page={3}".format(appId, urllib2.quote(query.search_terms), int(self.resultsPerPage), pos)
      data = urllib2.urlopen(url).read()
      results = json.loads(data)
      return parse_rotten_tomatoes_json('Rotten Tomatoes', pos, query.search_terms, results)

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError, e:
      raise SearchEngineError("Rotten Tomatoes", e, errorType = 'urllib2', url = url)

	# Check for a value error with resultsPerPage
    except ValueError, e:
      note = "Please ensure that 'resultsPerPage' is an integer."
      raise SearchEngineError("Rotten Tomatoes", e, note = note, resultsPerPageType = type(self.resultsPerPage))

    # Check for a type error for offset or resultsPerPage
    except TypeError, e:
      note = "Please ensure that 'offset' and 'resultsPerPage' are both integers."
      if isinstance(offset, int) == False:
        raise SearchEngineError("Rotten Tomatoes", e, note = note, offsetType = type(offset))

      if isinstance(self.resultsPerPage, int) == False:
        raise SearchEngineError("Rotten Tomatoes", e, note = note, resultsPerPageType = type(self.resultsPerPage))

      raise SearchEngineError("Rotten Tomatoes", e, url = url)
	  
    # Catch Attribute error which deals with unexpected none type for the objects the wrapper uses and other associated issues
    except AttributeError, e:
      raise SearchEngineError("Rotten Tomatoes", e, url = url)
示例#12
0
    # Now that an API key has been supplied try to get results from the search engine itself
    try:
      pos = self._origin() + offset
      url = "http://ws.audioscrobbler.com/2.0/?method={0}.search&{0}={1}&api_key={2}&limit={3}&page={4}&format=json".format(self.source, urllib2.quote(query.search_terms), appId, self.resultsPerPage, pos)

      if self.artist and self.source == 'track':
        url += "&artist={0}".format(self.artist)

      data = urllib2.urlopen(url).read()
      results = json.loads(data)
      return parse_last_fm_json('LastFM', url, query.search_terms, results['results'], pos)

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError, e:
      raise SearchEngineError("LastFM", e, errorType = 'urllib2', url = url)

	# Check for a value error with resultsPerPage
    except ValueError, e:
      note = "Please ensure that 'resultsPerPage' is an integer"
      raise SearchEngineError("LastFM", e, note = note, resultsPerPageType = type(self.resultsPerPage))

    # Check for a type error for offset or resultsPerPage
    except TypeError, e:
      note = "Please ensure that 'offset' and 'resultsPerPage' are both integers."
      if isinstance(offset, int) == False:
        raise SearchEngineError("LastFM", e, note = note, offsetType = type(offset))

      if isinstance(self.resultsPerPage, int) == False:
        raise SearchEngineError("LastFM", e, note = note, resultsPerPageType = type(self.resultsPerPage))
示例#13
0
class Yahoo(SearchEngine):
  """
  Wrapper around the Yahoo! BOSS web search API.

  The application's Yahoo ID must be present in the service config under the
  identifier "yahoo_api_key".
  """

  def __init__(self, service, **args):
    super(Yahoo, self).__init__(service, **args)

  def search(self, query, offset):
    """
    Run a Yahoo! BOSS web search and wrap the answer in a Response.

    Parameters:

    * query (puppy.model.Query): its search_terms are URL-quoted and placed in the request path

    * offset (int): result offset for the search; added to _origin() and sent as 'start'

    Returns:

    * puppy.model.Response

    Raises:

    * ApiKeyError: "yahoo_api_key" is not in the service config

    * SearchEngineError: urllib2 raised a URLError while fetching the results
    """

    def parse_yahoo_json(site, query, results):
      """
      Turn the decoded Yahoo! BOSS JSON into an OpenSearch style Response.

      Parameters:

      * site (str): search engine name, used in the feed title and description
      * query (str): query search terms (n.b. not a OpenSearch Query object)
      * results (dict): the 'ysearchresponse' part of the decoded JSON

      Returns:

      * puppy.model.OpenSearch.Response
      """
      response = Response()
      response.version = 'json'

      feed = response.feed
      feed.setdefault('title', "{0}: {1}".format(site, query))
      feed.setdefault('link', results['link'])
      feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
      response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")

      # Paging information is copied field by field; fields that were not set
      # before a missing key was hit fall back to 0
      paging_fields = (
        ("opensearch_totalresults", 'totalhits'),
        ("opensearch_itemsperpage", 'count'),
        ("opensearch_startindex", 'start'),
      )
      try:
        for feed_key, result_key in paging_fields:
          feed.setdefault(feed_key, int(results[result_key]))
      except KeyError:
        for feed_key, _ in paging_fields:
          feed.setdefault(feed_key, 0)

      for hit in results['resultset_web']:
        try:
          entry = {'title': hit['title'], 'link': hit['url'], 'summary': hit['abstract']}
          response.entries.append(entry)
        except Exception as e:
          # A single bad result is reported and skipped ('url' comes from the enclosing search call)
          print("Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url))
          continue

      return response

    # Without an API key nothing can be done - the application has to deal with the ApiKeyError
    try:
      api_key = self.service.config["yahoo_api_key"]
    except KeyError:
      raise ApiKeyError("Yahoo!", "yahoo_api_key")

    # With the key in hand, build the request and fetch the results from the service
    try:
      pos = self._origin() + offset
      # NOTE(review): the 'filter' value contains literal asterisks, which looks like a
      # masked word - confirm against the Yahoo! BOSS filter documentation
      url = "http://boss.yahooapis.com/ysearch/web/v1/{0}?appid={1}&format=json&style=raw&filter=-p**n-hate&start={2}".format(urllib2.quote(query.search_terms), api_key, str(pos))

      results = json.loads(urllib2.urlopen(url).read())
      return parse_yahoo_json('Yahoo!', query.search_terms, results['ysearchresponse'])

    # urllib2 - http errors because the service is down, a proxy is missing etc
    except urllib2.URLError as e:
      raise SearchEngineError("Yahoo!", e, errorType = 'urllib2', url = url)
示例#14
0
	# Try and get the API key from config, if it's not there raise an API Key error - the application will have to deal with this
    try:
      appId = self.service.config["yahoo_api_key"]
    except KeyError:
      raise ApiKeyError("Yahoo!", "yahoo_api_key")

    # Now that an API key has been supplied try to get results from the search engine itself
    try:    
      pos = self._origin() + offset
      url = "http://boss.yahooapis.com/ysearch/web/v1/{0}?appid={1}&format=json&style=raw&filter=-p**n-hate&start={2}".format(urllib2.quote(query.search_terms), appId, str(pos))
    
      data = urllib2.urlopen(url).read()
      results = json.loads(data)
      return parse_yahoo_json('Yahoo!', query.search_terms, results['ysearchresponse'])

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError, e:
      raise SearchEngineError("Yahoo!", e, errorType = 'urllib2', url = url)

    # Check for a type error for offset
    except TypeError, e:
      if isinstance(offset, int) == False:
        note = "Please ensure that 'offset' is an integer."
        raise SearchEngineError("Yahoo!", e, note = note, offsetType = type(offset))

      raise SearchEngineError("Yahoo!", e, url = url)
	  
    # Catch Attribute error which deals with unexpected none type for the objects the wrapper uses and other associated issues
    except AttributeError, e:
      raise SearchEngineError("Yahoo!", e, url = url)
示例#15
0
          continue
        except Exception, e:
          print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url)
          continue

      # If the processing worked okay then set total results and items per page
      response.feed['opensearch_totalresults'] = len(response.entries)
      response.feed['opensearch_itemsperpage'] = len(response.entries)
      return response

    try:
      url = "https://maps.googleapis.com/maps/api/geocode/json?address={0}&sensor={1}".format(urllib2.quote(query.search_terms), self.sensor)   
      data = urllib2.urlopen(url).read()
      results = json.loads(data)
      return parse_geocode_json('Google Geocode', url, query.search_terms, results['results'])
    
    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError, e:
      raise SearchEngineError("Google Geocode", e, errorType = 'urllib2', url = url)

    # Check for a type error for offset or a generic type error if offset is valid
    except TypeError, e:
      if isinstance(offset, int) == False:
        note = "Please ensure that 'offset' is an integer."
        raise SearchEngineError("Google Geocode", e, note = note, offsetType = type(offset))

      raise SearchEngineError("Google Geocode", e, url = url)
	  
    # Catch Attribute error which deals with unexpected none type for the objects the wrapper uses and other associated issues
    except AttributeError, e:
      raise SearchEngineError("Google Geocode", e, url = url)
示例#16
0
            data = urllib2.urlopen(url)
            picassaResponse = Response.parse_feed(data.read())
            picassaResponse = addThumbnailShortcut(picassaResponse)
            return picassaResponse

# urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError, e:
            raise SearchEngineError("Picassa", e, errorType='urllib2', url=url)

        # Check for a type error for offset or resultsPerPage
        except TypeError, e:
            note = "Please ensure that 'offset' and 'resultsPerPage' are both integers."
            if isinstance(offset, int) == False:
                raise SearchEngineError("Picassa",
                                        e,
                                        note=note,
                                        offsetType=type(offset))

            if isinstance(self.resultsPerPage, int) == False:
                raise SearchEngineError("Picassa",
                                        e,
                                        note=note,
                                        resultsPerPageType=type(
                                            self.resultsPerPage))

            raise SearchEngineError("Picassa", e, url=url)

        # Catch Attribute error which deals with unexpected none type for the objects the wrapper uses and other associated issues
        except AttributeError, e:
            raise SearchEngineError("Picassa", e, url=url)
示例#17
0
            # template exists, use it to search
            search_url = self.template.replace(
                '{searchTerms}', urllib2.quote(query.search_terms))
            if (pos != 0):
                search_url = search_url.replace('{start}', urllib2.quote(pos))
            else:
                pass
            try:
                response = urllib2.urlopen(search_url).read()
                if self.results and self.xml:
                    return Response.parse_feed(response)
                elif not self.results and self.xml:
                    return Response.parse_xml_suggestions(response)
                elif not self.results and not self.xml:
                    return Response.parse_json_suggestions(response)
            except urllib2.URLError, e:
                print "Opensearch for {0} failed".format(self.url)
        else:
            # attempt to discover template
            try:
                # assign template and search
                self.template = self.find_template(self.discover_description())
                return self.search(query, pos)

            # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
            except urllib2.URLError, e:
                raise SearchEngineError("'{0}' with OpenSearch Wrapper".format(
                    self.url),
                                        e,
                                        errorType='urllib2')
示例#18
0
class Picassa(SearchEngine):
    """
    Picassa search engine.

    Parameters:

    * service: handed on to SearchEngine.__init__

    * resultsPerPage (int): select how many results per page

    * access (str): public, private (it is not recommended to change to private), all, visible

    * kind (str): photo is the only working option

    * args: any extra keyword arguments, handed on to SearchEngine.__init__
    """
    def __init__(self,
                 service,
                 resultsPerPage=8,
                 access='public',
                 kind='photo',
                 **args):
        super(Picassa, self).__init__(service, **args)
        self.resultsPerPage = resultsPerPage
        self.access = access
        self.kind = kind

    def _origin(self):
        """ This overrides SearchEngine's default origin (for results from a search engine) for Picassa """
        return 1

    def search(self, query, offset):
        """
        Search function for Picassa.

        Parameters:

        * query (puppy.model.OpenSearch.Query): its search_terms are URL-quoted and sent as 'q'

        * offset (int): result offset for the search; added to _origin() and sent as 'start-index'

        Returns:

        * puppy.model.OpenSearch.Response: the parsed feed, each entry given a 'thumbnail' shortcut

        Raises:

        * SearchEngineError: urllib2 raised a URLError while fetching the results, or a
          TypeError occurred (for example because 'offset' is not an integer)
        """
        def addThumbnailShortcut(picassaResponse):
            """This goes through the results and adds a thumbnail shortcut"""
            # NOTE(review): index 2 means every entry is expected to carry at least
            # three 'media_thumbnail' items - confirm against the feed format
            for result in picassaResponse.entries:
                result['thumbnail'] = result['media_thumbnail'][2]['url']
            return picassaResponse

        # Bound before the try block so the handlers below can always refer to it.
        # Otherwise a TypeError raised while the url is still being built (e.g. when
        # quoting the search terms) would be replaced by an UnboundLocalError.
        url = None

        try:
            pos = self._origin() + offset
            url = 'https://picasaweb.google.com/data/feed/api/all?q={0}&access={1}&kind={2}&start-index={3}&max-results={4}'.format(
                urllib2.quote(query.search_terms), self.access, self.kind, pos,
                self.resultsPerPage)

            data = urllib2.urlopen(url)
            picassaResponse = Response.parse_feed(data.read())
            picassaResponse = addThumbnailShortcut(picassaResponse)
            return picassaResponse

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("Picassa", e, errorType='urllib2', url=url)

        # Check for a type error for offset or resultsPerPage
        except TypeError as e:
            note = "Please ensure that 'offset' and 'resultsPerPage' are both integers."
            if not isinstance(offset, int):
                raise SearchEngineError("Picassa",
                                        e,
                                        note=note,
                                        offsetType=type(offset))

            if not isinstance(self.resultsPerPage, int):
                raise SearchEngineError("Picassa",
                                        e,
                                        note=note,
                                        resultsPerPageType=type(
                                            self.resultsPerPage))

            # Neither argument is to blame, so report the error with whatever url was built
            raise SearchEngineError("Picassa", e, url=url)
示例#19
0
            url = "http://svc.webservius.com/v1/spellcheck/spellcheck/?wsvKey={0}&cmd=check_spelling&version=1.0&out_type=words&slang={1}&text={2}".format(
                appId, self.language, urllib2.quote(query.search_terms))
            data = urllib2.urlopen(url)
            return parse_web_spell_checker_xml('Web Spell Checker', url,
                                               query.search_terms, data.read())

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError, e:
            raise SearchEngineError("Web Spell Checker",
                                    e,
                                    errorType='urllib2',
                                    url=url)

        # Catches any xml syntax errors that occur when lxml is parsing the results
        except etree.XMLSyntaxError, e:
            raise SearchEngineError("Web Spell Checker",
                                    e,
                                    errorType='lxml',
                                    url=url)

        # Generic lxml error for errors other than syntax ones (future work might be to select more specific ones like above)
        except etree.LxmlError, e:
            raise SearchEngineError("Web Spell Checker",
                                    e,
                                    errorType='lxml',
                                    url=url)

        # Catch Attribute error which deals with unexpected none type for the objects the wrapper uses and other associated issues
        except AttributeError, e:
            raise SearchEngineError("Web Spell Checker", e, url=url)
示例#20
0
            if self.source == 'image':
                bingResponse = addDefaultThumbnails(bingResponse)

            return bingResponse

# urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError, e:
            raise SearchEngineError("Bing", e, errorType='urllib2', url=url)

        # Check for a type error for offset or resultsPerPage
        except TypeError, e:
            note = "Please ensure that 'offset' and 'resultsPerPage' are integers if used"
            if isinstance(offset, int) == False:
                raise SearchEngineError("Bing",
                                        e,
                                        note=note,
                                        offsetType=type(offset))

            if isinstance(self.resultsPerPage, int) == False:
                raise SearchEngineError("Bing",
                                        e,
                                        note=note,
                                        resultsPerPageType=type(
                                            self.resultsPerPage))

            raise SearchEngineError("Bing", e, url=url)

        # Catch Attribute error which deals with unexpected none type for the objects the wrapper uses and other associated issues
        except AttributeError, e:
            raise SearchEngineError("Bing", e, url=url)
示例#21
0
        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError, e:
            raise SearchEngineError("SoundCloud Search API",
                                    e,
                                    errorType='urllib2',
                                    url=url)

        # Check for a type error for offset or resultsPerPage
        except TypeError, e:
            note = "Please ensure that 'resultsPerPage' and 'offset' are integers if used"

            if isinstance(self.resultsPerPage, int) == False:
                raise SearchEngineError("SoundCloud Search API",
                                        e,
                                        note=note,
                                        resultsPerPageType=type(
                                            self.resultsPerPage))

            if isinstance(offset, int) == False:
                raise SearchEngineError("SoundCloud Search API",
                                        e,
                                        note=note,
                                        offsetType=type(offset))

            raise SearchEngineError("SoundCloud Search API", e, url=url)

        # Catch Attribute error which deals with unexpected none type for the objects the wrapper uses and other associated issues
        except AttributeError, e:
            raise SearchEngineError("SoundCloud Search API", e, url=url)
示例#22
0
class Bing(SearchEngine, sitesearch):
    """
  Bing search engine wrapper (RSS endpoint at api.search.live.net).

  Note: you can only use location based searching with sourcetypes 'web' and 'phonebook'; however, with web, it doesn't appear to have any effect.

  Parameters:

  * service: passed straight through to SearchEngine.__init__

  * sites: if you wish to search a specific website(s) for results (passed to sitesearch.__init__)

  * source (str): web, image, news are the options

  * adult (str): strict, i.e. safesearch not recommended to change from the default

  * market (str): i.e. which area's results are prioritised more - en-gb is the UK

  * resultsPerPage (int): How many results per page

  * lat (double): the latitude of the place you want to search in

  * lon (double): the longitude of the place you want to search in

  * radius (int): the radius to retrieve results from around lat and lon; 0-250miles is the limit
  """
    def __init__(self,
                 service,
                 source='web',
                 adult='Strict',
                 market='en-GB',
                 resultsPerPage=10,
                 lat=None,
                 lon=None,
                 radius=5,
                 sites=None,
                 **args):
        SearchEngine.__init__(self, service, **args)
        sitesearch.__init__(self, sites)
        self.sites = sites
        self.source = source
        self.adult = adult
        self.market = market
        self.resultsPerPage = resultsPerPage
        self.lat = lat
        self.lon = lon
        self.radius = radius

    def search(self, query, offset):
        """
    Search function for Microsoft Bing.

    Parameters:

    * query (puppy.model.Query): its search_terms are run through _modify_query and URL-quoted

    * offset (int): result offset for the search; when greater than zero it is multiplied by resultsPerPage and added to the origin

    Returns:

    * results (puppy.model.Response): the parsed feed; for the 'image' source each entry also gets 'thumbnail', 'thumbnailWidth' and 'thumbnailHeight'

    Raises:

    * SearchEngineError: wraps urllib2.URLError and TypeError raised while building or fetching the request
    """
        def addDefaultThumbnails(bingResponse):
            """This goes through the results and adds a easy access default thumbnail."""
            for result in bingResponse.entries:
                # The first media thumbnail is promoted to top-level keys
                firstThumbnail = result['media_thumbnail'][0]
                result['thumbnail'] = firstThumbnail['url']
                result['thumbnailWidth'] = firstThumbnail['width']
                result['thumbnailHeight'] = firstThumbnail['height']
            return bingResponse

        # Bound before the try block so that the handlers below can always report it;
        # previously a TypeError raised before the url was built (e.g. while quoting
        # the query) surfaced as an UnboundLocalError from inside the handler
        url = None

        try:
            formattedQuery = urllib2.quote(
                self._modify_query(query.search_terms))
            pos = self._origin()

            if offset > 0:
                pos = pos + (offset * self.resultsPerPage)

            url = 'http://api.search.live.net/rss.aspx?&query={0}&source={1}&{1}.count={2}&{1}.offset={3}&Adult={4}&Market={5}'.format(
                formattedQuery, self.source, self.resultsPerPage, pos,
                self.adult, self.market)

            # If the source type is web or phonebook we can add lon/lat/radius for local search
            # NOTE(review): the truthiness test skips a lat, lon or radius of exactly 0 - confirm whether that is intended
            if self.source in ('web', 'phonebook'):
                if self.lat and self.lon and self.radius:
                    url += "&Latitude={0}&Longitude={1}&Radius={2}".format(
                        self.lat, self.lon, self.radius)

            data = urllib2.urlopen(url)
            bingResponse = Response.parse_feed(data.read())

            if self.source == 'image':
                bingResponse = addDefaultThumbnails(bingResponse)

            return bingResponse

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("Bing", e, errorType='urllib2', url=url)

        # Check for a type error for offset or resultsPerPage
        except TypeError as e:
            note = "Please ensure that 'offset' and 'resultsPerPage' are integers if used"
            if not isinstance(offset, int):
                raise SearchEngineError("Bing",
                                        e,
                                        note=note,
                                        offsetType=type(offset))

            if not isinstance(self.resultsPerPage, int):
                raise SearchEngineError("Bing",
                                        e,
                                        note=note,
                                        resultsPerPageType=type(
                                            self.resultsPerPage))

            # Neither paging value is at fault, so report the url (None if it was never built)
            raise SearchEngineError("Bing", e, url=url)
示例#23
0
    # Now that an API key has been supplied try to get results from the search engine itself
    try:    
      pos = self._origin() + offset
      appId = self.service.config["flickr_api_key"]
      url = "http://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={0}&text={1}&sort={2}&safe_search={3}&media={4}&per_page={5}&page={6}&format=json&nojsoncallback=1".format(appId, urllib2.quote(query.search_terms), self.sortBy, self.safeSearch, self.mediaType, self.resultsPerPage, pos)
      
      if (self.bbox):
        url += "&bbox={0}".format(self.bbox)   
      data = urllib2.urlopen(url).read()
      results = json.loads(data)
      results['photos'].setdefault(u'link', url)
      return parse_flickr_json('Flickr', query.search_terms, results['photos'])

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError, e:
      raise SearchEngineError("Flickr", e, errorType = 'urllib2', url = url)

    # Check for a type error for offset or resultsPerPage
    except TypeError, e:
      note = "Please ensure that 'offset' and 'resultsPerPage' are integers if used"
      if isinstance(offset, int) == False:
        raise SearchEngineError("Flickr", e, note = note, offsetType = type(offset))

      if isinstance(self.resultsPerPage, int) == False:
        raise SearchEngineError("Flickr", e, note = note, resultsPerPageType = type(self.resultsPerPage))

      raise SearchEngineError("Flickr", e, note = note)
	  
    # Catch Attribute error which deals with unexpected none type for the objects the wrapper uses and other associated issues
    except AttributeError, e:
      raise SearchEngineError("Flickr", e, url = url)
示例#24
0
            return emmaResponse

# urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError, e:
            raise SearchEngineError("PuppyIR Pathfinder Search",
                                    e,
                                    errorType='urllib2',
                                    url=url)

        # Check for a type error for offset or resultsPerPage
        except TypeError, e:
            note = "Please ensure that 'offset' and 'resultsPerPage' are integers if used"
            if isinstance(offset, int) == False:
                raise SearchEngineError("PuppyIR Pathfinder Search",
                                        e,
                                        note=note,
                                        offsetType=type(offset))

            if isinstance(self.resultsPerPage, int) == False:
                raise SearchEngineError("PuppyIR Pathfinder Search",
                                        e,
                                        note=note,
                                        resultsPerPageType=type(
                                            self.resultsPerPage))

            raise SearchEngineError("PuppyIR Pathfinder Search", e, note=note)

        # Catch Attribute error which deals with unexpected none type for the objects the wrapper uses and other associated issues
        except AttributeError, e:
            raise SearchEngineError("PuppyIR Pathfinder Search", e, url=url)
示例#25
0
    def search(self, query, offset):
        """Search function for Microsoft Bing (api.datamarket.azure.com endpoint).

       Parameters:

       * query (puppy.model.OpenSearch.Query): its search_terms are wrapped in apostrophes and URL-quoted

       * offset (int): sent to the API as the $skip value

       Returns:

       * puppy.model.OpenSearch.Response
         NOTE(review): the visible body stops after opening the URL - the response is never
         passed to parse_bing_xml_response or returned here; presumably the rest of the
         method is missing, confirm against the full source

       Raises:

       * ApiKeyError: if 'bing_api_key' is missing from the service config

       * SearchEngineError: wraps urllib2.URLError raised while opening the URL

       """

        # Builds a Response from the ATOM XML in 'results', one entry per <entry> element
        def parse_bing_xml_response(site, query, results, offset=0):



            # Returns the string of the first matching child element, or '' when there is no match
            def extractElementString(node, element):
                res =node.find(element)
                if res:
                    return res.string
                else:
                    return ''

            xmlSoup = BeautifulSoup(results)

            response = Response()
            response.version = 'xml'
            response.feed.setdefault('title', "{0}: {1}".format(site, query))
            response.feed.setdefault('description', "Search results for {0} at {1}".format(query, site))
            response.feed.setdefault('link', '')
            response.namespaces.setdefault('opensearch', 'http://a9.com/-/spec/opensearch/1.1/')

            # Both counters are incremented once per entry, so they always hold the same value
            resultCount = 0
            resultsRetrieved = 0
            for r in xmlSoup.findAll('entry'):

                # These element are in Web
                xmlTitleData = extractElementString(r, 'd:title')
                xmlURLData = extractElementString(r,'d:url')
                xmlDescriptionData = extractElementString(r,'d:description')
                # These elements are in News
                xmlSource = extractElementString(r, 'd:source')
                xmlDate = extractElementString(r, 'd:date')

                result_dict = {'title': xmlTitleData, 'link': xmlURLData, 'summary': xmlDescriptionData, 'source': xmlSource, 'date': xmlDate }

                # These elements are in Images; a non-empty media url replaces the 'link' set above
                xmlLink = extractElementString(r, 'd:mediaurl')
                if xmlLink: result_dict['link'] = xmlLink

                xmlSourceUrl = extractElementString(r, 'd:sourceurl')
                if xmlSourceUrl: result_dict['sourceLink'] = xmlSourceUrl

                xmlDisplayLink = extractElementString(r,'d:displayurl')
                if xmlDisplayLink: result_dict['displayLink'] = xmlDisplayLink

                xmlWidth = extractElementString(r,'d:width')
                if xmlWidth: result_dict['width'] = xmlWidth

                xmlHeight = extractElementString(r,'d:height')
                if xmlHeight: result_dict['height'] = xmlHeight

                thumbnail = r.find('d:thumbnail')

                # Thumbnail details are looked up inside the d:thumbnail element only
                if thumbnail:
                    xmlThumbnail = extractElementString(thumbnail,'d:mediaurl')
                    if xmlThumbnail: result_dict['thumbnail'] = xmlThumbnail

                    xmlThumbnailWidth = extractElementString(thumbnail,'d:width')
                    if xmlThumbnailWidth: result_dict['thumbnailWidth'] = xmlThumbnailWidth

                    xmlThumbnailHeight = extractElementString(thumbnail,'d:height')
                    if xmlThumbnailHeight: result_dict['thumbnailHeight'] = xmlThumbnailHeight



                response.entries.append(result_dict)
                resultsRetrieved += 1
                resultCount += 1

            # The total is reported as the number of entries on this page plus the offset
            response.feed.setdefault('opensearch_totalresults', resultCount+offset)
            response.feed.setdefault('opensearch_startindex', offset)
            response.feed.setdefault('opensearch_itemsperpage', resultsRetrieved)

            return response

        # Insert relevant details for Bing API here.
        # The username is left empty; the API key is registered as the password below
        username = ""
        # Try and get the API key from config, if it's not there raise an API Key error - the application will have to deal with this
        try:
            appId = self.service.config["bing_api_key"]
        except KeyError:
            raise ApiKeyError("Bing V3", "bing_api_key")
        queryBingFor = "'"+ query.search_terms +"'" # REMEMBER: use apostrophes within the string, this is what Bing expects
        quoted_query = urllib.quote(queryBingFor)

        # Create the API URL
        rootURL = "https://api.datamarket.azure.com/Bing/Search/"
        searchURL = "%s%s?$format=ATOM&$top=%d&$skip=%d&Query=%s" % (rootURL, self.source, self.resultsPerPage, offset, quoted_query)
        #searchURL = rootURL + self.source + "?$format=ATOM&Query=" + quoted_query
        # Writes the request URL to stdout on every call
        print searchURL

        # Add the API key to the password manager
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, searchURL, username, appId)

        # Prepare an authentication handler and open the URL
        # install_opener makes this opener the process-wide default used by urllib2.urlopen
        try:
            handler = urllib2.HTTPBasicAuthHandler(password_mgr)
            opener = urllib2.build_opener(handler)
            urllib2.install_opener(opener)
            xmlresponse = urllib2.urlopen(searchURL)

        # urllib2 - failures while opening the URL are wrapped in a SearchEngineError
        except urllib2.URLError, e:
            raise SearchEngineError("Bing V3", e, errorType = 'urllib2', url = searchURL)
示例#26
0
class EmmaSearch(SearchEngine):
    """
  EmmaSearch search engine.

  Parameters:

  * service: passed straight through to SearchEngine.__init__

  * age (str): values - 'v' for adults (shows all 'a' and 'k' results too), 'a' for teenagers, and 'k' for children

  * resultsPerPage (int): How many results per page - the default for the emma search service is 10
  """
    def __init__(self, service, age='v', resultsPerPage=10, **args):
        super(EmmaSearch, self).__init__(service, **args)
        self.age = age
        self.resultsPerPage = resultsPerPage

    def _origin(self):
        """ This overrides SearchEngine's default origin (for results from a search engine) for Emma search """
        return 1

    def addEmmaItemId(self, emmaResponse):
        """
    Go through each result and store the item's id, extracted from its link, under 'id'.

    The id is everything after the first "item=" in the link; if the link has no
    "item=" the id is an empty string (previously an arbitrary slice of the link,
    starting at its fifth character, was stored instead).

    Returns:

    * emmaResponse: the same response object, with its entries updated in place
    """
        for result in emmaResponse.entries:
            # partition returns '' as the tail when the marker is absent
            result['id'] = result['link'].partition("item=")[2]
        return emmaResponse

    def addEmmaAge(self, emmaResponse):
        """
    Go through each result and, based on its puppy_age value, assign a numeric age range.

    'v' gives 20-100, 'a' gives 13-19 and 'k' gives 0-12 as 'minAge'/'maxAge'; results
    without a 'puppy_age' key, or with any other value, are left untouched.

    Returns:

    * emmaResponse: the same response object, with its entries updated in place
    """
        for result in emmaResponse.entries:
            if 'puppy_age' not in result:
                continue

            ageCode = result['puppy_age']
            if ageCode == 'v':
                result['minAge'] = 20
                result['maxAge'] = 100
            elif ageCode == 'a':
                result['minAge'] = 13
                result['maxAge'] = 19
            elif ageCode == 'k':
                result['minAge'] = 0
                result['maxAge'] = 12
        return emmaResponse

    def search(self, query, offset):
        """
    Search function for retrieving results from the PuppyIR Pathfinder service which searches the information centre at the Emma Children's Hospital.

    Parameters:

    * query (puppy.model.Query)

    * offset (int): result offset for the search, added to the origin to form the page number

    Returns:

    * results puppy.model.Response: the parsed feed with 'minAge'/'maxAge' and 'id' added to the entries

    Raises:

    * SearchEngineError: wraps urllib2.URLError and TypeError raised while building or fetching the request

    """
        try:
            pos = self._origin() + offset
            feedFormat = 'rss'
            url = "http://pathfinder.cs.utwente.nl/cgi-bin/opensearch/ekz.cgi?query={0}&page={1}&format={2}&leeftijd={3}&size={4}".format(
                urllib2.quote(query.search_terms), pos, feedFormat, self.age,
                self.resultsPerPage)

            data = urllib2.urlopen(url)
            emmaResponse = Response.parse_feed(data.read())
            emmaResponse = self.addEmmaAge(emmaResponse)
            emmaResponse = self.addEmmaItemId(emmaResponse)
            return emmaResponse

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("PuppyIR Pathfinder Search",
                                    e,
                                    errorType='urllib2',
                                    url=url)

        # Check for a type error for offset or resultsPerPage
        except TypeError as e:
            note = "Please ensure that 'offset' and 'resultsPerPage' are integers if used"
            if not isinstance(offset, int):
                raise SearchEngineError("PuppyIR Pathfinder Search",
                                        e,
                                        note=note,
                                        offsetType=type(offset))

            if not isinstance(self.resultsPerPage, int):
                raise SearchEngineError("PuppyIR Pathfinder Search",
                                        e,
                                        note=note,
                                        resultsPerPageType=type(
                                            self.resultsPerPage))

            raise SearchEngineError("PuppyIR Pathfinder Search", e, note=note)
示例#27
0
    * urllib2.URLError    
    """    
    try:	
      pos = self._origin() + offset   
      url = 'http://search.twitter.com/search.atom?q={0}&lang={1}&page={2}&result_type={3}&rpp={4}&include_entities={5}'.format(urllib2.quote(query.search_terms), self.language, pos, self.type, self.resultsPerPage, self.includeEntities)
	 
      if self.geocode:
        url += '&geocode:{0}'.format(self.geocode)

      data = urllib2.urlopen(url)
      return Response.parse_feed(data.read())

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError, e:
      raise SearchEngineError("Twitter", e, errorType = 'urllib2', url = url)

    # Check for a type error for offset or resultsPerPage
    except TypeError, e:
      note = "Please ensure that 'offset' and 'resultsPerPage' are integers if used"
      if isinstance(offset, int) == False:
        raise SearchEngineError("Twitter", e, note = note, offsetType = type(offset))

      if isinstance(self.resultsPerPage, int) == False:
        raise SearchEngineError("Twitter", e, note = note, resultsPerPageType = type(self.resultsPerPage))

      raise SearchEngineError("Twitter", e, url = url)
	  
    # Catch Attribute error which deals with unexpected none type for the objects the wrapper uses and other associated issues
    except AttributeError, e:
      raise SearchEngineError("Twitter", e, url = url)
示例#28
0
            with self.queryIndex.searcher() as searcher:
                results = searcher.search(myquery)
                results.fragmenter = highlight.ContextFragmenter(surround=40)
                results.formatter = highlight.UppercaseFormatter()
                print "WhooshQueryEngine found: " + str(
                    len(results)) + " results"
                response = parse_whoosh_trec('WhooshQueryEngine',
                                             query.search_terms, results)
            return response

        # -----  The Following are Whoosh errors -----

        # There's a problem with the Whoosh query created from the users query
        except QueryError, e:
            raise SearchEngineError("Whoosh Query Engine",
                                    e,
                                    errorType="Whoosh",
                                    query=query)

        # Our Whoosh Index is empty
        except EmptyIndexError, e:
            raise SearchEngineError("Whoosh Query Engine",
                                    e,
                                    errorType="Whoosh")

        # Our Whoosh Index does not match our version of Whoosh
        except IndexVersionError, e:
            raise SearchEngineError("Whoosh Query Engine",
                                    e,
                                    errorType="Whoosh")

        # Generic Index error if the above don't cover the index error
示例#29
0
                    print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(
                        e, url)
                    continue

            return response

        try:
            url = 'http://simple.wikipedia.org/w/api.php?action=opensearch&format=xml&search={0}&namespace=0&limit={1}'.format(
                urllib2.quote(query.search_terms), self.resultsPerPage)
            data = urllib2.urlopen(url)
            return parse_wiki_xml(url, query.search_terms, data.read())

# urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError, e:
            raise SearchEngineError("Simple Wikipedia",
                                    e,
                                    errorType='urllib2',
                                    url=url)

        # Catches any xml syntax errors that occur when lxml is parsing the results
        except etree.XMLSyntaxError, e:
            raise SearchEngineError("Simple Wikipedia",
                                    e,
                                    errorType='lxml',
                                    url=url)

        # Generic lxml error for errors other than syntax ones (future work might be to select more specific ones like above)
        except etree.LxmlError, e:
            raise SearchEngineError("Simple Wikipedia",
                                    e,
                                    errorType='lxml',
                                    url=url)
示例#30
0
    def search(self, query, offset):
        """
    Search function for YouTube.
    
    Parameters:
    
    * query (puppy.model.OpenSearch.Query)

    * offset (int): result offset for the search
    
    Returns:
    
    * puppy.model.OpenSearch.Response
    
    Raises:
    
    * urllib2.URLError
    
    """
        def addExtraFields(youtubeResponse):
            """This goes through the results and adds: the summary field, the embed url and adds a thumbnail shortcut."""
            for result in youtubeResponse.entries:
                author = result['author']
                fullDescription = result[
                    'media_group']  # This is author+description+'youtube'
                result['summary'] = fullDescription[
                    len(author):len(fullDescription) -
                    7]  #Remove author from start and 'youtube' from end - Perhaps find more elegant method
                result['embedUrl'] = 'http://www.youtube.com/embed/' + result[
                    'id'].split(':video:')[1]

                if len(
                        result['media_thumbnail']
                ) >= 2:  # If we have 2 or more thumbnails use the second (hq thumbnail)
                    result['thumbnail'] = result['media_thumbnail'][1]['url']
                elif len(
                        result['media_thumbnail']
                ) == 1:  # Otherwise use the first (it's pretty low res compared to above)
                    result['thumbnail'] = result['media_thumbnail'][0]['url']
                else:
                    result[
                        'thumbnail'] = ''  # If that fails just leave it blank

            return youtubeResponse

        try:
            pos = self._origin() + (offset * self.resultsPerPage)
            url = 'http://gdata.youtube.com/feeds/api/videos?q={0}&max-results={1}&safeSearch={2}&start-index={3}&orderby={4}&v=2'.format(
                urllib2.quote(query.search_terms), self.resultsPerPage,
                self.safeSearch, pos, self.orderBy)

            if self.format:
                url += "&format={0}".format(self.format)

            if self.location and self.locationRadius:
                url += "&location-radius={0}&location={1}".format(
                    self.locationRadius, self.location)
                if self.onlyLocation == True:
                    url += '!'  # This forces YouTube to only return results with a location

            data = urllib2.urlopen(url)
            youtubeResponse = Response.parse_feed(data.read())
            youtubeResponse = addExtraFields(
                youtubeResponse
            )  # Does some processing to get embed url, summary and thumbnail shortcut
            return youtubeResponse

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError, e:
            raise SearchEngineError("YouTube V2",
                                    e,
                                    errorType='urllib2',
                                    url=url)